pem_rfc7468/
grammar.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
//! Helper functions and rules for enforcing the ABNF grammar for
//! RFC 7468-flavored PEM as described in Section 3.
//!
//! The grammar described below is intended to follow the "ABNF (Strict)"
//! subset of the grammar as described in Section 3 Figure 3.

use crate::{Error, Result, PRE_ENCAPSULATION_BOUNDARY};
use core::str;

/// ASCII NUL (`0x00`)
pub(crate) const CHAR_NUL: u8 = b'\0';

/// ASCII horizontal tab (`0x09`)
pub(crate) const CHAR_HT: u8 = b'\t';

/// ASCII space (`0x20`)
pub(crate) const CHAR_SP: u8 = b' ';

/// ASCII carriage return (`0x0D`)
pub(crate) const CHAR_CR: u8 = b'\r';

/// ASCII line feed (`0x0A`)
pub(crate) const CHAR_LF: u8 = b'\n';

/// ASCII colon `':'` (`0x3A`)
pub(crate) const CHAR_COLON: u8 = b':';

/// Does the provided byte match the 'labelchar' production in the RFC 7468
/// ABNF grammar?
///
/// A `labelchar` is any visible (non-whitespace) ASCII character in the range
/// `0x21..=0x7E`, with the sole exception of hyphen-minus (`0x2D`).
pub(crate) fn is_labelchar(char: u8) -> bool {
    char != b'-' && char.is_ascii_graphic()
}

/// Does the provided byte match a character allowed in a label?
// TODO: allow hyphen-minus to match the 'label' production in the ABNF grammar
pub(crate) fn is_allowed_in_label(char: u8) -> bool {
    is_labelchar(char) || matches!(char, CHAR_HT | CHAR_SP)
}

/// Does the provided byte match the "WSP" ABNF production from Section 3?
///
/// > The common ABNF production WSP is congruent with "blank";
/// > a new production W is used for "whitespace"
pub(crate) fn is_wsp(char: u8) -> bool {
    matches!(char, CHAR_HT | CHAR_SP)
}

/// Strip the "preamble", i.e. data that appears before the PEM
/// pre-encapsulation boundary.
///
/// Presently no attempt is made to ensure the preamble decodes successfully
/// under any particular character encoding. The only byte which is disallowed
/// is the NUL byte. This restriction does not appear in RFC7468, but rather
/// is inspired by the OpenSSL PEM decoder.
///
/// Returns a slice which starts at the beginning of the encapsulated text.
///
/// From RFC7468:
/// > Data before the encapsulation boundaries are permitted, and
/// > parsers MUST NOT malfunction when processing such data.
pub(crate) fn strip_preamble(mut bytes: &[u8]) -> Result<&[u8]> {
    if bytes.starts_with(PRE_ENCAPSULATION_BOUNDARY) {
        return Ok(bytes);
    }

    while let Some((byte, remaining)) = bytes.split_first() {
        match *byte {
            CHAR_NUL => {
                return Err(Error::Preamble);
            }
            CHAR_LF if remaining.starts_with(PRE_ENCAPSULATION_BOUNDARY) => {
                return Ok(remaining);
            }
            _ => (),
        }

        bytes = remaining;
    }

    Err(Error::Preamble)
}

/// Remove one newline (`eol`) from the front of the provided byte slice.
///
/// Recognized newlines are CRLF, a lone CR, or a lone LF; a CR directly
/// followed by LF is consumed as a single CRLF. The newline is mandatory:
/// `None` is returned when the slice does not start with one (including when
/// it is empty), which callers treat as a decoding error.
///
/// From RFC 7468 Section 3:
/// > lines are divided with CRLF, CR, or LF.
pub(crate) fn strip_leading_eol(bytes: &[u8]) -> Option<&[u8]> {
    let (first, rest) = bytes.split_first()?;

    match (first, rest.split_first()) {
        // CRLF: drop both bytes
        (&CHAR_CR, Some((&CHAR_LF, after_crlf))) => Some(after_crlf),
        // Lone CR
        (&CHAR_CR, _) => Some(rest),
        // Lone LF
        (&CHAR_LF, _) => Some(rest),
        _ => None,
    }
}

/// Remove one newline (`eol`) from the back of the provided byte slice.
///
/// Recognized newlines are CRLF, a lone CR, or a lone LF; an LF directly
/// preceded by CR is consumed as a single CRLF. The newline is mandatory:
/// `None` is returned when the slice does not end with one (including when
/// it is empty), which callers treat as a decoding error.
///
/// From RFC 7468 Section 3:
/// > lines are divided with CRLF, CR, or LF.
pub(crate) fn strip_trailing_eol(bytes: &[u8]) -> Option<&[u8]> {
    let (last, head) = bytes.split_last()?;

    match (head.split_last(), last) {
        // CRLF: drop both bytes
        (Some((&CHAR_CR, before_crlf)), &CHAR_LF) => Some(before_crlf),
        // Lone LF
        (_, &CHAR_LF) => Some(head),
        // Lone CR
        (_, &CHAR_CR) => Some(head),
        _ => None,
    }
}

/// Split a slice beginning with a type label as located in an encapsulation
/// boundary. Returns the label as a `&str`, and slice beginning with the
/// encapsulated text with leading `-----` and newline removed.
///
/// This implementation follows the rules put forth in Section 2, which are
/// stricter than those found in the ABNF grammar:
///
/// > Labels are formally case-sensitive, uppercase, and comprised of zero or more
/// > characters; they do not contain consecutive spaces or hyphen-minuses,
/// > nor do they contain spaces or hyphen-minuses at either end.
///
/// The rules enforced here are:
/// - Labels MAY be empty
/// - Non-empty labels MUST start and end with a `labelchar` (any visible
///   ASCII character other than hyphen-minus); upper case is NOT enforced
/// - In between, only `labelchar` or WSP (space / horizontal tab) is allowed
/// - Whitespace MUST NOT contain more than one consecutive WSP character
/// - The first hyphen-minus ends the label and MUST begin the `-----`
///   delimiter, i.e. hyphens inside labels are not supported yet
///
/// Returns `None` if any of these rules is violated, if the `-----` delimiter
/// is missing, or if the delimiter is not followed by a newline.
// TODO(tarcieri): evaluate whether this is too strict; support '-'
pub(crate) fn split_label(bytes: &[u8]) -> Option<(&str, &[u8])> {
    // Length of the label in bytes
    let mut n = 0usize;

    // TODO(tarcieri): handle hyphens in labels as well as spaces
    let mut last_was_wsp = false;

    for &char in bytes {
        // Validate character
        if is_labelchar(char) {
            last_was_wsp = false;
        } else if char == b'-' {
            // Possible start of encapsulation boundary delimiter
            break;
        } else if n != 0 && is_wsp(char) {
            // Repeated whitespace disallowed
            if last_was_wsp {
                return None;
            }

            last_was_wsp = true;
        } else {
            // Leading whitespace or a character not permitted in labels
            return None;
        }

        n = n.checked_add(1)?;
    }

    // Labels must not end with whitespace: WSP may only separate two
    // label characters, mirroring the leading-whitespace check above.
    if last_was_wsp {
        return None;
    }

    let (raw_label, rest) = bytes.split_at(n);
    let label = str::from_utf8(raw_label).ok()?;

    // The label must be followed directly by the closing `-----` and an eol
    match rest {
        [b'-', b'-', b'-', b'-', b'-', body @ ..] => Some((label, strip_leading_eol(body)?)),
        _ => None,
    }
}

/// Validate that the given bytes are allowed as a PEM type label, i.e. the
/// label encoded in the `BEGIN` and `END` encapsulation boundaries.
///
/// A label is accepted when:
/// - every byte is a `labelchar` or WSP (space / horizontal tab),
/// - it neither starts nor ends with WSP, and
/// - it never contains two consecutive WSP characters.
///
/// The empty label is valid.
///
/// # Errors
///
/// Returns [`Error::Label`] if any of the rules above is violated.
pub(crate) fn validate_label(label: &[u8]) -> Result<()> {
    // RFC 7468 Section 2: labels do not contain spaces at either end. A label
    // with leading whitespace would also produce a boundary line that
    // `split_label` refuses to parse.
    let is_edge_wsp = |edge: Option<&u8>| edge.map_or(false, |&byte| is_wsp(byte));

    if is_edge_wsp(label.first()) || is_edge_wsp(label.last()) {
        return Err(Error::Label);
    }

    // TODO(tarcieri): handle hyphens in labels as well as spaces
    let mut last_was_wsp = false;

    for &char in label {
        if !is_allowed_in_label(char) {
            return Err(Error::Label);
        }

        if is_wsp(char) {
            // Double sequential whitespace characters disallowed
            if last_was_wsp {
                return Err(Error::Label);
            }

            last_was_wsp = true;
        } else {
            last_was_wsp = false;
        }
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// An empty label is accepted; the body starts right after the newline.
    #[test]
    fn split_label_empty() {
        assert_eq!(split_label(b"-----\nBODY"), Some(("", &b"BODY"[..])));
    }

    /// A label made of words separated by a single space is returned as-is.
    #[test]
    fn split_label_with_text() {
        assert_eq!(
            split_label(b"PRIVATE KEY-----\nBODY"),
            Some(("PRIVATE KEY", &b"BODY"[..]))
        );
    }

    /// Two consecutive spaces inside a label are rejected.
    #[test]
    fn split_label_with_repeat_wsp_is_err() {
        assert_eq!(split_label(b"PRIVATE  KEY-----\nBODY"), None);
    }

    /// A well-formed label passes validation.
    #[test]
    fn validate_private_key_label() {
        assert!(validate_label(b"PRIVATE KEY").is_ok());
    }

    /// Two consecutive spaces inside a label fail validation.
    #[test]
    fn validate_private_key_label_reject_double_space() {
        assert!(matches!(
            validate_label(b"PRIVATE  KEY"),
            Err(Error::Label)
        ));
    }
}