pem_rfc7468/
grammar.rs

1//! Helper functions and rules for enforcing the ABNF grammar for
2//! RFC 7468-flavored PEM as described in Section 3.
3//!
4//! The grammar described below is intended to follow the "ABNF (Strict)"
5//! subset of the grammar as described in Section 3 Figure 3.
6
7use crate::{Error, Result, PRE_ENCAPSULATION_BOUNDARY};
8use core::str;
9
/// NUL byte (`0x00`)
pub(crate) const CHAR_NUL: u8 = b'\0';

/// Horizontal tab (`0x09`)
pub(crate) const CHAR_HT: u8 = b'\t';

/// Space (`0x20`)
pub(crate) const CHAR_SP: u8 = b' ';

/// Carriage return (`0x0d`)
pub(crate) const CHAR_CR: u8 = b'\r';

/// Line feed (`0x0a`)
pub(crate) const CHAR_LF: u8 = b'\n';

/// Colon `':'` (`0x3a`)
pub(crate) const CHAR_COLON: u8 = b':';
27
/// Is the provided byte a `labelchar` as defined by the RFC 7468 ABNF
/// grammar, i.e. a printable, non-space ASCII character (`0x21..=0x7E`)
/// other than hyphen-minus (`0x2D`)?
pub(crate) fn is_labelchar(char: u8) -> bool {
    char != b'-' && char.is_ascii_graphic()
}
33
34/// Does the provided byte match a character allowed in a label?
35// TODO: allow hyphen-minus to match the 'label' production in the ABNF grammar
36pub(crate) fn is_allowed_in_label(char: u8) -> bool {
37    is_labelchar(char) || matches!(char, CHAR_HT | CHAR_SP)
38}
39
40/// Does the provided byte match the "WSP" ABNF production from Section 3?
41///
42/// > The common ABNF production WSP is congruent with "blank";
43/// > a new production W is used for "whitespace"
44pub(crate) fn is_wsp(char: u8) -> bool {
45    matches!(char, CHAR_HT | CHAR_SP)
46}
47
48/// Strip the "preamble", i.e. data that appears before the PEM
49/// pre-encapsulation boundary.
50///
51/// Presently no attempt is made to ensure the preamble decodes successfully
52/// under any particular character encoding. The only byte which is disallowed
53/// is the NUL byte. This restriction does not appear in RFC7468, but rather
54/// is inspired by the OpenSSL PEM decoder.
55///
56/// Returns a slice which starts at the beginning of the encapsulated text.
57///
58/// From RFC7468:
59/// > Data before the encapsulation boundaries are permitted, and
60/// > parsers MUST NOT malfunction when processing such data.
61pub(crate) fn strip_preamble(mut bytes: &[u8]) -> Result<&[u8]> {
62    if bytes.starts_with(PRE_ENCAPSULATION_BOUNDARY) {
63        return Ok(bytes);
64    }
65
66    while let Some((byte, remaining)) = bytes.split_first() {
67        match *byte {
68            CHAR_NUL => {
69                return Err(Error::Preamble);
70            }
71            CHAR_LF if remaining.starts_with(PRE_ENCAPSULATION_BOUNDARY) => {
72                return Ok(remaining);
73            }
74            _ => (),
75        }
76
77        bytes = remaining;
78    }
79
80    Err(Error::Preamble)
81}
82
/// Remove a single newline (`eol`) from the start of the provided byte slice.
///
/// The newline is mandatory: `None` is returned when the slice does not begin
/// with one, which callers treat as a decoding error. A `CR LF` pair is
/// consumed as one newline.
///
/// From RFC 7468 Section 3:
/// > lines are divided with CRLF, CR, or LF.
pub(crate) fn strip_leading_eol(bytes: &[u8]) -> Option<&[u8]> {
    let (&first, rest) = bytes.split_first()?;

    match first {
        CHAR_LF => Some(rest),
        // A carriage return may optionally be followed by a line feed
        CHAR_CR => match rest.split_first() {
            Some((&CHAR_LF, tail)) => Some(tail),
            _ => Some(rest),
        },
        _ => None,
    }
}
98
/// Remove a single newline (`eol`) from the end of the provided byte slice.
///
/// The newline is mandatory: `None` is returned when the slice does not end
/// with one, which callers treat as a decoding error. A `CR LF` pair is
/// consumed as one newline.
///
/// From RFC 7468 Section 3:
/// > lines are divided with CRLF, CR, or LF.
pub(crate) fn strip_trailing_eol(bytes: &[u8]) -> Option<&[u8]> {
    let (&last, head) = bytes.split_last()?;

    match last {
        // A line feed may optionally be preceded by a carriage return
        CHAR_LF => match head.split_last() {
            Some((&CHAR_CR, inner)) => Some(inner),
            _ => Some(head),
        },
        CHAR_CR => Some(head),
        _ => None,
    }
}
114
115/// Split a slice beginning with a type label as located in an encapsulation
116/// boundary. Returns the label as a `&str`, and slice beginning with the
117/// encapsulated text with leading `-----` and newline removed.
118///
119/// This implementation follows the rules put forth in Section 2, which are
120/// stricter than those found in the ABNF grammar:
121///
122/// > Labels are formally case-sensitive, uppercase, and comprised of zero or more
123/// > characters; they do not contain consecutive spaces or hyphen-minuses,
124/// > nor do they contain spaces or hyphen-minuses at either end.
125///
126/// We apply a slightly stricter interpretation:
127/// - Labels MAY be empty
128/// - Non-empty labels MUST start with an upper-case letter: `'A'..='Z'`
129/// - The only allowable characters subsequently are `'A'..='Z'` or WSP.
130///   (NOTE: this is an overly strict initial implementation and should be relaxed)
131/// - Whitespace MUST NOT contain more than one consecutive WSP character
132// TODO(tarcieri): evaluate whether this is too strict; support '-'
133pub(crate) fn split_label(bytes: &[u8]) -> Option<(&str, &[u8])> {
134    let mut n = 0usize;
135
136    // TODO(tarcieri): handle hyphens in labels as well as spaces
137    let mut last_was_wsp = false;
138
139    for &char in bytes {
140        // Validate character
141        if is_labelchar(char) {
142            last_was_wsp = false;
143        } else if char == b'-' {
144            // Possible start of encapsulation boundary delimiter
145            break;
146        } else if n != 0 && is_wsp(char) {
147            // Repeated whitespace disallowed
148            if last_was_wsp {
149                return None;
150            }
151
152            last_was_wsp = true;
153        } else {
154            return None;
155        }
156
157        n = n.checked_add(1)?;
158    }
159
160    let (raw_label, rest) = bytes.split_at(n);
161    let label = str::from_utf8(raw_label).ok()?;
162
163    match rest {
164        [b'-', b'-', b'-', b'-', b'-', body @ ..] => Some((label, strip_leading_eol(body)?)),
165        _ => None,
166    }
167}
168
169/// Validate that the given bytes are allowed as a PEM type label, i.e. the
170/// label encoded in the `BEGIN` and `END` encapsulation boundaries.
171pub(crate) fn validate_label(label: &[u8]) -> Result<()> {
172    // TODO(tarcieri): handle hyphens in labels as well as spaces
173    let mut last_was_wsp = false;
174
175    for &char in label {
176        if !is_allowed_in_label(char) {
177            return Err(Error::Label);
178        }
179
180        if is_wsp(char) {
181            // Double sequential whitespace characters disallowed
182            if last_was_wsp {
183                return Err(Error::Label);
184            }
185
186            last_was_wsp = true;
187        } else {
188            last_was_wsp = false;
189        }
190    }
191
192    Ok(())
193}
194
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty label is accepted and the body follows the newline.
    #[test]
    fn split_label_empty() {
        assert_eq!(split_label(b"-----\nBODY"), Some(("", &b"BODY"[..])));
    }

    /// A label containing text (with a single interior space) is accepted.
    #[test]
    fn split_label_with_text() {
        assert_eq!(
            split_label(b"PRIVATE KEY-----\nBODY"),
            Some(("PRIVATE KEY", &b"BODY"[..]))
        );
    }

    /// Labels containing repeated spaces are rejected.
    #[test]
    fn split_label_with_repeat_wsp_is_err() {
        assert_eq!(split_label(b"PRIVATE  KEY-----\nBODY"), None);
    }

    /// Basic validation of a well-formed label succeeds.
    #[test]
    fn validate_private_key_label() {
        assert!(validate_label(b"PRIVATE KEY") == Ok(()));
    }

    /// Labels with double spaces fail validation with `Error::Label`.
    #[test]
    fn validate_private_key_label_reject_double_space() {
        assert!(validate_label(b"PRIVATE  KEY") == Err(Error::Label));
    }
}