// pem_rfc7468/grammar.rs
//! Helper functions and rules for enforcing the ABNF grammar for
//! RFC 7468-flavored PEM as described in Section 3.
//!
//! The grammar described below is intended to follow the "ABNF (Strict)"
//! subset of the grammar as described in Section 3 Figure 3.
use crate::{Error, Result, PRE_ENCAPSULATION_BOUNDARY};
use core::str;
/// NUL byte (`0x00`)
pub(crate) const CHAR_NUL: u8 = b'\0';
/// Horizontal tab (`0x09`)
pub(crate) const CHAR_HT: u8 = b'\t';
/// Space (`0x20`)
pub(crate) const CHAR_SP: u8 = b' ';
/// Carriage return (`0x0d`)
pub(crate) const CHAR_CR: u8 = b'\r';
/// Line feed (`0x0a`)
pub(crate) const CHAR_LF: u8 = b'\n';
/// Colon ':' (`0x3a`)
pub(crate) const CHAR_COLON: u8 = b':';
/// Does the provided byte match the 'labelchar' production in the RFC 7468
/// ABNF grammar?
///
/// A `labelchar` is any printable (graphic) ASCII character, i.e. a byte in
/// `0x21..=0x7E`, with the single exception of hyphen-minus (`0x2D`).
pub(crate) fn is_labelchar(char: u8) -> bool {
    char.is_ascii_graphic() && char != b'-'
}
/// Report whether the provided byte may appear inside a PEM type label.
///
/// A byte qualifies when it is either a blank (horizontal tab or space) or a
/// `labelchar`. Hyphen-minus is therefore rejected for now.
// TODO: allow hyphen-minus to match the 'label' production in the ABNF grammar
pub(crate) fn is_allowed_in_label(char: u8) -> bool {
    match char {
        CHAR_HT | CHAR_SP => true,
        other => is_labelchar(other),
    }
}
/// Does the provided byte match the "WSP" ABNF production from Section 3?
///
/// > The common ABNF production WSP is congruent with "blank";
/// > a new production W is used for "whitespace"
pub(crate) fn is_wsp(char: u8) -> bool {
matches!(char, CHAR_HT | CHAR_SP)
}
/// Strip the "preamble", i.e. data that appears before the PEM
/// pre-encapsulation boundary.
///
/// Presently no attempt is made to ensure the preamble decodes successfully
/// under any particular character encoding. The only byte which is disallowed
/// is the NUL byte. This restriction does not appear in RFC7468, but rather
/// is inspired by the OpenSSL PEM decoder.
///
/// Returns a slice which starts at the beginning of the encapsulated text.
///
/// From RFC7468:
/// > Data before the encapsulation boundaries are permitted, and
/// > parsers MUST NOT malfunction when processing such data.
pub(crate) fn strip_preamble(mut bytes: &[u8]) -> Result<&[u8]> {
if bytes.starts_with(PRE_ENCAPSULATION_BOUNDARY) {
return Ok(bytes);
}
while let Some((byte, remaining)) = bytes.split_first() {
match *byte {
CHAR_NUL => {
return Err(Error::Preamble);
}
CHAR_LF if remaining.starts_with(PRE_ENCAPSULATION_BOUNDARY) => {
return Ok(remaining);
}
_ => (),
}
bytes = remaining;
}
Err(Error::Preamble)
}
/// Remove one newline (`eol`) from the front of the provided byte slice.
///
/// A newline is mandatory here: `None` is returned when the slice is empty
/// or does not begin with one, which callers treat as a decoding error.
///
/// From RFC 7468 Section 3:
/// > lines are divided with CRLF, CR, or LF.
///
/// A CR immediately followed by LF is consumed as a single CRLF newline.
pub(crate) fn strip_leading_eol(bytes: &[u8]) -> Option<&[u8]> {
    let (&first, rest) = bytes.split_first()?;

    match first {
        CHAR_LF => Some(rest),
        CHAR_CR => match rest.split_first() {
            // CRLF: drop the LF as well
            Some((&CHAR_LF, after_lf)) => Some(after_lf),
            // Bare CR
            _ => Some(rest),
        },
        _ => None,
    }
}
/// Remove one newline (`eol`) from the back of the provided byte slice.
///
/// A newline is mandatory here: `None` is returned when the slice is empty
/// or does not end with one, which callers treat as a decoding error.
///
/// From RFC 7468 Section 3:
/// > lines are divided with CRLF, CR, or LF.
///
/// An LF immediately preceded by CR is consumed as a single CRLF newline.
pub(crate) fn strip_trailing_eol(bytes: &[u8]) -> Option<&[u8]> {
    let (&last, head) = bytes.split_last()?;

    match last {
        CHAR_LF => match head.split_last() {
            // CRLF: drop the CR as well
            Some((&CHAR_CR, before_cr)) => Some(before_cr),
            // Bare LF
            _ => Some(head),
        },
        CHAR_CR => Some(head),
        _ => None,
    }
}
/// Split a slice beginning with a type label as located in an encapsulation
/// boundary. Returns the label as a `&str`, and slice beginning with the
/// encapsulated text with leading `-----` and newline removed.
///
/// This implementation follows the rules put forth in Section 2, which are
/// stricter than those found in the ABNF grammar:
///
/// > Labels are formally case-sensitive, uppercase, and comprised of zero or more
/// > characters; they do not contain consecutive spaces or hyphen-minuses,
/// > nor do they contain spaces or hyphen-minuses at either end.
///
/// The rules enforced here are:
/// - Labels MAY be empty
/// - Labels consist of `labelchar` bytes (printable ASCII other than `'-'`)
///   and WSP (space or horizontal tab)
/// - Labels MUST NOT begin or end with WSP
/// - Labels MUST NOT contain more than one consecutive WSP character
/// - Hyphen-minus is not supported inside a label yet: the first `'-'`
///   is always treated as the start of the closing `-----` delimiter
///
/// # Returns
/// `Some((label, rest))` where `rest` is whatever follows the closing
/// `-----` delimiter and one mandatory newline, or `None` if the label is
/// malformed, the `-----` delimiter is missing, or no newline follows it.
// TODO(tarcieri): evaluate whether this is too strict; support '-'
pub(crate) fn split_label(bytes: &[u8]) -> Option<(&str, &[u8])> {
    // Length of the label in bytes
    let mut n = 0usize;

    // TODO(tarcieri): handle hyphens in labels as well as spaces
    let mut last_was_wsp = false;

    for &char in bytes {
        // Validate character
        if is_labelchar(char) {
            last_was_wsp = false;
        } else if char == b'-' {
            // Possible start of encapsulation boundary delimiter
            break;
        } else if n != 0 && is_wsp(char) {
            // `n != 0` rejects leading whitespace; repeated whitespace is
            // disallowed as well
            if last_was_wsp {
                return None;
            }

            last_was_wsp = true;
        } else {
            return None;
        }

        n = n.checked_add(1)?;
    }

    // Labels must not end with whitespace (mirrors the leading-WSP check
    // performed inside the loop).
    if last_was_wsp {
        return None;
    }

    let (raw_label, rest) = bytes.split_at(n);
    // Every accepted byte is ASCII, so this conversion is not expected to fail.
    let label = str::from_utf8(raw_label).ok()?;

    match rest {
        [b'-', b'-', b'-', b'-', b'-', body @ ..] => Some((label, strip_leading_eol(body)?)),
        _ => None,
    }
}
/// Validate that the given bytes are allowed as a PEM type label, i.e. the
/// label encoded in the `BEGIN` and `END` encapsulation boundaries.
///
/// A label is accepted when:
/// - every byte is allowed in a label (a `labelchar`, space, or horizontal
///   tab; hyphen-minus is not supported yet),
/// - it neither begins nor ends with a WSP character, and
/// - it contains no two consecutive WSP characters.
///
/// The empty label is valid.
///
/// From RFC 7468 Section 2:
/// > they do not contain consecutive spaces or hyphen-minuses,
/// > nor do they contain spaces or hyphen-minuses at either end.
///
/// # Errors
/// Returns [`Error::Label`] if any of the rules above is violated.
pub(crate) fn validate_label(label: &[u8]) -> Result<()> {
    // Whitespace is only permitted *between* label characters, never at
    // either end of the label.
    if matches!(label, [first, ..] if is_wsp(*first))
        || matches!(label, [.., last] if is_wsp(*last))
    {
        return Err(Error::Label);
    }

    // TODO(tarcieri): handle hyphens in labels as well as spaces
    let mut last_was_wsp = false;

    for &char in label {
        if !is_allowed_in_label(char) {
            return Err(Error::Label);
        }

        if is_wsp(char) {
            // Double sequential whitespace characters disallowed
            if last_was_wsp {
                return Err(Error::Label);
            }

            last_was_wsp = true;
        } else {
            last_was_wsp = false;
        }
    }

    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty label is OK.
    #[test]
    fn split_label_empty() {
        let (label, body) = split_label(b"-----\nBODY").unwrap();
        assert_eq!(label, "");
        assert_eq!(body, b"BODY");
    }

    /// Label containing text.
    #[test]
    fn split_label_with_text() {
        let (label, body) = split_label(b"PRIVATE KEY-----\nBODY").unwrap();
        assert_eq!(label, "PRIVATE KEY");
        assert_eq!(body, b"BODY");
    }

    /// Reject labels containing repeated spaces
    #[test]
    fn split_label_with_repeat_wsp_is_err() {
        // NOTE: the input deliberately has TWO consecutive spaces between
        // the words; with a single space this label is valid.
        assert!(split_label(b"PRIVATE  KEY-----\nBODY").is_none());
    }

    /// Basic validation of a label
    #[test]
    fn validate_private_key_label() {
        assert_eq!(validate_label(b"PRIVATE KEY"), Ok(()));
    }

    /// Reject labels with double spaces
    #[test]
    fn validate_private_key_label_reject_double_space() {
        // NOTE: the input deliberately has TWO consecutive spaces between
        // the words; with a single space this label is valid.
        assert_eq!(validate_label(b"PRIVATE  KEY"), Err(Error::Label));
    }
}