// xml/escape.rs
//! Contains functions for performing XML special characters escaping.
use std::borrow::Cow;
/// Result of inspecting one input character during escaping.
enum Value {
    /// The character needs no escaping and is emitted unchanged.
    Char(char),
    /// The character must be replaced by this escape sequence.
    Str(&'static str),
}

impl Value {
    /// Classifies `c` for use inside an attribute value.
    ///
    /// The five XML markup characters map to their predefined entities.
    /// Line feed and carriage return map to numeric character references,
    /// because a parser would otherwise normalize literal line breaks inside
    /// an attribute value to spaces. Every other character is passed through.
    fn dispatch_for_attribute(c: char) -> Value {
        match c {
            '<' => Value::Str("&lt;"),
            '>' => Value::Str("&gt;"),
            '"' => Value::Str("&quot;"),
            '\'' => Value::Str("&apos;"),
            '&' => Value::Str("&amp;"),
            '\n' => Value::Str("&#xA;"),
            '\r' => Value::Str("&#xD;"),
            _ => Value::Char(c),
        }
    }

    /// Classifies `c` for use inside PCDATA (element text content).
    ///
    /// Only `<` and `&` are replaced; every other character, including
    /// quotes and `>`, is passed through unchanged.
    fn dispatch_for_pcdata(c: char) -> Value {
        match c {
            '<' => Value::Str("&lt;"),
            '&' => Value::Str("&amp;"),
            _ => Value::Char(c),
        }
    }
}
/// Accumulator used while escaping a string.
///
/// It stays `Borrowed` for as long as no character has required a
/// replacement, and switches to `Owned` at the first replacement.
enum Process<'a> {
    /// No replacement has happened yet; holds the complete original input.
    Borrowed(&'a str),
    /// At least one replacement happened; holds the output built so far.
    Owned(String),
}

impl<'a> Process<'a> {
    /// Consumes the classification `next` of the character that starts at
    /// byte offset `i` of the original input.
    fn process(&mut self, (i, next): (usize, Value)) {
        let replacement = match next {
            Value::Str(text) => text,
            Value::Char(ch) => {
                // While still borrowing there is nothing to copy: the
                // untouched prefix lives in the original slice.
                if let Process::Owned(ref mut buf) = *self {
                    buf.push(ch);
                }
                return;
            }
        };

        let switched = match *self {
            Process::Owned(ref mut buf) => {
                buf.push_str(replacement);
                return;
            }
            Process::Borrowed(original) => {
                // First replacement: copy everything before offset `i`
                // verbatim, then append the escape sequence.
                let mut buf = String::with_capacity(original.len() + replacement.len());
                buf.push_str(&original[..i]);
                buf.push_str(replacement);
                buf
            }
        };
        *self = Process::Owned(switched);
    }

    /// Converts the accumulator into a `Cow`, borrowing the original input
    /// when no replacement was ever made.
    fn into_result(self) -> Cow<'a, str> {
        match self {
            Process::Owned(text) => Cow::Owned(text),
            Process::Borrowed(text) => Cow::Borrowed(text),
        }
    }
}
/// Feeds `(byte offset, classification)` pairs through `Process::process`
/// one by one, in iteration order.
impl<'a> Extend<(usize, Value)> for Process<'a> {
    fn extend<I>(&mut self, it: I)
    where
        I: IntoIterator<Item = (usize, Value)>,
    {
        it.into_iter().for_each(|step| self.process(step));
    }
}
/// Escapes `s` using `dispatch` to decide, per character, whether it is kept
/// or replaced.
///
/// Returns a borrowed `Cow` when `dispatch` never asks for a replacement.
fn escape_str(s: &str, dispatch: fn(char) -> Value) -> Cow<str> {
    // Pair each character's byte offset with its classification; the offset
    // lets the accumulator copy the untouched prefix on the first replacement.
    let classified = s.char_indices().map(|(offset, ch)| (offset, dispatch(ch)));

    let mut state = Process::Borrowed(s);
    state.extend(classified);
    state.into_result()
}
/// Performs escaping of common XML characters inside an attribute value.
///
/// This function replaces several important markup characters with their
/// entity equivalents:
///
/// * `<` → `&lt;`
/// * `>` → `&gt;`
/// * `"` → `&quot;`
/// * `'` → `&apos;`
/// * `&` → `&amp;`
///
/// Line feed and carriage return characters are additionally replaced with
/// the numeric character references `&#xA;` and `&#xD;`.
///
/// The resulting string is safe to use inside XML attribute values or in PCDATA sections.
///
/// Does not perform allocations if the given string does not contain escapable characters.
#[inline]
pub fn escape_str_attribute(s: &str) -> Cow<str> {
escape_str(s, Value::dispatch_for_attribute)
}
/// Performs escaping of common XML characters inside PCDATA.
///
/// This function replaces several important markup characters with their
/// entity equivalents:
///
/// * `<` → `&lt;`
/// * `&` → `&amp;`
///
/// All other characters, including quotes and `>`, are left unchanged.
///
/// The resulting string is safe to use inside PCDATA sections but NOT inside attribute values.
///
/// Does not perform allocations if the given string does not contain escapable characters.
#[inline]
pub fn escape_str_pcdata(s: &str) -> Cow<str> {
escape_str(s, Value::dispatch_for_pcdata)
}
#[cfg(test)]
mod tests {
    use std::borrow::Cow;

    use super::{escape_str_attribute, escape_str_pcdata};

    /// A multi-byte character before the first escape must be copied intact:
    /// the prefix is sliced by byte offset, not by character count.
    #[test]
    fn test_escape_multibyte_code_points() {
        assert_eq!(escape_str_attribute("☃<"), "☃&lt;");
        assert_eq!(escape_str_pcdata("☃<"), "☃&lt;");
    }

    /// Every character in the attribute table is replaced, in order.
    #[test]
    fn test_escape_attribute_special_characters() {
        assert_eq!(
            escape_str_attribute("<a href=\"x\" id='y'>&\r\n"),
            "&lt;a href=&quot;x&quot; id=&apos;y&apos;&gt;&amp;&#xD;&#xA;"
        );
    }

    /// PCDATA escaping touches only `<` and `&`.
    #[test]
    fn test_escape_pcdata_leaves_other_characters() {
        assert_eq!(
            escape_str_pcdata("<a b=\"c\">&'\n"),
            "&lt;a b=\"c\">&amp;'\n"
        );
    }

    /// Input without escapable characters is returned borrowed, not copied.
    #[test]
    fn test_no_allocation_without_escapable_characters() {
        match escape_str_attribute("plain ☃ text") {
            Cow::Borrowed(s) => assert_eq!(s, "plain ☃ text"),
            Cow::Owned(_) => panic!("expected a borrowed result"),
        }
        match escape_str_pcdata("") {
            Cow::Borrowed(s) => assert_eq!(s, ""),
            Cow::Owned(_) => panic!("expected a borrowed result"),
        }
    }
}