1#[derive(Clone, Debug)]
9pub struct EscapeBytes<'a> {
10 remaining: &'a [u8],
11 state: EscapeState,
12}
13
14impl<'a> EscapeBytes<'a> {
15 pub(crate) fn new(bytes: &'a [u8]) -> EscapeBytes {
16 EscapeBytes { remaining: bytes, state: EscapeState::Start }
17 }
18}
19
20impl<'a> Iterator for EscapeBytes<'a> {
21 type Item = char;
22
23 #[inline]
24 fn next(&mut self) -> Option<char> {
25 use self::EscapeState::*;
26
27 match self.state {
28 Start => {
29 let byte = match crate::decode_utf8(self.remaining) {
30 (None, 0) => return None,
31 (None, _) | (Some(_), 1) => {
36 let byte = self.remaining[0];
37 self.remaining = &self.remaining[1..];
38 byte
39 }
40 (Some(ch), size) => {
43 self.remaining = &self.remaining[size..];
44 return Some(ch);
45 }
46 };
47 self.state = match byte {
48 0x21..=0x5B | 0x5D..=0x7E => {
49 return Some(char::from(byte))
50 }
51 b'\0' => SpecialEscape('0'),
52 b'\n' => SpecialEscape('n'),
53 b'\r' => SpecialEscape('r'),
54 b'\t' => SpecialEscape('t'),
55 b'\\' => SpecialEscape('\\'),
56 _ => HexEscapeX(byte),
57 };
58 Some('\\')
59 }
60 SpecialEscape(ch) => {
61 self.state = Start;
62 Some(ch)
63 }
64 HexEscapeX(byte) => {
65 self.state = HexEscapeHighNybble(byte);
66 Some('x')
67 }
68 HexEscapeHighNybble(byte) => {
69 self.state = HexEscapeLowNybble(byte);
70 let nybble = byte >> 4;
71 Some(hexdigit_to_char(nybble))
72 }
73 HexEscapeLowNybble(byte) => {
74 self.state = Start;
75 let nybble = byte & 0xF;
76 Some(hexdigit_to_char(nybble))
77 }
78 }
79 }
80}
81
82impl<'a> core::fmt::Display for EscapeBytes<'a> {
83 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
84 use core::fmt::Write;
85 for ch in self.clone() {
86 f.write_char(ch)?;
87 }
88 Ok(())
89 }
90}
91
/// The position of an `EscapeBytes` iterator within its output.
///
/// An escape sequence spans several `char`s, but the iterator yields one
/// `char` per call, so the part still to be emitted is recorded here.
#[derive(Clone, Debug)]
enum EscapeState {
    /// No escape sequence is pending; the next step consumes input.
    Start,
    /// The backslash of a two-character escape has been emitted; the given
    /// `char` is emitted next, after which the state returns to `Start`.
    SpecialEscape(char),
    /// The backslash of a hex escape has been emitted; `x` is emitted next.
    /// The payload is the byte being escaped.
    HexEscapeX(u8),
    /// The hex digit for the upper four bits of the byte is emitted next.
    HexEscapeHighNybble(u8),
    /// The hex digit for the lower four bits of the byte is emitted next,
    /// after which the state returns to `Start`.
    HexEscapeLowNybble(u8),
}
119
/// An iterator adapter that turns a stream of escaped `char`s back into the
/// raw bytes they describe.
///
/// `I` is the underlying iterator; the `Iterator` implementation requires it
/// to yield `char`.
#[derive(Clone, Debug)]
#[cfg(feature = "alloc")]
pub(crate) struct UnescapeBytes<I> {
    /// The source of (possibly escaped) characters.
    it: I,
    /// How far into an escape sequence the adapter currently is, including
    /// any literal bytes that are buffered for emission.
    state: UnescapeState,
}
137
#[cfg(feature = "alloc")]
impl<I: Iterator<Item = char>> UnescapeBytes<I> {
    /// Wraps anything convertible into a `char` iterator in an unescaping
    /// adapter that begins outside of any escape sequence.
    pub(crate) fn new<T: IntoIterator<IntoIter = I>>(
        t: T,
    ) -> UnescapeBytes<I> {
        let it = t.into_iter();
        UnescapeBytes { it, state: UnescapeState::Start }
    }
}
146
#[cfg(feature = "alloc")]
impl<I: Iterator<Item = char>> Iterator for UnescapeBytes<I> {
    type Item = u8;

    /// Produces the next unescaped byte.
    ///
    /// The recognized escapes are `\0`, `\\`, `\r`, `\n`, `\t` and `\xNN`
    /// with two hex digits of either case. An unrecognized or incomplete
    /// escape is not an error: its characters are emitted literally, UTF-8
    /// encoded. `None` is returned when the underlying iterator ends while
    /// no escape is pending and nothing is buffered.
    fn next(&mut self) -> Option<u8> {
        loop {
            match self.state {
                UnescapeState::Start => {
                    // `?` ends the iteration when the source is exhausted.
                    self.state = match self.it.next()? {
                        '\\' => UnescapeState::Escape,
                        literal => UnescapeState::bytes(&[], literal),
                    };
                }
                UnescapeState::Bytes { buf, cur, len } => {
                    let byte = buf[cur];
                    let following = cur + 1;
                    self.state = if following < len {
                        UnescapeState::Bytes { buf, cur: following, len }
                    } else {
                        UnescapeState::Start
                    };
                    return Some(byte);
                }
                UnescapeState::Escape => {
                    // `Some(byte)` means the escape resolved to a single
                    // byte; `None` means the state was already advanced and
                    // the loop must continue.
                    let resolved = match self.it.next() {
                        // A trailing lone backslash stands for itself.
                        None => Some(b'\\'),
                        Some('0') => Some(b'\x00'),
                        Some('\\') => Some(b'\\'),
                        Some('r') => Some(b'\r'),
                        Some('n') => Some(b'\n'),
                        Some('t') => Some(b'\t'),
                        Some('x') => {
                            self.state = UnescapeState::HexFirst;
                            None
                        }
                        Some(other) => {
                            // Unknown escape: emit the backslash and the
                            // character that followed it literally.
                            self.state =
                                UnescapeState::bytes(&[b'\\'], other);
                            None
                        }
                    };
                    if let Some(byte) = resolved {
                        self.state = UnescapeState::Start;
                        return Some(byte);
                    }
                }
                UnescapeState::HexFirst => match self.it.next() {
                    Some(digit) if digit.is_ascii_hexdigit() => {
                        self.state = UnescapeState::HexSecond(digit);
                    }
                    not_hex => {
                        // `\x` without a hex digit after it is emitted as
                        // written: the backslash now, the rest buffered.
                        self.state = match not_hex {
                            Some(ch) => UnescapeState::bytes(&[b'x'], ch),
                            None => UnescapeState::bytes_raw(&[b'x']),
                        };
                        return Some(b'\\');
                    }
                },
                UnescapeState::HexSecond(first) => {
                    match self.it.next() {
                        Some(second) if second.is_ascii_hexdigit() => {
                            self.state = UnescapeState::Start;
                            let high = char_to_hexdigit(first);
                            let low = char_to_hexdigit(second);
                            return Some((high << 4) | low);
                        }
                        Some(other) => {
                            self.state =
                                UnescapeState::bytes2(&[b'x'], first, other);
                        }
                        None => {
                            self.state = UnescapeState::bytes(&[b'x'], first);
                        }
                    }
                    // `\xN` without a second hex digit is emitted as
                    // written, starting with the backslash.
                    return Some(b'\\');
                }
            }
        }
    }
}
268
/// The position of an `UnescapeBytes` adapter within its input.
#[derive(Clone, Debug)]
#[cfg(feature = "alloc")]
enum UnescapeState {
    /// Not inside any escape sequence.
    Start,
    /// Literal bytes waiting to be emitted: `buf[cur..len]` is still
    /// pending. Eleven bytes are enough for the largest amount the
    /// constructors in this file accept: a three-byte prefix followed by two
    /// UTF-8 encoded `char`s of up to four bytes each.
    Bytes { buf: [u8; 11], cur: usize, len: usize },
    /// A backslash has been read.
    Escape,
    /// `\x` has been read; the first hex digit is expected.
    HexFirst,
    /// `\x` and the first hex digit (the payload) have been read; the second
    /// hex digit is expected.
    HexSecond(char),
}
289
#[cfg(feature = "alloc")]
impl UnescapeState {
    /// Builds the state that emits `bytes` verbatim, one byte per step.
    ///
    /// An empty slice yields `UnescapeState::Start` rather than a
    /// zero-length `Bytes` state: the unescaping iterator emits `buf[cur]`
    /// before it compares `cur` with `len`, so a zero-length buffer would
    /// otherwise produce a spurious NUL byte.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` holds more than 11 bytes.
    fn bytes_raw(bytes: &[u8]) -> UnescapeState {
        assert!(bytes.len() <= 11, "no more than 11 bytes allowed");
        if bytes.is_empty() {
            return UnescapeState::Start;
        }
        let mut buf = [0; 11];
        buf[..bytes.len()].copy_from_slice(bytes);
        UnescapeState::Bytes { buf, cur: 0, len: bytes.len() }
    }

    /// Builds the state that emits `prefix` followed by the UTF-8 encoding
    /// of `ch`.
    ///
    /// # Panics
    ///
    /// Panics if `prefix` holds more than 3 bytes.
    fn bytes(prefix: &[u8], ch: char) -> UnescapeState {
        assert!(prefix.len() <= 3, "no more than 3 bytes allowed");
        let mut buf = [0; 11];
        let (head, tail) = buf.split_at_mut(prefix.len());
        head.copy_from_slice(prefix);
        let len = prefix.len() + ch.encode_utf8(tail).len();
        UnescapeState::Bytes { buf, cur: 0, len }
    }

    /// Builds the state that emits `prefix` followed by the UTF-8 encodings
    /// of `ch1` and `ch2`, in that order.
    ///
    /// # Panics
    ///
    /// Panics if `prefix` holds more than 3 bytes.
    fn bytes2(prefix: &[u8], ch1: char, ch2: char) -> UnescapeState {
        assert!(prefix.len() <= 3, "no more than 3 bytes allowed");
        let mut buf = [0; 11];
        let (head, tail) = buf.split_at_mut(prefix.len());
        head.copy_from_slice(prefix);
        let first_len = ch1.encode_utf8(tail).len();
        let second_len = ch2.encode_utf8(&mut tail[first_len..]).len();
        let len = prefix.len() + first_len + second_len;
        UnescapeState::Bytes { buf, cur: 0, len }
    }
}
339
340#[cfg(feature = "alloc")]
/// Converts an ASCII hex digit (`0-9`, `a-f` or `A-F`) to its numeric value.
///
/// # Panics
///
/// Panics if `ch` is not a base-16 digit.
fn char_to_hexdigit(ch: char) -> u8 {
    let value = ch.to_digit(16).unwrap();
    // A base-16 digit is at most 15, so the narrowing conversion succeeds.
    u8::try_from(value).unwrap()
}
349
/// Converts a value in `0..=15` to its uppercase hexadecimal digit.
///
/// # Panics
///
/// Panics if `digit` is 16 or greater.
fn hexdigit_to_char(digit: u8) -> char {
    // `from_digit` produces lowercase letters for 10..=15.
    let lowercase = char::from_digit(u32::from(digit), 16).unwrap();
    lowercase.to_ascii_uppercase()
}
358
#[cfg(all(test, feature = "std"))]
mod tests {
    use crate::BString;

    use super::*;

    /// Asserts that escaping `input` produces exactly `expected`.
    #[track_caller]
    fn check_escape<T: AsRef<[u8]>>(expected: &str, input: T) {
        let escaped = EscapeBytes::new(input.as_ref()).to_string();
        assert_eq!(expected, escaped);
    }

    /// Asserts that unescaping `escaped` produces exactly the bytes of
    /// `expected`.
    #[track_caller]
    fn check_unescape<T: AsRef<[u8]>>(expected: T, escaped: &str) {
        let want = BString::from(expected.as_ref());
        let got: BString = UnescapeBytes::new(escaped.chars()).collect();
        assert_eq!(want, got);
    }

    #[test]
    fn escape() {
        // Printable ASCII passes through; a literal backslash is doubled.
        check_escape(r"a", br"a");
        check_escape(r"\\x61", br"\x61");
        check_escape(r"a", b"\x61");
        check_escape(r"~", b"\x7E");
        check_escape(r"\x7F", b"\x7F");

        // The two-character escapes.
        check_escape(r"\n", b"\n");
        check_escape(r"\r", b"\r");
        check_escape(r"\t", b"\t");
        check_escape(r"\\", b"\\");
        check_escape(r"\0", b"\0");
        check_escape(r"\0", b"\x00");

        // Bytes that can never start valid UTF-8.
        check_escape(r"\x88", b"\x88");
        check_escape(r"\x8F", b"\x8F");
        check_escape(r"\xF8", b"\xF8");
        check_escape(r"\xFF", b"\xFF");

        // Truncated versus complete three-byte sequence.
        check_escape(r"\xE2", b"\xE2");
        check_escape(r"\xE2\x98", b"\xE2\x98");
        check_escape(r"☃", b"\xE2\x98\x83");

        // Truncated versus complete four-byte sequence.
        check_escape(r"\xF0", b"\xF0");
        check_escape(r"\xF0\x9F", b"\xF0\x9F");
        check_escape(r"\xF0\x9F\x92", b"\xF0\x9F\x92");
        check_escape(r"💩", b"\xF0\x9F\x92\xA9");
    }

    #[test]
    fn unescape() {
        check_unescape(r"a", r"a");
        check_unescape(r"\x61", r"\\x61");
        check_unescape(r"a", r"\x61");
        check_unescape(r"~", r"\x7E");
        check_unescape(b"\x7F", r"\x7F");

        check_unescape(b"\n", r"\n");
        check_unescape(b"\r", r"\r");
        check_unescape(b"\t", r"\t");
        check_unescape(b"\\", r"\\");
        check_unescape(b"\0", r"\0");
        check_unescape(b"\0", r"\x00");

        check_unescape(b"\x88", r"\x88");
        check_unescape(b"\x8F", r"\x8F");
        check_unescape(b"\xF8", r"\xF8");
        check_unescape(b"\xFF", r"\xFF");

        check_unescape(b"\xE2", r"\xE2");
        check_unescape(b"\xE2\x98", r"\xE2\x98");
        check_unescape("☃", r"\xE2\x98\x83");

        // Lowercase hex digits are accepted as well.
        check_unescape(b"\xF0", r"\xf0");
        check_unescape(b"\xF0\x9F", r"\xf0\x9f");
        check_unescape(b"\xF0\x9F\x92", r"\xf0\x9f\x92");
        check_unescape("💩", r"\xf0\x9f\x92\xa9");
    }

    #[test]
    fn unescape_weird() {
        // Incomplete escapes are emitted literally.
        check_unescape(b"\\", r"\");
        check_unescape(b"\\", r"\\");
        check_unescape(b"\\x", r"\x");
        check_unescape(b"\\xA", r"\xA");

        // So are invalid and unrecognized escapes.
        check_unescape(b"\\xZ", r"\xZ");
        check_unescape(b"\\xZZ", r"\xZZ");
        check_unescape(b"\\i", r"\i");
        check_unescape(b"\\u", r"\u");
        check_unescape(b"\\u{2603}", r"\u{2603}");
    }
}