1use std::fmt;
6use std::collections::VecDeque;
7use std::io::Read;
8use std::result;
9use std::borrow::Cow;
10
11use common::{Position, TextPosition, is_whitespace_char, is_name_char};
12use reader::Error;
13use util;
14
/// `Token` represents a single lexeme of an XML document. These lexemes
/// are used to parse XML documents.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Token {
    /// `<?`
    ProcessingInstructionStart,
    /// `?>`
    ProcessingInstructionEnd,
    /// `<!DOCTYPE`
    DoctypeStart,
    /// `<`
    OpeningTagStart,
    /// `</`
    ClosingTagStart,
    /// `>`
    TagEnd,
    /// `/>`
    EmptyTagEnd,
    /// `<!--`
    CommentStart,
    /// `-->`
    CommentEnd,
    /// A several-character fixed sequence emitted as a single unit
    /// (produced e.g. when the lexer recovers from an invalid prefix).
    Chunk(&'static str),
    /// Any single non-whitespace character not covered above.
    Character(char),
    /// Any single whitespace character.
    Whitespace(char),
    /// `=`
    EqualsSign,
    /// `'`
    SingleQuote,
    /// `"`
    DoubleQuote,
    /// `<![CDATA[`
    CDataStart,
    /// `]]>`
    CDataEnd,
    /// `&`
    ReferenceStart,
    /// `;`
    ReferenceEnd,
}

impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Delegate to `as_static_str` so the token-to-text table lives in
        // exactly one place (previously it was duplicated here verbatim).
        // Only the character-carrying variants need separate handling.
        match self.as_static_str() {
            Some(s) => write!(f, "{}", s),
            None => match *self {
                Token::Character(c) | Token::Whitespace(c) => write!(f, "{}", c),
                _ => unreachable!()
            }
        }
    }
}

impl Token {
    /// Returns the fixed string this token renders as, or `None` for
    /// `Character` and `Whitespace`, whose text depends on the wrapped char.
    pub fn as_static_str(&self) -> Option<&'static str> {
        match *self {
            Token::OpeningTagStart => Some("<"),
            Token::ProcessingInstructionStart => Some("<?"),
            Token::DoctypeStart => Some("<!DOCTYPE"),
            Token::ClosingTagStart => Some("</"),
            Token::CommentStart => Some("<!--"),
            Token::CDataStart => Some("<![CDATA["),
            Token::TagEnd => Some(">"),
            Token::EmptyTagEnd => Some("/>"),
            Token::ProcessingInstructionEnd => Some("?>"),
            Token::CommentEnd => Some("-->"),
            Token::CDataEnd => Some("]]>"),
            Token::ReferenceStart => Some("&"),
            Token::ReferenceEnd => Some(";"),
            Token::EqualsSign => Some("="),
            Token::SingleQuote => Some("'"),
            Token::DoubleQuote => Some("\""),
            Token::Chunk(s) => Some(s),
            _ => None
        }
    }

    /// Appends this token's textual representation to `target`.
    ///
    /// Panics (via `unreachable!`) only if a new variant is ever added that
    /// neither has a static string nor carries a character.
    pub fn push_to_string(&self, target: &mut String) {
        match self.as_static_str() {
            Some(s) => { target.push_str(s); }
            None => {
                match *self {
                    Token::Character(c) | Token::Whitespace(c) => target.push(c),
                    _ => unreachable!()
                }
            }
        }
    }

    /// Returns `true` for tokens that can legitimately appear inside
    /// character data, i.e. they do not start markup.
    #[inline]
    pub fn contains_char_data(&self) -> bool {
        match *self {
            Token::Whitespace(_) | Token::Chunk(_) | Token::Character(_) | Token::CommentEnd |
            Token::TagEnd | Token::EqualsSign | Token::DoubleQuote | Token::SingleQuote => true,
            _ => false
        }
    }

    /// Returns `true` if this token is a `Whitespace` token.
    #[inline]
    pub fn is_whitespace(&self) -> bool {
        match *self {
            Token::Whitespace(_) => true,
            _ => false
        }
    }
}
144
/// `State` denotes the current state of the lexer's finite-state automaton,
/// i.e. which token prefix has been consumed so far.
enum State {
    /// Entered on '<'
    TagStarted,
    /// Entered on '<!'
    CommentOrCDataOrDoctypeStarted,
    /// Entered on '<!-'
    CommentStarted,
    /// Entered on '<!D' through '<!DOCTYP'; substate records progress
    DoctypeStarted(DoctypeStartedSubstate),
    /// Entered on '<![' through '<![CDATA'; substate records progress
    CDataStarted(CDataStartedSubstate),
    /// Entered on '?' (a '>' would complete '?>')
    ProcessingInstructionClosing,
    /// Entered on '/' (a '>' would complete '/>')
    EmptyTagClosing,
    /// Entered on '-' and '--' (a '>' would complete '-->')
    CommentClosing(ClosingSubstate),
    /// Entered on ']' and ']]' (a '>' would complete ']]>')
    CDataClosing(ClosingSubstate),
    /// Default state: no token prefix pending
    Normal
}
167
// Tracks how many characters of a two-character closing prefix ("--" or "]]")
// have been consumed so far.
#[derive(Copy, Clone)]
enum ClosingSubstate {
    First, Second
}
172
// Tracks progress through the "DOCTYPE" keyword after "<!"; each variant names
// the letters matched so far.
#[derive(Copy, Clone)]
enum DoctypeStartedSubstate {
    D, DO, DOC, DOCT, DOCTY, DOCTYP
}
177
// Tracks progress through "CDATA[" after "<!["; `E` is the initial substate
// entered on '<![' before any keyword letter is matched.
#[derive(Copy, Clone)]
enum CDataStartedSubstate {
    E, C, CD, CDA, CDAT, CDATA
}
182
/// `Result` represents the outcome of a lexing operation: `Ok(Some(token))` when a
/// token is recognized, `Ok(None)` on end of stream, `Err(..)` on a lexing error.
pub type Result = result::Result<Option<Token>, Error>;
185
// `dispatch_on_enum_state!` generates the sequential keyword-matching logic for
// the multi-substate states (`DoctypeStarted`, `CDataStarted`): each substate
// `$st` accepts exactly one character `$stc` and moves to the next substate
// `$next_st` (wrapped via `$is` into a `State`); any other character is reported
// through `handle_error` with `$chunk`, the input recognized so far. The final
// substate `$end_st` evaluates `$e` when `$end_c` is seen.
macro_rules! dispatch_on_enum_state(
    ($_self:ident, $s:expr, $c:expr, $is:expr,
     $($st:ident; $stc:expr ; $next_st:ident ; $chunk:expr),+;
     $end_st:ident ; $end_c:expr ; $end_chunk:expr ; $e:expr) => (
        match $s {
            $(
                $st => match $c {
                    $stc => $_self.move_to($is($next_st)),
                    _ => $_self.handle_error($chunk, $c)
                },
            )+
            $end_st => match $c {
                $end_c => $e,
                _ => $_self.handle_error($end_chunk, $c)
            }
        }
    )
);
206
/// `Lexer` is a lexer for XML documents. Its main method is `next_token`,
/// which pulls characters from a `Read` instance and produces `Token`s.
pub struct Lexer {
    pos: TextPosition,          // position where the current/last token starts
    head_pos: TextPosition,     // position of the read head in the input
    char_queue: VecDeque<char>, // characters "unread" during recovery, re-processed first
    st: State,                  // current state of the automaton
    skip_errors: bool,          // when true, invalid prefixes become `Chunk` tokens instead of errors
    inside_comment: bool,       // when true, '--' must only appear as part of '-->'
    inside_token: bool,         // true while a token is being accumulated across calls
    eof_handled: bool           // true once end of stream has been observed and reported
}
226
impl Position for Lexer {
    /// Returns the position of the last token produced by the lexer.
    #[inline]
    fn position(&self) -> TextPosition { self.pos }
}
232
impl Lexer {
    /// Returns a new lexer with default state.
    pub fn new() -> Lexer {
        Lexer {
            pos: TextPosition::new(),
            head_pos: TextPosition::new(),
            char_queue: VecDeque::with_capacity(4),
            st: State::Normal,
            skip_errors: false,
            inside_comment: false,
            inside_token: false,
            eof_handled: false
        }
    }

    /// Enables error reporting: invalid input makes `next_token` return `Err`.
    #[inline]
    pub fn enable_errors(&mut self) { self.skip_errors = false; }

    /// Disables error reporting: the lexer recovers from invalid input by
    /// emitting the recognized prefix as a `Token::Chunk` (see `handle_error`).
    #[inline]
    pub fn disable_errors(&mut self) { self.skip_errors = true; }

    /// Tells the lexer it is inside a comment body, where a '--' sequence not
    /// followed by '>' is an error, but other invalid prefixes are recovered
    /// as chunks even with errors enabled.
    #[inline]
    pub fn inside_comment(&mut self) { self.inside_comment = true; }

    /// Tells the lexer it is no longer inside a comment.
    #[inline]
    pub fn outside_comment(&mut self) { self.inside_comment = false; }

    /// Clears the end-of-stream flag so `next_token` can be called again
    /// after it has returned `Ok(None)`.
    #[inline]
    pub fn reset_eof_handled(&mut self) { self.eof_handled = false; }

    /// Tries to read the next token from the given input stream.
    ///
    /// Characters unread by earlier error recovery are re-processed before new
    /// input is pulled from `b`. Returns `Ok(Some(token))` when a token is
    /// complete, `Ok(None)` at end of stream (and on every later call until
    /// `reset_eof_handled`), and `Err` on a lexing error.
    pub fn next_token<B: Read>(&mut self, b: &mut B) -> Result {
        // End of stream was already observed and flushed on a previous call.
        if self.eof_handled {
            return Ok(None);
        }

        // Snapshot the position where the token about to be produced starts.
        if !self.inside_token {
            self.pos = self.head_pos;
            self.inside_token = true;
        }

        // First drain characters that were unread during error recovery.
        while let Some(c) = self.char_queue.pop_front() {
            match try!(self.read_next_token(c)) {
                Some(t) => {
                    self.inside_token = false;
                    return Ok(Some(t));
                }
                None => {}
            }
        }

        // Then pull fresh characters from the stream until a token completes.
        loop {
            let c = match try!(util::next_char_from(b)) {
                Some(c) => c,
                None => break,
            };

            match try!(self.read_next_token(c)) {
                Some(t) => {
                    self.inside_token = false;
                    return Ok(Some(t));
                }
                None => {
                }
            }
        }

        // End of stream: interpret whatever partial prefix the automaton holds.
        self.eof_handled = true;
        self.pos = self.head_pos;
        match self.st {
            // These prefixes cannot stand on their own — truncated input.
            State::TagStarted | State::CommentOrCDataOrDoctypeStarted |
            State::CommentStarted | State::CDataStarted(_) | State::DoctypeStarted(_) |
            State::CommentClosing(ClosingSubstate::Second) =>
                Err(self.error("Unexpected end of stream")),
            // These prefixes degrade to ordinary character data at EOF.
            State::ProcessingInstructionClosing =>
                Ok(Some(Token::Character('?'))),
            State::EmptyTagClosing =>
                Ok(Some(Token::Character('/'))),
            State::CommentClosing(ClosingSubstate::First) =>
                Ok(Some(Token::Character('-'))),
            State::CDataClosing(ClosingSubstate::First) =>
                Ok(Some(Token::Character(']'))),
            State::CDataClosing(ClosingSubstate::Second) =>
                Ok(Some(Token::Chunk("]]"))),
            State::Normal =>
                Ok(None)
        }
    }

    /// Builds an `Error` carrying the lexer's current token position.
    #[inline]
    fn error<M: Into<Cow<'static, str>>>(&self, msg: M) -> Error {
        (self, msg).into()
    }

    /// Feeds one character to the automaton, then advances the head position.
    /// The position is advanced only when the unread queue is empty, so that a
    /// character pushed back for re-processing is not counted twice.
    #[inline]
    fn read_next_token(&mut self, c: char) -> Result {
        let res = self.dispatch_char(c);
        if self.char_queue.is_empty() {
            if c == '\n' {
                self.head_pos.new_line();
            } else {
                self.head_pos.advance(1);
            }
        }
        res
    }

    /// Routes a character to the handler for the current automaton state.
    fn dispatch_char(&mut self, c: char) -> Result {
        match self.st {
            State::Normal => self.normal(c),
            State::TagStarted => self.tag_opened(c),
            State::CommentOrCDataOrDoctypeStarted => self.comment_or_cdata_or_doctype_started(c),
            State::CommentStarted => self.comment_started(c),
            State::CDataStarted(s) => self.cdata_started(c, s),
            State::DoctypeStarted(s) => self.doctype_started(c, s),
            State::ProcessingInstructionClosing => self.processing_instruction_closing(c),
            State::EmptyTagClosing => self.empty_element_closing(c),
            State::CommentClosing(s) => self.comment_closing(c, s),
            State::CDataClosing(s) => self.cdata_closing(c, s)
        }
    }

    /// Switches to state `st`, producing no token.
    #[inline]
    fn move_to(&mut self, st: State) -> Result {
        self.st = st;
        Ok(None)
    }

    /// Switches to state `st`, producing `token`.
    #[inline]
    fn move_to_with(&mut self, st: State, token: Token) -> Result {
        self.st = st;
        Ok(Some(token))
    }

    /// Switches to state `st`, producing `token` and queueing `cs` to be
    /// re-processed by subsequent calls ("unreading" them).
    #[inline]
    fn move_to_with_unread(&mut self, st: State, cs: &[char], token: Token) -> Result {
        self.char_queue.extend(cs.iter().cloned());
        self.move_to_with(st, token)
    }

    /// Handles an invalid input sequence: `chunk` is the prefix recognized so
    /// far and `c` the character that did not fit. `c` is always unread; the
    /// chunk is emitted as a recovery token when errors are disabled, or when
    /// inside a comment (except for the '--' chunk, which stays an error there).
    fn handle_error(&mut self, chunk: &'static str, c: char) -> Result {
        self.char_queue.push_back(c);
        if self.skip_errors || (self.inside_comment && chunk != "--") {
            self.move_to_with(State::Normal, Token::Chunk(chunk))
        } else {
            Err(self.error(format!("Unexpected token '{}' before '{}'", chunk, c)))
        }
    }

    /// Encountered a char in the default state.
    fn normal(&mut self, c: char) -> Result {
        match c {
            '<' => self.move_to(State::TagStarted),
            '>' => Ok(Some(Token::TagEnd)),
            '/' => self.move_to(State::EmptyTagClosing),
            '=' => Ok(Some(Token::EqualsSign)),
            '"' => Ok(Some(Token::DoubleQuote)),
            '\'' => Ok(Some(Token::SingleQuote)),
            '?' => self.move_to(State::ProcessingInstructionClosing),
            '-' => self.move_to(State::CommentClosing(ClosingSubstate::First)),
            ']' => self.move_to(State::CDataClosing(ClosingSubstate::First)),
            '&' => Ok(Some(Token::ReferenceStart)),
            ';' => Ok(Some(Token::ReferenceEnd)),
            _ if is_whitespace_char(c) => Ok(Some(Token::Whitespace(c))),
            _ => Ok(Some(Token::Character(c)))
        }
    }

    /// Encountered '<'.
    fn tag_opened(&mut self, c: char) -> Result {
        match c {
            '?' => self.move_to_with(State::Normal, Token::ProcessingInstructionStart),
            '/' => self.move_to_with(State::Normal, Token::ClosingTagStart),
            '!' => self.move_to(State::CommentOrCDataOrDoctypeStarted),
            // '<' followed by whitespace or a name char is an opening tag start;
            // the following character is unread for normal processing.
            _ if is_whitespace_char(c) => self.move_to_with_unread(State::Normal, &[c], Token::OpeningTagStart),
            _ if is_name_char(c) => self.move_to_with_unread(State::Normal, &[c], Token::OpeningTagStart),
            _ => self.handle_error("<", c)
        }
    }

    /// Encountered '<!'.
    fn comment_or_cdata_or_doctype_started(&mut self, c: char) -> Result {
        match c {
            '-' => self.move_to(State::CommentStarted),
            '[' => self.move_to(State::CDataStarted(CDataStartedSubstate::E)),
            'D' => self.move_to(State::DoctypeStarted(DoctypeStartedSubstate::D)),
            _ => self.handle_error("<!", c)
        }
    }

    /// Encountered '<!-'.
    fn comment_started(&mut self, c: char) -> Result {
        match c {
            '-' => self.move_to_with(State::Normal, Token::CommentStart),
            _ => self.handle_error("<!-", c)
        }
    }

    /// Encountered '<![' plus zero or more characters of "CDATA[".
    fn cdata_started(&mut self, c: char, s: CDataStartedSubstate) -> Result {
        use self::CDataStartedSubstate::{E, C, CD, CDA, CDAT, CDATA};
        dispatch_on_enum_state!(self, s, c, State::CDataStarted,
            E     ; 'C' ; C     ; "<![",
            C     ; 'D' ; CD    ; "<![C",
            CD    ; 'A' ; CDA   ; "<![CD",
            CDA   ; 'T' ; CDAT  ; "<![CDA",
            CDAT  ; 'A' ; CDATA ; "<![CDAT";
            CDATA ; '[' ; "<![CDATA" ; self.move_to_with(State::Normal, Token::CDataStart)
        )
    }

    /// Encountered '<!D' plus zero or more characters of "OCTYPE".
    fn doctype_started(&mut self, c: char, s: DoctypeStartedSubstate) -> Result {
        use self::DoctypeStartedSubstate::{D, DO, DOC, DOCT, DOCTY, DOCTYP};
        dispatch_on_enum_state!(self, s, c, State::DoctypeStarted,
            D      ; 'O' ; DO     ; "<!D",
            DO     ; 'C' ; DOC    ; "<!DO",
            DOC    ; 'T' ; DOCT   ; "<!DOC",
            DOCT   ; 'Y' ; DOCTY  ; "<!DOCT",
            DOCTY  ; 'P' ; DOCTYP ; "<!DOCTY";
            DOCTYP ; 'E' ; "<!DOCTYP" ; self.move_to_with(State::Normal, Token::DoctypeStart)
        )
    }

    /// Encountered '?'.
    fn processing_instruction_closing(&mut self, c: char) -> Result {
        match c {
            '>' => self.move_to_with(State::Normal, Token::ProcessingInstructionEnd),
            // Not a PI end: emit the '?' as a plain character and unread `c`.
            _ => self.move_to_with_unread(State::Normal, &[c], Token::Character('?')),
        }
    }

    /// Encountered '/'.
    fn empty_element_closing(&mut self, c: char) -> Result {
        match c {
            '>' => self.move_to_with(State::Normal, Token::EmptyTagEnd),
            // Not '/>': emit the '/' as a plain character and unread `c`.
            _ => self.move_to_with_unread(State::Normal, &[c], Token::Character('/')),
        }
    }

    /// Encountered '-' (and possibly a second '-').
    fn comment_closing(&mut self, c: char, s: ClosingSubstate) -> Result {
        match s {
            ClosingSubstate::First => match c {
                '-' => self.move_to(State::CommentClosing(ClosingSubstate::Second)),
                _ => self.move_to_with_unread(State::Normal, &[c], Token::Character('-'))
            },
            ClosingSubstate::Second => match c {
                '>' => self.move_to_with(State::Normal, Token::CommentEnd),
                // Inside a comment, '--' not followed by '>' is malformed.
                _ if self.inside_comment => self.handle_error("--", c),
                // Outside comments, '--' is just character data.
                _ => self.move_to_with_unread(State::Normal, &[c], Token::Chunk("--"))
            }
        }
    }

    /// Encountered ']' (and possibly a second ']').
    fn cdata_closing(&mut self, c: char, s: ClosingSubstate) -> Result {
        match s {
            ClosingSubstate::First => match c {
                ']' => self.move_to(State::CDataClosing(ClosingSubstate::Second)),
                _ => self.move_to_with_unread(State::Normal, &[c], Token::Character(']'))
            },
            ClosingSubstate::Second => match c {
                '>' => self.move_to_with(State::Normal, Token::CDataEnd),
                // Not ']]>': emit the first ']' and unread the second along with `c`.
                _ => self.move_to_with_unread(State::Normal, &[']', c], Token::Character(']'))
            }
        }
    }
}
528
#[cfg(test)]
mod tests {
    use common::{Position};
    use std::io::{BufReader, Cursor};

    use super::{Lexer, Token};

    /// Asserts that the lexer produces exactly the given sequence of tokens.
    macro_rules! assert_oks(
        (for $lex:ident and $buf:ident ; $($e:expr)+) => ({
            $(
                assert_eq!(Ok(Some($e)), $lex.next_token(&mut $buf));
            )+
        })
    );

    /// Asserts that the next lexer result is an error with the given
    /// position and message.
    macro_rules! assert_err(
        (for $lex:ident and $buf:ident expect row $r:expr ; $c:expr, $s:expr) => ({
            let err = $lex.next_token(&mut $buf);
            assert!(err.is_err());
            let err = err.unwrap_err();
            assert_eq!($r as u64, err.position().row);
            assert_eq!($c as u64, err.position().column);
            assert_eq!($s, err.msg());
        })
    );

    /// Asserts that the lexer reports end of stream.
    macro_rules! assert_none(
        (for $lex:ident and $buf:ident) => (
            assert_eq!(Ok(None), $lex.next_token(&mut $buf));
        )
    );

    fn make_lex_and_buf(s: &str) -> (Lexer, BufReader<Cursor<Vec<u8>>>) {
        (Lexer::new(), BufReader::new(Cursor::new(s.to_owned().into_bytes())))
    }

    #[test]
    fn simple_lexer_test() {
        // The assertions below expect a literal tab between 'd' and '</b>' and a
        // trailing `&nbsp;` reference, so the input is written as an escaped
        // string literal (a raw string cannot express '\t'). The previous raw
        // literal had lost both, making the test inconsistent with its asserts.
        let (mut lex, mut buf) = make_lex_and_buf(
            "<a p='q'> x<b z=\"y\">d\t</b></a><p/> <?nm ?> <!-- a c --> &nbsp;"
        );

        assert_oks!(for lex and buf ;
            Token::OpeningTagStart
            Token::Character('a')
            Token::Whitespace(' ')
            Token::Character('p')
            Token::EqualsSign
            Token::SingleQuote
            Token::Character('q')
            Token::SingleQuote
            Token::TagEnd
            Token::Whitespace(' ')
            Token::Character('x')
            Token::OpeningTagStart
            Token::Character('b')
            Token::Whitespace(' ')
            Token::Character('z')
            Token::EqualsSign
            Token::DoubleQuote
            Token::Character('y')
            Token::DoubleQuote
            Token::TagEnd
            Token::Character('d')
            Token::Whitespace('\t')
            Token::ClosingTagStart
            Token::Character('b')
            Token::TagEnd
            Token::ClosingTagStart
            Token::Character('a')
            Token::TagEnd
            Token::OpeningTagStart
            Token::Character('p')
            Token::EmptyTagEnd
            Token::Whitespace(' ')
            Token::ProcessingInstructionStart
            Token::Character('n')
            Token::Character('m')
            Token::Whitespace(' ')
            Token::ProcessingInstructionEnd
            Token::Whitespace(' ')
            Token::CommentStart
            Token::Whitespace(' ')
            Token::Character('a')
            Token::Whitespace(' ')
            Token::Character('c')
            Token::Whitespace(' ')
            Token::CommentEnd
            Token::Whitespace(' ')
            Token::ReferenceStart
            Token::Character('n')
            Token::Character('b')
            Token::Character('s')
            Token::Character('p')
            Token::ReferenceEnd
        );
        assert_none!(for lex and buf);
    }

    #[test]
    fn special_chars_test() {
        let (mut lex, mut buf) = make_lex_and_buf(
            r#"?x!+ // -| ]z]]"#
        );

        assert_oks!(for lex and buf ;
            Token::Character('?')
            Token::Character('x')
            Token::Character('!')
            Token::Character('+')
            Token::Whitespace(' ')
            Token::Character('/')
            Token::Character('/')
            Token::Whitespace(' ')
            Token::Character('-')
            Token::Character('|')
            Token::Whitespace(' ')
            Token::Character(']')
            Token::Character('z')
            Token::Chunk("]]")
        );
        assert_none!(for lex and buf);
    }

    #[test]
    fn cdata_test() {
        let (mut lex, mut buf) = make_lex_and_buf(
            r#"<a><![CDATA[x y ?]]> </a>"#
        );

        assert_oks!(for lex and buf ;
            Token::OpeningTagStart
            Token::Character('a')
            Token::TagEnd
            Token::CDataStart
            Token::Character('x')
            Token::Whitespace(' ')
            Token::Character('y')
            Token::Whitespace(' ')
            Token::Character('?')
            Token::CDataEnd
            Token::Whitespace(' ')
            Token::ClosingTagStart
            Token::Character('a')
            Token::TagEnd
        );
        assert_none!(for lex and buf);
    }

    #[test]
    fn doctype_test() {
        let (mut lex, mut buf) = make_lex_and_buf(
            r#"<a><!DOCTYPE ab xx z> "#
        );
        assert_oks!(for lex and buf ;
            Token::OpeningTagStart
            Token::Character('a')
            Token::TagEnd
            Token::DoctypeStart
            Token::Whitespace(' ')
            Token::Character('a')
            Token::Character('b')
            Token::Whitespace(' ')
            Token::Character('x')
            Token::Character('x')
            Token::Whitespace(' ')
            Token::Character('z')
            Token::TagEnd
            Token::Whitespace(' ')
        );
        assert_none!(for lex and buf)
    }

    #[test]
    fn end_of_stream_handling_ok() {
        // Prefixes that degrade to character data when the stream ends.
        macro_rules! eof_check(
            ($data:expr ; $token:expr) => ({
                let (mut lex, mut buf) = make_lex_and_buf($data);
                assert_oks!(for lex and buf ; $token);
                assert_none!(for lex and buf);
            })
        );
        eof_check!("?"  ; Token::Character('?'));
        eof_check!("/"  ; Token::Character('/'));
        eof_check!("-"  ; Token::Character('-'));
        eof_check!("]"  ; Token::Character(']'));
        eof_check!("]]" ; Token::Chunk("]]"));
    }

    #[test]
    fn end_of_stream_handling_error() {
        // Prefixes that cannot stand on their own when the stream ends.
        macro_rules! eof_check(
            ($data:expr; $r:expr, $c:expr) => ({
                let (mut lex, mut buf) = make_lex_and_buf($data);
                assert_err!(for lex and buf expect row $r ; $c, "Unexpected end of stream");
                assert_none!(for lex and buf);
            })
        );
        eof_check!("<"        ; 0, 1);
        eof_check!("<!"       ; 0, 2);
        eof_check!("<!-"      ; 0, 3);
        eof_check!("<!["      ; 0, 3);
        eof_check!("<![C"     ; 0, 4);
        eof_check!("<![CD"    ; 0, 5);
        eof_check!("<![CDA"   ; 0, 6);
        eof_check!("<![CDAT"  ; 0, 7);
        eof_check!("<![CDATA" ; 0, 8);
        eof_check!("--"       ; 0, 2);
    }

    #[test]
    fn error_in_comment_or_cdata_prefix() {
        let (mut lex, mut buf) = make_lex_and_buf("<!x");
        assert_err!(for lex and buf expect row 0 ; 0,
            "Unexpected token '<!' before 'x'"
        );

        let (mut lex, mut buf) = make_lex_and_buf("<!x");
        lex.disable_errors();
        assert_oks!(for lex and buf ;
            Token::Chunk("<!")
            Token::Character('x')
        );
        assert_none!(for lex and buf);
    }

    #[test]
    fn error_in_comment_started() {
        let (mut lex, mut buf) = make_lex_and_buf("<!-\t");
        assert_err!(for lex and buf expect row 0 ; 0,
            "Unexpected token '<!-' before '\t'"
        );

        let (mut lex, mut buf) = make_lex_and_buf("<!-\t");
        lex.disable_errors();
        assert_oks!(for lex and buf ;
            Token::Chunk("<!-")
            Token::Whitespace('\t')
        );
        assert_none!(for lex and buf);
    }

    #[test]
    fn error_in_comment_two_dashes_not_at_end() {
        let (mut lex, mut buf) = make_lex_and_buf("--x");
        lex.inside_comment();
        assert_err!(for lex and buf expect row 0; 0,
            "Unexpected token '--' before 'x'"
        );

        let (mut lex, mut buf) = make_lex_and_buf("--x");
        assert_oks!(for lex and buf ;
            Token::Chunk("--")
            Token::Character('x')
        );
    }

    /// Checks both the error path (errors enabled) and the recovery path
    /// (errors disabled → chunk + unread character) for the same input.
    macro_rules! check_case(
        ($chunk:expr, $app:expr; $data:expr; $r:expr, $c:expr, $s:expr) => ({
            let (mut lex, mut buf) = make_lex_and_buf($data);
            assert_err!(for lex and buf expect row $r ; $c, $s);

            let (mut lex, mut buf) = make_lex_and_buf($data);
            lex.disable_errors();
            assert_oks!(for lex and buf ;
                Token::Chunk($chunk)
                Token::Character($app)
            );
            assert_none!(for lex and buf);
        })
    );

    #[test]
    fn error_in_cdata_started() {
        check_case!("<![",      '['; "<![["      ; 0, 0, "Unexpected token '<![' before '['");
        check_case!("<![C",     '['; "<![C["     ; 0, 0, "Unexpected token '<![C' before '['");
        check_case!("<![CD",    '['; "<![CD["    ; 0, 0, "Unexpected token '<![CD' before '['");
        check_case!("<![CDA",   '['; "<![CDA["   ; 0, 0, "Unexpected token '<![CDA' before '['");
        check_case!("<![CDAT",  '['; "<![CDAT["  ; 0, 0, "Unexpected token '<![CDAT' before '['");
        check_case!("<![CDATA", '|'; "<![CDATA|" ; 0, 0, "Unexpected token '<![CDATA' before '|'");
    }

    #[test]
    fn error_in_doctype_started() {
        check_case!("<!D",      'a'; "<!Da"      ; 0, 0, "Unexpected token '<!D' before 'a'");
        check_case!("<!DO",     'b'; "<!DOb"     ; 0, 0, "Unexpected token '<!DO' before 'b'");
        check_case!("<!DOC",    'c'; "<!DOCc"    ; 0, 0, "Unexpected token '<!DOC' before 'c'");
        check_case!("<!DOCT",   'd'; "<!DOCTd"   ; 0, 0, "Unexpected token '<!DOCT' before 'd'");
        check_case!("<!DOCTY",  'e'; "<!DOCTYe"  ; 0, 0, "Unexpected token '<!DOCTY' before 'e'");
        check_case!("<!DOCTYP", 'f'; "<!DOCTYPf" ; 0, 0, "Unexpected token '<!DOCTYP' before 'f'");
    }

    #[test]
    fn issue_98_cdata_ending_with_right_bracket() {
        let (mut lex, mut buf) = make_lex_and_buf(
            r#"<![CDATA[Foo [Bar]]]>"#
        );

        assert_oks!(for lex and buf ;
            Token::CDataStart
            Token::Character('F')
            Token::Character('o')
            Token::Character('o')
            Token::Whitespace(' ')
            Token::Character('[')
            Token::Character('B')
            Token::Character('a')
            Token::Character('r')
            Token::Character(']')
            Token::CDataEnd
        );
        assert_none!(for lex and buf);
    }
}