xml/reader/parser/
outside_tag.rs

1use common::is_whitespace_char;
2
3use reader::events::XmlEvent;
4use reader::lexer::Token;
5
6use super::{
7    Result, PullParser, State, ClosingTagSubstate, OpeningTagSubstate,
8    ProcessingInstructionSubstate, DEFAULT_VERSION, DEFAULT_ENCODING, DEFAULT_STANDALONE
9};
10
11impl PullParser {
12    pub fn outside_tag(&mut self, t: Token) -> Option<Result> {
13        match t {
14            Token::ReferenceStart =>
15                self.into_state_continue(State::InsideReference(Box::new(State::OutsideTag))),
16
17            Token::Whitespace(_) if self.depth() == 0 => None,  // skip whitespace outside of the root element
18
19            _ if t.contains_char_data() && self.depth() == 0 =>
20                Some(self_error!(self; "Unexpected characters outside the root element: {}", t)),
21
22            Token::Whitespace(_) if self.config.trim_whitespace && !self.buf_has_data() => None,
23
24            Token::Whitespace(c) => {
25                if !self.buf_has_data() {
26                    self.push_pos();
27                }
28                self.append_char_continue(c)
29            }
30
31            _ if t.contains_char_data() => {  // Non-whitespace char data
32                if !self.buf_has_data() {
33                    self.push_pos();
34                }
35                self.inside_whitespace = false;
36                t.push_to_string(&mut self.buf);
37                None
38            }
39
40            Token::ReferenceEnd => { // Semi-colon in a text outside an entity
41                self.inside_whitespace = false;
42                Token::ReferenceEnd.push_to_string(&mut self.buf);
43                None
44            }
45
46            Token::CommentStart if self.config.coalesce_characters && self.config.ignore_comments => {
47                // We need to switch the lexer into a comment mode inside comments
48                self.lexer.inside_comment();
49                self.into_state_continue(State::InsideComment)
50            }
51
52            Token::CDataStart if self.config.coalesce_characters && self.config.cdata_to_characters => {
53                if !self.buf_has_data() {
54                    self.push_pos();
55                }
56                // We need to disable lexing errors inside CDATA
57                self.lexer.disable_errors();
58                self.into_state_continue(State::InsideCData)
59            }
60
61            _ => {
62                // Encountered some markup event, flush the buffer as characters
63                // or a whitespace
64                let mut next_event = if self.buf_has_data() {
65                    let buf = self.take_buf();
66                    if self.inside_whitespace && self.config.trim_whitespace {
67                        None
68                    } else if self.inside_whitespace && !self.config.whitespace_to_characters {
69                        Some(Ok(XmlEvent::Whitespace(buf)))
70                    } else if self.config.trim_whitespace {
71                        Some(Ok(XmlEvent::Characters(buf.trim_matches(is_whitespace_char).into())))
72                    } else {
73                        Some(Ok(XmlEvent::Characters(buf)))
74                    }
75                } else { None };
76                self.inside_whitespace = true;  // Reset inside_whitespace flag
77                self.push_pos();
78                match t {
79                    Token::ProcessingInstructionStart =>
80                        self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName), next_event),
81
82                    Token::DoctypeStart if !self.encountered_element => {
83                        // We don't have a doctype event so skip this position
84                        // FIXME: update when we have a doctype event
85                        self.next_pos();
86                        self.lexer.disable_errors();
87                        self.into_state(State::InsideDoctype, next_event)
88                    }
89
90                    Token::OpeningTagStart => {
91                        // If declaration was not parsed and we have encountered an element,
92                        // emit this declaration as the next event.
93                        if !self.parsed_declaration {
94                            self.parsed_declaration = true;
95                            let sd_event = XmlEvent::StartDocument {
96                                version: DEFAULT_VERSION,
97                                encoding: DEFAULT_ENCODING.into(),
98                                standalone: DEFAULT_STANDALONE
99                            };
100                            // next_event is always none here because we're outside of
101                            // the root element
102                            next_event = Some(Ok(sd_event));
103                            self.push_pos();
104                        }
105                        self.encountered_element = true;
106                        self.nst.push_empty();
107                        self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
108                    }
109
110                    Token::ClosingTagStart if self.depth() > 0 =>
111                        self.into_state(State::InsideClosingTag(ClosingTagSubstate::CTInsideName), next_event),
112
113                    Token::CommentStart => {
114                        // We need to switch the lexer into a comment mode inside comments
115                        self.lexer.inside_comment();
116                        self.into_state(State::InsideComment, next_event)
117                    }
118
119                    Token::CDataStart => {
120                        // We need to disable lexing errors inside CDATA
121                        self.lexer.disable_errors();
122                        self.into_state(State::InsideCData, next_event)
123                    }
124
125                    _ => Some(self_error!(self; "Unexpected token: {}", t))
126                }
127            }
128        }
129    }
130}