xml/reader/parser/
outside_tag.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
use common::is_whitespace_char;

use reader::events::XmlEvent;
use reader::lexer::Token;

use super::{
    Result, PullParser, State, ClosingTagSubstate, OpeningTagSubstate,
    ProcessingInstructionSubstate, DEFAULT_VERSION, DEFAULT_ENCODING, DEFAULT_STANDALONE
};

impl PullParser {
    pub fn outside_tag(&mut self, t: Token) -> Option<Result> {
        match t {
            Token::ReferenceStart =>
                self.into_state_continue(State::InsideReference(Box::new(State::OutsideTag))),

            Token::Whitespace(_) if self.depth() == 0 => None,  // skip whitespace outside of the root element

            _ if t.contains_char_data() && self.depth() == 0 =>
                Some(self_error!(self; "Unexpected characters outside the root element: {}", t)),

            Token::Whitespace(_) if self.config.trim_whitespace && !self.buf_has_data() => None,

            Token::Whitespace(c) => {
                if !self.buf_has_data() {
                    self.push_pos();
                }
                self.append_char_continue(c)
            }

            _ if t.contains_char_data() => {  // Non-whitespace char data
                if !self.buf_has_data() {
                    self.push_pos();
                }
                self.inside_whitespace = false;
                t.push_to_string(&mut self.buf);
                None
            }

            Token::ReferenceEnd => { // Semi-colon in a text outside an entity
                self.inside_whitespace = false;
                Token::ReferenceEnd.push_to_string(&mut self.buf);
                None
            }

            Token::CommentStart if self.config.coalesce_characters && self.config.ignore_comments => {
                // We need to switch the lexer into a comment mode inside comments
                self.lexer.inside_comment();
                self.into_state_continue(State::InsideComment)
            }

            Token::CDataStart if self.config.coalesce_characters && self.config.cdata_to_characters => {
                if !self.buf_has_data() {
                    self.push_pos();
                }
                // We need to disable lexing errors inside CDATA
                self.lexer.disable_errors();
                self.into_state_continue(State::InsideCData)
            }

            _ => {
                // Encountered some markup event, flush the buffer as characters
                // or a whitespace
                let mut next_event = if self.buf_has_data() {
                    let buf = self.take_buf();
                    if self.inside_whitespace && self.config.trim_whitespace {
                        None
                    } else if self.inside_whitespace && !self.config.whitespace_to_characters {
                        Some(Ok(XmlEvent::Whitespace(buf)))
                    } else if self.config.trim_whitespace {
                        Some(Ok(XmlEvent::Characters(buf.trim_matches(is_whitespace_char).into())))
                    } else {
                        Some(Ok(XmlEvent::Characters(buf)))
                    }
                } else { None };
                self.inside_whitespace = true;  // Reset inside_whitespace flag
                self.push_pos();
                match t {
                    Token::ProcessingInstructionStart =>
                        self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName), next_event),

                    Token::DoctypeStart if !self.encountered_element => {
                        // We don't have a doctype event so skip this position
                        // FIXME: update when we have a doctype event
                        self.next_pos();
                        self.lexer.disable_errors();
                        self.into_state(State::InsideDoctype, next_event)
                    }

                    Token::OpeningTagStart => {
                        // If declaration was not parsed and we have encountered an element,
                        // emit this declaration as the next event.
                        if !self.parsed_declaration {
                            self.parsed_declaration = true;
                            let sd_event = XmlEvent::StartDocument {
                                version: DEFAULT_VERSION,
                                encoding: DEFAULT_ENCODING.into(),
                                standalone: DEFAULT_STANDALONE
                            };
                            // next_event is always none here because we're outside of
                            // the root element
                            next_event = Some(Ok(sd_event));
                            self.push_pos();
                        }
                        self.encountered_element = true;
                        self.nst.push_empty();
                        self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
                    }

                    Token::ClosingTagStart if self.depth() > 0 =>
                        self.into_state(State::InsideClosingTag(ClosingTagSubstate::CTInsideName), next_event),

                    Token::CommentStart => {
                        // We need to switch the lexer into a comment mode inside comments
                        self.lexer.inside_comment();
                        self.into_state(State::InsideComment, next_event)
                    }

                    Token::CDataStart => {
                        // We need to disable lexing errors inside CDATA
                        self.lexer.disable_errors();
                        self.into_state(State::InsideCData, next_event)
                    }

                    _ => Some(self_error!(self; "Unexpected token: {}", t))
                }
            }
        }
    }
}