xml/reader/config.rs
1//! Contains parser configuration structure.
2use std::io::Read;
3use std::collections::HashMap;
4
5use reader::EventReader;
6
/// Parser configuration structure.
///
/// This structure contains the configuration options which affect the
/// behavior of the parser. Every field is public, so a value can be built
/// with a struct literal or adjusted field by field.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct ParserConfig {
    /// Whether whitespace in textual events should be removed. Default is false.
    ///
    /// When true, all standalone whitespace will be removed (this means no
    /// `Whitespace` events will be emitted), and leading and trailing whitespace
    /// from `Characters` events will be deleted. If a `Characters` event is
    /// empty after trimming, it will also be omitted from the output stream.
    /// This is possible, however, only if the `whitespace_to_characters` or
    /// `cdata_to_characters` options are set.
    ///
    /// This option does not affect CDATA events, unless the `cdata_to_characters`
    /// option is also set. In that case CDATA content will also be trimmed.
    pub trim_whitespace: bool,

    /// Whether whitespace should be converted to characters.
    /// Default is false.
    ///
    /// If true, `Characters` events with the same content will be emitted
    /// instead of `Whitespace` events. If `trim_whitespace` is also true, these
    /// events will be trimmed to nothing and, consequently, not emitted.
    pub whitespace_to_characters: bool,

    /// Whether CDATA should be converted to characters.
    /// Default is false.
    ///
    /// If true, `Characters` events with the same content will be emitted
    /// instead of `CData` events. If `trim_whitespace` is also true, these events
    /// will be trimmed. If the corresponding CDATA contained nothing but
    /// whitespace, the event will be omitted from the stream.
    pub cdata_to_characters: bool,

    /// Whether comments should be omitted. Default is true.
    ///
    /// If true, `Comment` events will not be emitted at all.
    pub ignore_comments: bool,

    /// Whether sequential `Characters` events should be merged.
    /// Default is true.
    ///
    /// If true, multiple sequential `Characters` events will be merged into
    /// a single event, that is, their data will be concatenated.
    ///
    /// Multiple sequential `Characters` events are only possible if either
    /// `cdata_to_characters` or `ignore_comments` is set. Otherwise character
    /// events will always be separated by other events.
    pub coalesce_characters: bool,

    /// A map of extra entities recognized by the parser. Default is an empty map.
    ///
    /// By default the XML parser recognizes the entities defined in the XML spec.
    /// Sometimes, however, it is convenient to make the parser recognize additional
    /// entities which are not available through the DTD definitions (especially
    /// given that at the moment DTD parsing is not supported).
    pub extra_entities: HashMap<String, String>,

    /// Whether the parser should ignore the end of stream. Default is false.
    ///
    /// By default the parser will either error out when it encounters a premature
    /// end of stream or complete normally if the end of stream was expected. If you
    /// want to continue reading from a stream whose input is supplied progressively,
    /// you can set this option to true. In this case the parser will allow you to
    /// invoke the `next()` method even if a supposed end of stream has happened.
    ///
    /// Note that support for this functionality is incomplete; for example, the
    /// parser will fail if the premature end of stream happens inside PCDATA.
    /// Therefore, use this option at your own risk.
    pub ignore_end_of_stream: bool,
}
79
80impl ParserConfig {
81 /// Returns a new config with default values.
82 ///
83 /// You can tweak default values using builder-like pattern:
84 ///
85 /// ```rust
86 /// use xml::reader::ParserConfig;
87 ///
88 /// let config = ParserConfig::new()
89 /// .trim_whitespace(true)
90 /// .ignore_comments(true)
91 /// .coalesce_characters(false);
92 /// ```
93 pub fn new() -> ParserConfig {
94 ParserConfig {
95 trim_whitespace: false,
96 whitespace_to_characters: false,
97 cdata_to_characters: false,
98 ignore_comments: true,
99 coalesce_characters: true,
100 extra_entities: HashMap::new(),
101 ignore_end_of_stream: false
102 }
103 }
104
105 /// Creates an XML reader with this configuration.
106 ///
107 /// This is a convenience method for configuring and creating a reader at the same time:
108 ///
109 /// ```rust
110 /// use xml::reader::ParserConfig;
111 ///
112 /// let mut source: &[u8] = b"...";
113 ///
114 /// let reader = ParserConfig::new()
115 /// .trim_whitespace(true)
116 /// .ignore_comments(true)
117 /// .coalesce_characters(false)
118 /// .create_reader(&mut source);
119 /// ```
120 ///
121 /// This method is exactly equivalent to calling `EventReader::new_with_config()` with
122 /// this configuration object.
123 #[inline]
124 pub fn create_reader<R: Read>(self, source: R) -> EventReader<R> {
125 EventReader::new_with_config(source, self)
126 }
127
128 /// Adds a new entity mapping and returns an updated config object.
129 ///
130 /// This is a convenience method for adding external entities mappings to the XML parser.
131 /// An example:
132 ///
133 /// ```rust
134 /// use xml::reader::ParserConfig;
135 ///
136 /// let mut source: &[u8] = b"...";
137 ///
138 /// let reader = ParserConfig::new()
139 /// .add_entity("nbsp", " ")
140 /// .add_entity("copy", "©")
141 /// .add_entity("reg", "®")
142 /// .create_reader(&mut source);
143 /// ```
144 pub fn add_entity<S: Into<String>, T: Into<String>>(mut self, entity: S, value: T) -> ParserConfig {
145 self.extra_entities.insert(entity.into(), value.into());
146 self
147 }
148}
149
150impl Default for ParserConfig {
151 #[inline]
152 fn default() -> ParserConfig {
153 ParserConfig::new()
154 }
155}
156
157gen_setters! { ParserConfig,
158 trim_whitespace: val bool,
159 whitespace_to_characters: val bool,
160 cdata_to_characters: val bool,
161 ignore_comments: val bool,
162 coalesce_characters: val bool,
163 ignore_end_of_stream: val bool
164}