pest/iterators/pair.rs
1// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use alloc::format;
11use alloc::rc::Rc;
12#[cfg(feature = "pretty-print")]
13use alloc::string::String;
14use alloc::vec::Vec;
15use core::borrow::Borrow;
16use core::fmt;
17use core::hash::{Hash, Hasher};
18use core::ptr;
19use core::str;
20
21#[cfg(feature = "pretty-print")]
22use serde::ser::SerializeStruct;
23
24use super::line_index::LineIndex;
25use super::pairs::{self, Pairs};
26use super::queueable_token::QueueableToken;
27use super::tokens::{self, Tokens};
28use crate::span::Span;
29use crate::RuleType;
30
/// A matching pair of [`Token`]s and everything between them.
///
/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
/// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
/// editors.
///
/// [`Token`]: ../enum.Token.html
#[derive(Clone)]
pub struct Pair<'i, R> {
    /// Shared token queue; `start` is an index into this vector.
    queue: Rc<Vec<QueueableToken<'i, R>>>,
    /// The complete input string that the token positions refer to.
    input: &'i str,
    /// Token index into `queue`.
    start: usize,
    /// Shared index used by `line_col` to turn an input position into a `(line, col)` pair.
    line_index: Rc<LineIndex>,
}
47
/// Builds a [`Pair`] over the shared token `queue` for the given `input`.
///
/// `start` is the index into `queue` of the token that opens the pair; the methods of `Pair`
/// expect that token to be a `QueueableToken::Start` and hit `unreachable!()` otherwise.
/// `line_index` is stored as-is and later used by `Pair::line_col`.
pub fn new<'i, R: RuleType>(
    queue: Rc<Vec<QueueableToken<'i, R>>>,
    input: &'i str,
    line_index: Rc<LineIndex>,
    start: usize,
) -> Pair<'i, R> {
    Pair {
        queue,
        input,
        start,
        line_index,
    }
}
61
62impl<'i, R: RuleType> Pair<'i, R> {
63 /// Returns the `Rule` of the `Pair`.
64 ///
65 /// # Examples
66 ///
67 /// ```
68 /// # use std::rc::Rc;
69 /// # use pest;
70 /// # #[allow(non_camel_case_types)]
71 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
72 /// enum Rule {
73 /// a
74 /// }
75 ///
76 /// let input = "";
77 /// let pair = pest::state(input, |state| {
78 /// // generating Token pair with Rule::a ...
79 /// # state.rule(Rule::a, |s| Ok(s))
80 /// }).unwrap().next().unwrap();
81 ///
82 /// assert_eq!(pair.as_rule(), Rule::a);
83 /// ```
84 #[inline]
85 pub fn as_rule(&self) -> R {
86 match self.queue[self.pair()] {
87 QueueableToken::End { rule, .. } => rule,
88 _ => unreachable!(),
89 }
90 }
91
92 /// Captures a slice from the `&str` defined by the token `Pair`.
93 ///
94 /// # Examples
95 ///
96 /// ```
97 /// # use std::rc::Rc;
98 /// # use pest;
99 /// # #[allow(non_camel_case_types)]
100 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
101 /// enum Rule {
102 /// ab
103 /// }
104 ///
105 /// let input = "ab";
106 /// let pair = pest::state(input, |state| {
107 /// // generating Token pair with Rule::ab ...
108 /// # state.rule(Rule::ab, |s| s.match_string("ab"))
109 /// }).unwrap().next().unwrap();
110 ///
111 /// assert_eq!(pair.as_str(), "ab");
112 /// ```
113 #[inline]
114 pub fn as_str(&self) -> &'i str {
115 let start = self.pos(self.start);
116 let end = self.pos(self.pair());
117
118 // Generated positions always come from Positions and are UTF-8 borders.
119 &self.input[start..end]
120 }
121
122 /// Returns the input string of the `Pair`.
123 ///
124 /// This function returns the input string of the `Pair` as a `&str`. This is the source string
125 /// from which the `Pair` was created. The returned `&str` can be used to examine the contents of
126 /// the `Pair` or to perform further processing on the string.
127 ///
128 /// # Examples
129 ///
130 /// ```
131 /// # use std::rc::Rc;
132 /// # use pest;
133 /// # #[allow(non_camel_case_types)]
134 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
135 /// enum Rule {
136 /// ab
137 /// }
138 ///
139 /// // Example: Get input string from a Pair
140 ///
141 /// let input = "ab";
142 /// let pair = pest::state(input, |state| {
143 /// // generating Token pair with Rule::ab ...
144 /// # state.rule(Rule::ab, |s| s.match_string("ab"))
145 /// }).unwrap().next().unwrap();
146 ///
147 /// assert_eq!(pair.as_str(), "ab");
148 /// assert_eq!(input, pair.get_input());
149 /// ```
150 pub fn get_input(&self) -> &'i str {
151 self.input
152 }
153
154 /// Returns the `Span` defined by the `Pair`, consuming it.
155 ///
156 /// # Examples
157 ///
158 /// ```
159 /// # use std::rc::Rc;
160 /// # use pest;
161 /// # #[allow(non_camel_case_types)]
162 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
163 /// enum Rule {
164 /// ab
165 /// }
166 ///
167 /// let input = "ab";
168 /// let pair = pest::state(input, |state| {
169 /// // generating Token pair with Rule::ab ...
170 /// # state.rule(Rule::ab, |s| s.match_string("ab"))
171 /// }).unwrap().next().unwrap();
172 ///
173 /// assert_eq!(pair.into_span().as_str(), "ab");
174 /// ```
175 #[inline]
176 #[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
177 pub fn into_span(self) -> Span<'i> {
178 self.as_span()
179 }
180
181 /// Returns the `Span` defined by the `Pair`, **without** consuming it.
182 ///
183 /// # Examples
184 ///
185 /// ```
186 /// # use std::rc::Rc;
187 /// # use pest;
188 /// # #[allow(non_camel_case_types)]
189 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
190 /// enum Rule {
191 /// ab
192 /// }
193 ///
194 /// let input = "ab";
195 /// let pair = pest::state(input, |state| {
196 /// // generating Token pair with Rule::ab ...
197 /// # state.rule(Rule::ab, |s| s.match_string("ab"))
198 /// }).unwrap().next().unwrap();
199 ///
200 /// assert_eq!(pair.as_span().as_str(), "ab");
201 /// ```
202 #[inline]
203 pub fn as_span(&self) -> Span<'i> {
204 let start = self.pos(self.start);
205 let end = self.pos(self.pair());
206
207 Span::new_internal(self.input, start, end)
208 }
209
210 /// Get current node tag
211 #[inline]
212 pub fn as_node_tag(&self) -> Option<&str> {
213 match &self.queue[self.pair()] {
214 QueueableToken::End { tag, .. } => tag.as_ref().map(|x| x.borrow()),
215 _ => None,
216 }
217 }
218
219 /// Returns the inner `Pairs` between the `Pair`, consuming it.
220 ///
221 /// # Examples
222 ///
223 /// ```
224 /// # use std::rc::Rc;
225 /// # use pest;
226 /// # #[allow(non_camel_case_types)]
227 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
228 /// enum Rule {
229 /// a
230 /// }
231 ///
232 /// let input = "";
233 /// let pair = pest::state(input, |state| {
234 /// // generating Token pair with Rule::a ...
235 /// # state.rule(Rule::a, |s| Ok(s))
236 /// }).unwrap().next().unwrap();
237 ///
238 /// assert!(pair.into_inner().next().is_none());
239 /// ```
240 #[inline]
241 pub fn into_inner(self) -> Pairs<'i, R> {
242 let pair = self.pair();
243
244 pairs::new(
245 self.queue,
246 self.input,
247 Some(self.line_index),
248 self.start + 1,
249 pair,
250 )
251 }
252
253 /// Returns the `Tokens` for the `Pair`.
254 ///
255 /// # Examples
256 ///
257 /// ```
258 /// # use std::rc::Rc;
259 /// # use pest;
260 /// # #[allow(non_camel_case_types)]
261 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
262 /// enum Rule {
263 /// a
264 /// }
265 ///
266 /// let input = "";
267 /// let pair = pest::state(input, |state| {
268 /// // generating Token pair with Rule::a ...
269 /// # state.rule(Rule::a, |s| Ok(s))
270 /// }).unwrap().next().unwrap();
271 /// let tokens: Vec<_> = pair.tokens().collect();
272 ///
273 /// assert_eq!(tokens.len(), 2);
274 /// ```
275 #[inline]
276 pub fn tokens(self) -> Tokens<'i, R> {
277 let end = self.pair();
278
279 tokens::new(self.queue, self.input, self.start, end + 1)
280 }
281
282 /// Generates a string that stores the lexical information of `self` in
283 /// a pretty-printed JSON format.
284 #[cfg(feature = "pretty-print")]
285 pub fn to_json(&self) -> String {
286 ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
287 }
288
289 /// Returns the `line`, `col` of this pair start.
290 pub fn line_col(&self) -> (usize, usize) {
291 let pos = self.pos(self.start);
292 self.line_index.line_col(self.input, pos)
293 }
294
295 fn pair(&self) -> usize {
296 match self.queue[self.start] {
297 QueueableToken::Start {
298 end_token_index, ..
299 } => end_token_index,
300 _ => unreachable!(),
301 }
302 }
303
304 fn pos(&self, index: usize) -> usize {
305 match self.queue[index] {
306 QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
307 input_pos
308 }
309 }
310 }
311}
312
313impl<'i, R: RuleType> Pairs<'i, R> {
314 /// Create a new `Pairs` iterator containing just the single `Pair`.
315 pub fn single(pair: Pair<'i, R>) -> Self {
316 let end = pair.pair();
317 pairs::new(
318 pair.queue,
319 pair.input,
320 Some(pair.line_index),
321 pair.start,
322 end,
323 )
324 }
325}
326
327impl<R: RuleType> fmt::Debug for Pair<'_, R> {
328 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
329 let pair = &mut f.debug_struct("Pair");
330 pair.field("rule", &self.as_rule());
331 // In order not to break compatibility
332 if let Some(s) = self.as_node_tag() {
333 pair.field("node_tag", &s);
334 }
335 pair.field("span", &self.as_span())
336 .field("inner", &self.clone().into_inner().collect::<Vec<_>>())
337 .finish()
338 }
339}
340
341impl<R: RuleType> fmt::Display for Pair<'_, R> {
342 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
343 if f.alternate() {
344 let rule = self.as_rule();
345 let start = self.pos(self.start);
346 let end = self.pos(self.pair());
347 let mut pairs = self.clone().into_inner().peekable();
348
349 if pairs.peek().is_none() {
350 write!(f, "{:?}({}, {})", rule, start, end)
351 } else {
352 write!(
353 f,
354 "{:?}({}, {}, [{}])",
355 rule,
356 start,
357 end,
358 pairs
359 .map(|pair| format!("{:#}", pair))
360 .collect::<Vec<_>>()
361 .join(", ")
362 )
363 }
364 } else {
365 write!(f, "{}", self.as_str())
366 }
367 }
368}
369
370impl<'i, R: PartialEq> PartialEq for Pair<'i, R> {
371 fn eq(&self, other: &Pair<'i, R>) -> bool {
372 Rc::ptr_eq(&self.queue, &other.queue)
373 && ptr::eq(self.input, other.input)
374 && self.start == other.start
375 }
376}
377
// `PartialEq` for `Pair` compares pointer identity and the start index only, so the relation
// is reflexive and `Eq` can be implemented as a plain marker.
impl<R: Eq> Eq for Pair<'_, R> {}
379
380impl<'i, R: Hash> Hash for Pair<'i, R> {
381 fn hash<H: Hasher>(&self, state: &mut H) {
382 (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state);
383 (self.input as *const str).hash(state);
384 self.start.hash(state);
385 }
386}
387
#[cfg(feature = "pretty-print")]
impl<R: RuleType> ::serde::Serialize for Pair<'_, R> {
    /// Serializes the pair as a three-field struct: `pos` (start and end positions), `rule`
    /// (the `Debug` rendering of the rule) and `inner` (the nested pairs, or the matched text
    /// when there are none).
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let bounds = (self.pos(self.start), self.pos(self.pair()));
        let rule_name = format!("{:?}", self.as_rule());
        let children = self.clone().into_inner();

        // NOTE(review): the struct name passed here is "Pairs" although a single `Pair` is
        // being serialized; confirm whether that is intentional.
        let mut out = serializer.serialize_struct("Pairs", 3)?;
        out.serialize_field("pos", &bounds)?;
        out.serialize_field("rule", &rule_name)?;

        // A pair without children is represented by the text it matched.
        if children.peek().is_some() {
            out.serialize_field("inner", &children)?;
        } else {
            out.serialize_field("inner", &self.as_str())?;
        }

        out.end()
    }
}
412
#[cfg(test)]
mod tests {
    use crate::alloc::{borrow::ToOwned, format, string::ToString};
    use crate::macros::tests::*;
    use crate::parser::Parser;

    // `to_json` must produce exactly this pretty-printed document for the pair `a` (positions
    // 0..3) that contains the single inner pair `b` (positions 1..2). The comparison is
    // byte-exact, so the whitespace inside the raw string is significant.
    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, pair.to_json());
    }

    // `into_inner` drops the enclosing pair's own tokens: only the start and end token of the
    // inner pair `b` remain.
    #[test]
    fn pair_into_inner() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap(); // the tokens a(b())

        let pairs = pair.into_inner(); // the tokens b()

        assert_eq!(2, pairs.tokens().count());
    }

    // `get_input` returns the complete input string, not just the text matched by the pair.
    #[test]
    fn get_input_of_pair() {
        let input = "abcde";
        let pair = AbcParser::parse(Rule::a, input).unwrap().next().unwrap();

        assert_eq!(input, pair.get_input());
    }

    // The plain `Display` output (used by `to_string`) is the same text as `as_str`.
    #[test]
    fn pair_to_string_matches_as_str() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        assert_eq!(pair.to_string(), pair.as_str().to_string());
    }

    // `{}` prints the matched text, `{:#}` prints the rule tree with positions.
    #[test]
    fn alternate_format() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();
        assert_eq!(format!("{}", pair), "abc".to_owned());
        assert_eq!(format!("{:#}", pair), "a(0, 3, [b(1, 2)])".to_owned());
    }
}
479}