pest/iterators/
pair.rs

1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use alloc::format;
11use alloc::rc::Rc;
12#[cfg(feature = "pretty-print")]
13use alloc::string::String;
14use alloc::vec::Vec;
15use core::borrow::Borrow;
16use core::fmt;
17use core::hash::{Hash, Hasher};
18use core::ptr;
19use core::str;
20
21#[cfg(feature = "pretty-print")]
22use serde::ser::SerializeStruct;
23
24use super::line_index::LineIndex;
25use super::pairs::{self, Pairs};
26use super::queueable_token::QueueableToken;
27use super::tokens::{self, Tokens};
28use crate::span::{self, Span};
29use crate::RuleType;
30
/// A matching pair of [`Token`]s and everything between them.
///
/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
/// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
/// editors.
///
/// Cloning a `Pair` is cheap: the token queue and the line index are reference-counted, and the
/// input is a borrowed string slice.
///
/// [`Token`]: ../enum.Token.html
#[derive(Clone)]
pub struct Pair<'i, R> {
    /// Reference-counted token queue that this `Pair` indexes into.
    ///
    /// # Safety
    ///
    /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
    queue: Rc<Vec<QueueableToken<'i, R>>>,
    /// The input string that the tokens' `input_pos` values index into.
    input: &'i str,
    /// Token index into `queue`. The token at this index is expected to be a
    /// `QueueableToken::Start`; the private `pair` helper treats anything else as unreachable.
    start: usize,
    /// Line lookup structure used by `line_col` to translate an input position into a
    /// `(line, col)` pair.
    line_index: Rc<LineIndex>,
}
50
51/// # Safety
52///
53/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
54pub unsafe fn new<'i, R: RuleType>(
55    queue: Rc<Vec<QueueableToken<'i, R>>>,
56    input: &'i str,
57    line_index: Rc<LineIndex>,
58    start: usize,
59) -> Pair<'i, R> {
60    Pair {
61        queue,
62        input,
63        start,
64        line_index,
65    }
66}
67
68impl<'i, R: RuleType> Pair<'i, R> {
69    /// Returns the `Rule` of the `Pair`.
70    ///
71    /// # Examples
72    ///
73    /// ```
74    /// # use std::rc::Rc;
75    /// # use pest;
76    /// # #[allow(non_camel_case_types)]
77    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
78    /// enum Rule {
79    ///     a
80    /// }
81    ///
82    /// let input = "";
83    /// let pair = pest::state(input, |state| {
84    ///     // generating Token pair with Rule::a ...
85    /// #     state.rule(Rule::a, |s| Ok(s))
86    /// }).unwrap().next().unwrap();
87    ///
88    /// assert_eq!(pair.as_rule(), Rule::a);
89    /// ```
90    #[inline]
91    pub fn as_rule(&self) -> R {
92        match self.queue[self.pair()] {
93            QueueableToken::End { rule, .. } => rule,
94            _ => unreachable!(),
95        }
96    }
97
98    /// Captures a slice from the `&str` defined by the token `Pair`.
99    ///
100    /// # Examples
101    ///
102    /// ```
103    /// # use std::rc::Rc;
104    /// # use pest;
105    /// # #[allow(non_camel_case_types)]
106    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
107    /// enum Rule {
108    ///     ab
109    /// }
110    ///
111    /// let input = "ab";
112    /// let pair = pest::state(input, |state| {
113    ///     // generating Token pair with Rule::ab ...
114    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
115    /// }).unwrap().next().unwrap();
116    ///
117    /// assert_eq!(pair.as_str(), "ab");
118    /// ```
119    #[inline]
120    pub fn as_str(&self) -> &'i str {
121        let start = self.pos(self.start);
122        let end = self.pos(self.pair());
123
124        // Generated positions always come from Positions and are UTF-8 borders.
125        &self.input[start..end]
126    }
127
    /// Returns the input string of the `Pair`.
    ///
    /// This is the complete source string from which the `Pair` was created, not just the part
    /// covered by the `Pair`; use [`as_str`](#method.as_str) for the covered slice. The returned
    /// `&str` borrows from the input for `'i`, so it can outlive the `Pair` itself.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// // Example: Get input string from a Pair
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_str(), "ab");
    /// assert_eq!(input, pair.get_input());
    /// ```
    pub fn get_input(&self) -> &'i str {
        self.input
    }
159
    /// Returns the `Span` defined by the `Pair`, consuming it.
    ///
    /// Deprecated: this simply forwards to [`as_span`](#method.as_span), which produces the same
    /// `Span` without consuming the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.into_span().as_str(), "ab");
    /// ```
    #[inline]
    #[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
    pub fn into_span(self) -> Span<'i> {
        self.as_span()
    }
186
187    /// Returns the `Span` defined by the `Pair`, **without** consuming it.
188    ///
189    /// # Examples
190    ///
191    /// ```
192    /// # use std::rc::Rc;
193    /// # use pest;
194    /// # #[allow(non_camel_case_types)]
195    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
196    /// enum Rule {
197    ///     ab
198    /// }
199    ///
200    /// let input = "ab";
201    /// let pair = pest::state(input, |state| {
202    ///     // generating Token pair with Rule::ab ...
203    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
204    /// }).unwrap().next().unwrap();
205    ///
206    /// assert_eq!(pair.as_span().as_str(), "ab");
207    /// ```
208    #[inline]
209    pub fn as_span(&self) -> Span<'i> {
210        let start = self.pos(self.start);
211        let end = self.pos(self.pair());
212
213        // Generated positions always come from Positions and are UTF-8 borders.
214        unsafe { span::Span::new_unchecked(self.input, start, end) }
215    }
216
217    /// Get current node tag
218    #[inline]
219    pub fn as_node_tag(&self) -> Option<&str> {
220        match &self.queue[self.pair()] {
221            QueueableToken::End { tag, .. } => tag.as_ref().map(|x| x.borrow()),
222            _ => None,
223        }
224    }
225
226    /// Returns the inner `Pairs` between the `Pair`, consuming it.
227    ///
228    /// # Examples
229    ///
230    /// ```
231    /// # use std::rc::Rc;
232    /// # use pest;
233    /// # #[allow(non_camel_case_types)]
234    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
235    /// enum Rule {
236    ///     a
237    /// }
238    ///
239    /// let input = "";
240    /// let pair = pest::state(input, |state| {
241    ///     // generating Token pair with Rule::a ...
242    /// #     state.rule(Rule::a, |s| Ok(s))
243    /// }).unwrap().next().unwrap();
244    ///
245    /// assert!(pair.into_inner().next().is_none());
246    /// ```
247    #[inline]
248    pub fn into_inner(self) -> Pairs<'i, R> {
249        let pair = self.pair();
250
251        pairs::new(
252            self.queue,
253            self.input,
254            Some(self.line_index),
255            self.start + 1,
256            pair,
257        )
258    }
259
260    /// Returns the `Tokens` for the `Pair`.
261    ///
262    /// # Examples
263    ///
264    /// ```
265    /// # use std::rc::Rc;
266    /// # use pest;
267    /// # #[allow(non_camel_case_types)]
268    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
269    /// enum Rule {
270    ///     a
271    /// }
272    ///
273    /// let input = "";
274    /// let pair = pest::state(input, |state| {
275    ///     // generating Token pair with Rule::a ...
276    /// #     state.rule(Rule::a, |s| Ok(s))
277    /// }).unwrap().next().unwrap();
278    /// let tokens: Vec<_> = pair.tokens().collect();
279    ///
280    /// assert_eq!(tokens.len(), 2);
281    /// ```
282    #[inline]
283    pub fn tokens(self) -> Tokens<'i, R> {
284        let end = self.pair();
285
286        tokens::new(self.queue, self.input, self.start, end + 1)
287    }
288
289    /// Generates a string that stores the lexical information of `self` in
290    /// a pretty-printed JSON format.
291    #[cfg(feature = "pretty-print")]
292    pub fn to_json(&self) -> String {
293        ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
294    }
295
296    /// Returns the `line`, `col` of this pair start.
297    pub fn line_col(&self) -> (usize, usize) {
298        let pos = self.pos(self.start);
299        self.line_index.line_col(self.input, pos)
300    }
301
302    fn pair(&self) -> usize {
303        match self.queue[self.start] {
304            QueueableToken::Start {
305                end_token_index, ..
306            } => end_token_index,
307            _ => unreachable!(),
308        }
309    }
310
311    fn pos(&self, index: usize) -> usize {
312        match self.queue[index] {
313            QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
314                input_pos
315            }
316        }
317    }
318}
319
320impl<'i, R: RuleType> Pairs<'i, R> {
321    /// Create a new `Pairs` iterator containing just the single `Pair`.
322    pub fn single(pair: Pair<'i, R>) -> Self {
323        let end = pair.pair();
324        pairs::new(
325            pair.queue,
326            pair.input,
327            Some(pair.line_index),
328            pair.start,
329            end,
330        )
331    }
332}
333
334impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> {
335    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
336        let pair = &mut f.debug_struct("Pair");
337        pair.field("rule", &self.as_rule());
338        // In order not to break compatibility
339        if let Some(s) = self.as_node_tag() {
340            pair.field("node_tag", &s);
341        }
342        pair.field("span", &self.as_span())
343            .field("inner", &self.clone().into_inner().collect::<Vec<_>>())
344            .finish()
345    }
346}
347
348impl<'i, R: RuleType> fmt::Display for Pair<'i, R> {
349    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
350        let rule = self.as_rule();
351        let start = self.pos(self.start);
352        let end = self.pos(self.pair());
353        let mut pairs = self.clone().into_inner().peekable();
354
355        if pairs.peek().is_none() {
356            write!(f, "{:?}({}, {})", rule, start, end)
357        } else {
358            write!(
359                f,
360                "{:?}({}, {}, [{}])",
361                rule,
362                start,
363                end,
364                pairs
365                    .map(|pair| format!("{}", pair))
366                    .collect::<Vec<_>>()
367                    .join(", ")
368            )
369        }
370    }
371}
372
373impl<'i, R: PartialEq> PartialEq for Pair<'i, R> {
374    fn eq(&self, other: &Pair<'i, R>) -> bool {
375        Rc::ptr_eq(&self.queue, &other.queue)
376            && ptr::eq(self.input, other.input)
377            && self.start == other.start
378    }
379}
380
// Marker impl: the `PartialEq` impl above compares pointers and an index.
impl<'i, R: Eq> Eq for Pair<'i, R> {}
382
383impl<'i, R: Hash> Hash for Pair<'i, R> {
384    fn hash<H: Hasher>(&self, state: &mut H) {
385        (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state);
386        (self.input as *const str).hash(state);
387        self.start.hash(state);
388    }
389}
390
#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> {
    /// Serializes the `Pair` as a struct with three fields: `pos` (start and end input
    /// positions), `rule` (the rule's `Debug` rendering) and `inner` (the matched text when there
    /// are no inner pairs, the inner `Pairs` otherwise).
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let mut state = serializer.serialize_struct("Pairs", 3)?;

        let span = (self.pos(self.start), self.pos(self.pair()));
        state.serialize_field("pos", &span)?;

        let rule_name = format!("{:?}", self.as_rule());
        state.serialize_field("rule", &rule_name)?;

        let children = self.clone().into_inner();
        match children.peek() {
            // Leaf pair: emit the matched text itself.
            None => state.serialize_field("inner", &self.as_str())?,
            // Otherwise emit the nested pairs.
            Some(_) => state.serialize_field("inner", &children)?,
        }

        state.end()
    }
}
415
#[cfg(test)]
mod tests {
    use crate::macros::tests::*;
    use crate::parser::Parser;

    /// `to_json` must produce exactly this pretty-printed structure for `a(b)`.
    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let mut parsed = AbcParser::parse(Rule::a, "abcde").unwrap();
        let pair = parsed.next().unwrap();

        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, pair.to_json());
    }

    /// `into_inner` on `a(b())` leaves only the start and end tokens of `b`.
    #[test]
    fn pair_into_inner() {
        // the tokens a(b())
        let outer = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        // the tokens b()
        let inner_token_count = outer.into_inner().tokens().count();

        assert_eq!(2, inner_token_count);
    }

    /// `get_input` returns the whole string handed to the parser.
    #[test]
    fn get_input_of_pair() {
        let input = "abcde";
        let mut parsed = AbcParser::parse(Rule::a, input).unwrap();
        let pair = parsed.next().unwrap();

        assert_eq!(input, pair.get_input());
    }
}