pest/iterators/pair.rs
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use alloc::format;
use alloc::rc::Rc;
#[cfg(feature = "pretty-print")]
use alloc::string::String;
use alloc::vec::Vec;
use core::borrow::Borrow;
use core::fmt;
use core::hash::{Hash, Hasher};
use core::ptr;
use core::str;

#[cfg(feature = "pretty-print")]
use serde::ser::SerializeStruct;

use super::line_index::LineIndex;
use super::pairs::{self, Pairs};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use crate::span::{self, Span};
use crate::RuleType;

/// A matching pair of [`Token`]s and everything between them.
///
/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
/// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
/// editors.
///
/// [`Token`]: ../enum.Token.html
#[derive(Clone)]
pub struct Pair<'i, R> {
    /// # Safety
    ///
    /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
    queue: Rc<Vec<QueueableToken<'i, R>>>,
    input: &'i str,
    /// Token index into `queue`.
    start: usize,
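    /// Line index of `input`, used by `line_col` to compute line/column positions.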
    line_index: Rc<LineIndex>,
}

/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
pub unsafe fn new<'i, R: RuleType>(
    queue: Rc<Vec<QueueableToken<'i, R>>>,
    input: &'i str,
    line_index: Rc<LineIndex>,
    start: usize,
) -> Pair<'i, R> {
    Pair {
        queue,
        input,
        start,
        line_index,
    }
}

impl<'i, R: RuleType> Pair<'i, R> {
    /// Returns the `Rule` of the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_rule(), Rule::a);
    /// ```
    #[inline]
    pub fn as_rule(&self) -> R {
        match self.queue[self.pair()] {
            QueueableToken::End { rule, .. } => rule,
            _ => unreachable!(),
        }
    }

    /// Returns the slice of the input `&str` covered by the token `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_str(), "ab");
    /// ```
    #[inline]
    pub fn as_str(&self) -> &'i str {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());

        // Generated positions always come from Positions and are UTF-8 borders.
        &self.input[start..end]
    }

    /// Returns the input string of the `Pair`.
    ///
    /// This is the full source `&str` that the `Pair` was parsed from, not just the matched
    /// slice; it can be used to inspect the context around the `Pair` or for further processing.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// // Example: Get input string from a Pair
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_str(), "ab");
    /// assert_eq!(input, pair.get_input());
    /// ```
    pub fn get_input(&self) -> &'i str {
        self.input
    }

    /// Returns the `Span` defined by the `Pair`, consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.into_span().as_str(), "ab");
    /// ```
    #[inline]
    #[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
    pub fn into_span(self) -> Span<'i> {
        self.as_span()
    }

    /// Returns the `Span` defined by the `Pair`, **without** consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_span().as_str(), "ab");
    /// ```
    #[inline]
    pub fn as_span(&self) -> Span<'i> {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());

        // Generated positions always come from Positions and are UTF-8 borders.
        unsafe { span::Span::new_unchecked(self.input, start, end) }
    }

    /// Returns the node tag of the `Pair`, if the matched expression was tagged in the grammar.
    #[inline]
    pub fn as_node_tag(&self) -> Option<&str> {
        match &self.queue[self.pair()] {
            QueueableToken::End { tag, .. } => tag.as_ref().map(|x| x.borrow()),
            _ => None,
        }
    }

    /// Returns the inner `Pairs` between the `Pair`'s `Start` and `End` tokens, consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// assert!(pair.into_inner().next().is_none());
    /// ```
    #[inline]
    pub fn into_inner(self) -> Pairs<'i, R> {
        let pair = self.pair();

        pairs::new(
            self.queue,
            self.input,
            Some(self.line_index),
            self.start + 1,
            pair,
        )
    }

    /// Returns the `Tokens` for the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    /// let tokens: Vec<_> = pair.tokens().collect();
    ///
    /// assert_eq!(tokens.len(), 2);
    /// ```
    #[inline]
    pub fn tokens(self) -> Tokens<'i, R> {
        let end = self.pair();

        tokens::new(self.queue, self.input, self.start, end + 1)
    }

    /// Generates a string that stores the lexical information of `self` in
    /// a pretty-printed JSON format.
    #[cfg(feature = "pretty-print")]
    pub fn to_json(&self) -> String {
        ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
    }

    /// Returns the `(line, col)` at which this `Pair` starts. Both values are 1-based.
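    ///
    /// # Examples
    ///
    /// A minimal sketch in the style of the other examples in this module; the rule name and
    /// input are only illustrative.
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// // The pair starts at the very beginning of the input: line 1, column 1.
    /// assert_eq!(pair.line_col(), (1, 1));
    /// ```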
    pub fn line_col(&self) -> (usize, usize) {
        let pos = self.pos(self.start);
        self.line_index.line_col(self.input, pos)
    }

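    /// Returns the index into `queue` of the `End` token that matches the `Start` token at
    /// `self.start`.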
    fn pair(&self) -> usize {
        match self.queue[self.start] {
            QueueableToken::Start {
                end_token_index, ..
            } => end_token_index,
            _ => unreachable!(),
        }
    }

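    /// Returns the byte offset into `input` recorded by the token at `index` in `queue`.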
    fn pos(&self, index: usize) -> usize {
        match self.queue[index] {
            QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
                input_pos
            }
        }
    }
}

impl<'i, R: RuleType> Pairs<'i, R> {
    /// Create a new `Pairs` iterator containing just the single `Pair`.
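    ///
    /// # Examples
    ///
    /// A minimal sketch in the style of the other examples in this module; the rule name and
    /// input are only illustrative.
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// // Wrapping the pair yields an iterator over exactly that one pair.
    /// let mut pairs = pest::iterators::Pairs::single(pair);
    /// assert_eq!(pairs.next().unwrap().as_rule(), Rule::ab);
    /// assert!(pairs.next().is_none());
    /// ```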
    pub fn single(pair: Pair<'i, R>) -> Self {
        let end = pair.pair();
        pairs::new(
            pair.queue,
            pair.input,
            Some(pair.line_index),
            pair.start,
            end,
        )
    }
}

impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let pair = &mut f.debug_struct("Pair");
        pair.field("rule", &self.as_rule());
        // In order not to break compatibility, `node_tag` is only printed when a tag is present.
        if let Some(s) = self.as_node_tag() {
            pair.field("node_tag", &s);
        }
        pair.field("span", &self.as_span())
            .field("inner", &self.clone().into_inner().collect::<Vec<_>>())
            .finish()
    }
}

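// A pair displays as `rule(start, end)` when it has no inner pairs and as
// `rule(start, end, [inner, ...])` otherwise, where `start` and `end` are byte offsets
// into the input.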
impl<'i, R: RuleType> fmt::Display for Pair<'i, R> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rule = self.as_rule();
        let start = self.pos(self.start);
        let end = self.pos(self.pair());
        let mut pairs = self.clone().into_inner().peekable();

        if pairs.peek().is_none() {
            write!(f, "{:?}({}, {})", rule, start, end)
        } else {
            write!(
                f,
                "{:?}({}, {}, [{}])",
                rule,
                start,
                end,
                pairs
                    .map(|pair| format!("{}", pair))
                    .collect::<Vec<_>>()
                    .join(", ")
            )
        }
    }
}

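// Equality and hashing are based on identity rather than content: two `Pair`s compare equal
// only when they share the same token queue, point at the same input, and start at the same
// token index.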
impl<'i, R: PartialEq> PartialEq for Pair<'i, R> {
    fn eq(&self, other: &Pair<'i, R>) -> bool {
        Rc::ptr_eq(&self.queue, &other.queue)
            && ptr::eq(self.input, other.input)
            && self.start == other.start
    }
}

impl<'i, R: Eq> Eq for Pair<'i, R> {}

impl<'i, R: Hash> Hash for Pair<'i, R> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state);
        (self.input as *const str).hash(state);
        self.start.hash(state);
    }
}

#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());
        let rule = format!("{:?}", self.as_rule());
        let inner = self.clone().into_inner();

        let mut ser = serializer.serialize_struct("Pairs", 3)?;
        ser.serialize_field("pos", &(start, end))?;
        ser.serialize_field("rule", &rule)?;

        if inner.peek().is_none() {
            ser.serialize_field("inner", &self.as_str())?;
        } else {
            ser.serialize_field("inner", &inner)?;
        }

        ser.end()
    }
}

#[cfg(test)]
mod tests {
    use crate::macros::tests::*;
    use crate::parser::Parser;

    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, pair.to_json());
    }

    #[test]
    fn pair_into_inner() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap(); // the tokens a(b())

        let pairs = pair.into_inner(); // the tokens b()

        assert_eq!(2, pairs.tokens().count());
    }

    #[test]
    fn get_input_of_pair() {
        let input = "abcde";
        let pair = AbcParser::parse(Rule::a, input).unwrap().next().unwrap();

        assert_eq!(input, pair.get_input());
    }
469}