pest/
span.rs

1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use core::fmt;
11use core::hash::{Hash, Hasher};
12use core::ops::{Bound, RangeBounds};
13use core::ptr;
14use core::str;
15
16use crate::position;
17
/// A region of a `&str` input, delimited by a start and an end byte offset.
///
/// A `Span` is obtained from either [two `Position`s] or from a [`Pair`].
///
/// [two `Position`s]: struct.Position.html#method.span
/// [`Pair`]: ../iterators/struct.Pair.html#method.span
#[derive(Copy, Clone)]
pub struct Span<'i> {
    /// The complete input string that `start` and `end` index into.
    input: &'i str,
    /// Byte offset at which the span begins.
    ///
    /// # Safety
    ///
    /// Must be a valid character boundary index into `input`.
    start: usize,
    /// Byte offset at which the span ends (exclusive).
    ///
    /// # Safety
    ///
    /// Must be a valid character boundary index into `input`.
    end: usize,
}
34
35impl<'i> Span<'i> {
36    /// Create a new `Span` without checking invariants. (Checked with `debug_assertions`.)
37    ///
38    /// # Safety
39    ///
40    /// `input[start..end]` must be a valid subslice; that is, said indexing should not panic.
41    pub(crate) unsafe fn new_unchecked(input: &str, start: usize, end: usize) -> Span<'_> {
42        debug_assert!(input.get(start..end).is_some());
43        Span { input, start, end }
44    }
45
46    /// Attempts to create a new span. Will return `None` if `input[start..end]` is an invalid index
47    /// into `input`.
48    ///
49    /// # Examples
50    ///
51    /// ```
52    /// # use pest::Span;
53    /// let input = "Hello!";
54    /// assert_eq!(None, Span::new(input, 100, 0));
55    /// assert!(Span::new(input, 0, input.len()).is_some());
56    /// ```
57    pub fn new(input: &str, start: usize, end: usize) -> Option<Span<'_>> {
58        if input.get(start..end).is_some() {
59            Some(Span { input, start, end })
60        } else {
61            None
62        }
63    }
64
65    /// Attempts to create a new span based on a sub-range.
66    ///
67    /// ```
68    /// use pest::Span;
69    /// let input = "Hello World!";
70    /// let world = Span::new(input, 6, input.len()).unwrap();
71    /// let orl = world.get(1..=3);
72    /// assert!(orl.is_some());
73    /// assert_eq!(orl.unwrap().as_str(), "orl");
74    /// ```
75    ///
76    /// # Examples
77    pub fn get(&self, range: impl RangeBounds<usize>) -> Option<Span<'i>> {
78        let start = match range.start_bound() {
79            Bound::Included(offset) => *offset,
80            Bound::Excluded(offset) => *offset + 1,
81            Bound::Unbounded => 0,
82        };
83        let end = match range.end_bound() {
84            Bound::Included(offset) => *offset + 1,
85            Bound::Excluded(offset) => *offset,
86            Bound::Unbounded => self.as_str().len(),
87        };
88
89        self.as_str().get(start..end).map(|_| Span {
90            input: self.input,
91            start: self.start + start,
92            end: self.start + end,
93        })
94    }
95
96    /// Returns the `Span`'s start byte position as a `usize`.
97    ///
98    /// # Examples
99    ///
100    /// ```
101    /// # use pest::Position;
102    /// let input = "ab";
103    /// let start = Position::from_start(input);
104    /// let end = start.clone();
105    /// let span = start.span(&end);
106    ///
107    /// assert_eq!(span.start(), 0);
108    /// ```
109    #[inline]
110    pub fn start(&self) -> usize {
111        self.start
112    }
113
114    /// Returns the `Span`'s end byte position as a `usize`.
115    ///
116    /// # Examples
117    ///
118    /// ```
119    /// # use pest::Position;
120    /// let input = "ab";
121    /// let start = Position::from_start(input);
122    /// let end = start.clone();
123    /// let span = start.span(&end);
124    ///
125    /// assert_eq!(span.end(), 0);
126    /// ```
127    #[inline]
128    pub fn end(&self) -> usize {
129        self.end
130    }
131
132    /// Returns the `Span`'s start `Position`.
133    ///
134    /// # Examples
135    ///
136    /// ```
137    /// # use pest::Position;
138    /// let input = "ab";
139    /// let start = Position::from_start(input);
140    /// let end = start.clone();
141    /// let span = start.clone().span(&end);
142    ///
143    /// assert_eq!(span.start_pos(), start);
144    /// ```
145    #[inline]
146    pub fn start_pos(&self) -> position::Position<'i> {
147        // Span's start position is always a UTF-8 border.
148        unsafe { position::Position::new_unchecked(self.input, self.start) }
149    }
150
151    /// Returns the `Span`'s end `Position`.
152    ///
153    /// # Examples
154    ///
155    /// ```
156    /// # use pest::Position;
157    /// let input = "ab";
158    /// let start = Position::from_start(input);
159    /// let end = start.clone();
160    /// let span = start.span(&end);
161    ///
162    /// assert_eq!(span.end_pos(), end);
163    /// ```
164    #[inline]
165    pub fn end_pos(&self) -> position::Position<'i> {
166        // Span's end position is always a UTF-8 border.
167        unsafe { position::Position::new_unchecked(self.input, self.end) }
168    }
169
170    /// Splits the `Span` into a pair of `Position`s.
171    ///
172    /// # Examples
173    ///
174    /// ```
175    /// # use pest::Position;
176    /// let input = "ab";
177    /// let start = Position::from_start(input);
178    /// let end = start.clone();
179    /// let span = start.clone().span(&end);
180    ///
181    /// assert_eq!(span.split(), (start, end));
182    /// ```
183    #[inline]
184    pub fn split(self) -> (position::Position<'i>, position::Position<'i>) {
185        // Span's start and end positions are always a UTF-8 borders.
186        let pos1 = unsafe { position::Position::new_unchecked(self.input, self.start) };
187        let pos2 = unsafe { position::Position::new_unchecked(self.input, self.end) };
188
189        (pos1, pos2)
190    }
191
192    /// Captures a slice from the `&str` defined by the `Span`.
193    ///
194    /// # Examples
195    ///
196    /// ```
197    /// # use pest;
198    /// # #[allow(non_camel_case_types)]
199    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
200    /// enum Rule {}
201    ///
202    /// let input = "abc";
203    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(1).unwrap();
204    /// let start_pos = state.position().clone();
205    /// state = state.match_string("b").unwrap();
206    /// let span = start_pos.span(&state.position().clone());
207    /// assert_eq!(span.as_str(), "b");
208    /// ```
209    #[inline]
210    pub fn as_str(&self) -> &'i str {
211        // Span's start and end positions are always a UTF-8 borders.
212        &self.input[self.start..self.end]
213    }
214
215    /// Returns the input string of the `Span`.
216    ///
217    /// This function returns the input string of the `Span` as a `&str`. This is the source string
218    /// from which the `Span` was created. The returned `&str` can be used to examine the contents of
219    /// the `Span` or to perform further processing on the string.
220    ///
221    /// # Examples
222    ///
223    /// ```
224    /// # use pest;
225    /// # use pest::Span;
226    ///
227    /// // Example: Get input string from a span
228    /// let input = "abc\ndef\nghi";
229    /// let span = Span::new(input, 1, 7).unwrap();
230    /// assert_eq!(span.get_input(), input);
231    /// ```
232    pub fn get_input(&self) -> &'i str {
233        self.input
234    }
235
236    /// Iterates over all lines (partially) covered by this span. Yielding a `&str` for each line.
237    ///
238    /// # Examples
239    ///
240    /// ```
241    /// # use pest;
242    /// # #[allow(non_camel_case_types)]
243    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
244    /// enum Rule {}
245    ///
246    /// let input = "a\nb\nc";
247    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap();
248    /// let start_pos = state.position().clone();
249    /// state = state.match_string("b\nc").unwrap();
250    /// let span = start_pos.span(&state.position().clone());
251    /// assert_eq!(span.lines().collect::<Vec<_>>(), vec!["b\n", "c"]);
252    /// ```
253    #[inline]
254    pub fn lines(&self) -> Lines<'_> {
255        Lines {
256            inner: self.lines_span(),
257        }
258    }
259
260    /// Iterates over all lines (partially) covered by this span. Yielding a `Span` for each line.
261    ///
262    /// # Examples
263    ///
264    /// ```
265    /// # use pest;
266    /// # use pest::Span;
267    /// # #[allow(non_camel_case_types)]
268    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
269    /// enum Rule {}
270    ///
271    /// let input = "a\nb\nc";
272    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap();
273    /// let start_pos = state.position().clone();
274    /// state = state.match_string("b\nc").unwrap();
275    /// let span = start_pos.span(&state.position().clone());
276    /// assert_eq!(span.lines_span().collect::<Vec<_>>(), vec![Span::new(input, 2, 4).unwrap(), Span::new(input, 4, 5).unwrap()]);
277    /// ```
278    pub fn lines_span(&self) -> LinesSpan<'_> {
279        LinesSpan {
280            span: self,
281            pos: self.start,
282        }
283    }
284}
285
286impl<'i> fmt::Debug for Span<'i> {
287    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
288        f.debug_struct("Span")
289            .field("str", &self.as_str())
290            .field("start", &self.start)
291            .field("end", &self.end)
292            .finish()
293    }
294}
295
296impl<'i> PartialEq for Span<'i> {
297    fn eq(&self, other: &Span<'i>) -> bool {
298        ptr::eq(self.input, other.input) && self.start == other.start && self.end == other.end
299    }
300}
301
302impl<'i> Eq for Span<'i> {}
303
304impl<'i> Hash for Span<'i> {
305    fn hash<H: Hasher>(&self, state: &mut H) {
306        (self.input as *const str).hash(state);
307        self.start.hash(state);
308        self.end.hash(state);
309    }
310}
311
312/// Merges two spans into one.
313///
314/// This function merges two spans that are contiguous or overlapping into a single span
315/// that covers the entire range of the two input spans. This is useful when you want to
316/// aggregate information from multiple spans into a single entity.
317///
318/// The function checks if the input spans are overlapping or contiguous by comparing their
319/// start and end positions. If they are, a new span is created with the minimum start position
320/// and the maximum end position of the two input spans.
321///
322/// If the input spans are neither overlapping nor contiguous, the function returns None,
323/// indicating that a merge operation was not possible.
324///
325/// # Examples
326///
327/// ```
328/// # use pest;
329/// # use pest::Span;
330/// # use pest::merge_spans;
331///
332/// // Example 1: Contiguous spans
333/// let input = "abc\ndef\nghi";
334/// let span1 = Span::new(input, 1, 7).unwrap();
335/// let span2 = Span::new(input, 7, 11).unwrap();
336/// let merged = merge_spans(&span1, &span2).unwrap();
337/// assert_eq!(merged, Span::new(input, 1, 11).unwrap());
338///
339/// // Example 2: Overlapping spans
340/// let input = "abc\ndef\nghi";
341/// let span1 = Span::new(input, 1, 7).unwrap();
342/// let span2 = Span::new(input, 5, 11).unwrap();
343/// let merged = merge_spans(&span1, &span2).unwrap();
344/// assert_eq!(merged, Span::new(input, 1, 11).unwrap());
345///
346/// // Example 3: Non-contiguous spans
347/// let input = "abc\ndef\nghi";
348/// let span1 = Span::new(input, 1, 7).unwrap();
349/// let span2 = Span::new(input, 8, 11).unwrap();
350/// let merged = merge_spans(&span1, &span2);
351/// assert!(merged.is_none());
352/// ```
353pub fn merge_spans<'i>(a: &Span<'i>, b: &Span<'i>) -> Option<Span<'i>> {
354    if a.end() >= b.start() && a.start() <= b.end() {
355        // The spans overlap or are contiguous, so they can be merged.
356        Span::new(
357            a.get_input(),
358            core::cmp::min(a.start(), b.start()),
359            core::cmp::max(a.end(), b.end()),
360        )
361    } else {
362        // The spans don't overlap and aren't contiguous, so they can't be merged.
363        None
364    }
365}
366
367/// Line iterator for Spans, created by [`Span::lines_span()`].
368///
369/// Iterates all lines that are at least _partially_ covered by the span. Yielding a `Span` for each.
370///
371/// [`Span::lines_span()`]: struct.Span.html#method.lines_span
372pub struct LinesSpan<'i> {
373    span: &'i Span<'i>,
374    pos: usize,
375}
376
377impl<'i> Iterator for LinesSpan<'i> {
378    type Item = Span<'i>;
379    fn next(&mut self) -> Option<Self::Item> {
380        if self.pos > self.span.end {
381            return None;
382        }
383        let pos = position::Position::new(self.span.input, self.pos)?;
384        if pos.at_end() {
385            return None;
386        }
387
388        let line_start = pos.find_line_start();
389        self.pos = pos.find_line_end();
390
391        Span::new(self.span.input, line_start, self.pos)
392    }
393}
394
395/// Line iterator for Spans, created by [`Span::lines()`].
396///
397/// Iterates all lines that are at least _partially_ covered by the span. Yielding a `&str` for each.
398///
399/// [`Span::lines()`]: struct.Span.html#method.lines
400pub struct Lines<'i> {
401    inner: LinesSpan<'i>,
402}
403
404impl<'i> Iterator for Lines<'i> {
405    type Item = &'i str;
406    fn next(&mut self) -> Option<Self::Item> {
407        self.inner.next().map(|span| span.as_str())
408    }
409}
410
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::borrow::ToOwned;
    use alloc::vec::Vec;

    // `Span::get` resolves ranges relative to the span and keeps the original input.
    #[test]
    fn get() {
        let input = "abc123abc";
        let span = Span::new(input, 3, input.len()).unwrap();
        assert_eq!(span.as_str(), "123abc");
        assert_eq!(span.input, input);

        // Inclusive upper bound only.
        let head = span.get(..=2);
        assert!(head.is_some());
        assert_eq!(head.unwrap().input, input);
        assert_eq!(head.unwrap().as_str(), "123");

        // Fully unbounded range.
        let whole = span.get(..);
        assert!(whole.is_some());
        assert_eq!(whole.unwrap().input, input);
        assert_eq!(whole.unwrap().as_str(), "123abc");

        // Lower bound only.
        let tail = span.get(3..);
        assert!(tail.is_some());
        assert_eq!(tail.unwrap().input, input);
        assert_eq!(tail.unwrap().as_str(), "abc");

        // Empty range.
        let empty = span.get(0..0);
        assert!(empty.is_some());
        assert_eq!(empty.unwrap().input, input);
        assert_eq!(empty.unwrap().as_str(), "");
    }

    // Ranges reaching outside of the span are rejected.
    #[test]
    fn get_fails() {
        let input = "abc";
        let span = Span::new(input, 0, input.len()).unwrap();

        let end_out_of_bounds = span.get(0..100);
        assert!(end_out_of_bounds.is_none());

        let fully_out_of_bounds = span.get(100..200);
        assert!(fully_out_of_bounds.is_none());
    }

    // Out-of-bounds construction fails and spans over different ranges compare unequal.
    #[test]
    fn span_comp() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let out_of_bounds = Span::new(input, 50, 51);
        assert!(out_of_bounds.is_none());
        let wider = Span::new(input, 0, 8).unwrap();
        assert_ne!(span, wider);
    }

    // Splitting a span gives back the positions it was created from.
    #[test]
    fn split() {
        let input = "a";
        let start = position::Position::from_start(input);
        let mut end = start;

        assert!(end.skip(1));

        let span = start.span(&end);

        assert_eq!(span.split(), (start, end));
    }

    // A span starting and ending mid-line yields the complete lines it touches.
    #[test]
    fn lines_mid() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let from_lines: Vec<_> = span.lines().collect();
        let from_spans: Vec<_> = span.lines_span().map(|line| line.as_str()).collect();

        assert_eq!(from_lines.len(), 2);
        assert_eq!(from_lines[0], "abc\n".to_owned());
        assert_eq!(from_lines[1], "def\n".to_owned());
        // Verify parity with lines_span()
        assert_eq!(from_lines, from_spans);
    }

    // A span ending at the end of the input yields the last, unterminated line as well.
    #[test]
    fn lines_eof() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 5, 11).unwrap();
        assert!(span.end_pos().at_end());
        assert_eq!(span.end(), 11);
        let from_lines: Vec<_> = span.lines().collect();
        let from_spans: Vec<_> = span.lines_span().map(|line| line.as_str()).collect();

        assert_eq!(from_lines.len(), 2);
        assert_eq!(from_lines[0], "def\n".to_owned());
        assert_eq!(from_lines[1], "ghi".to_owned());
        // Verify parity with lines_span()
        assert_eq!(from_lines, from_spans);
    }

    // `lines_span()` yields spans over whole lines whose text matches `lines()`.
    #[test]
    fn lines_span() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let line_spans: Vec<_> = span.lines_span().collect();
        let line_strs: Vec<_> = span.lines().collect();

        assert_eq!(line_spans.len(), 2);
        assert_eq!(line_spans[0], Span::new(input, 0, 4).unwrap());
        assert_eq!(line_spans[1], Span::new(input, 4, 8).unwrap());

        let texts: Vec<_> = line_spans.iter().map(|line| line.as_str()).collect();
        assert_eq!(texts, line_strs);
    }

    // `get_input` returns the whole input, not just the covered part.
    #[test]
    fn get_input_of_span() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();

        assert_eq!(span.get_input(), input);
    }

    // Spans that touch each other merge into one.
    #[test]
    fn merge_contiguous() {
        let input = "abc\ndef\nghi";
        let first = Span::new(input, 1, 7).unwrap();
        let second = Span::new(input, 7, 11).unwrap();
        let merged = merge_spans(&first, &second).unwrap();

        assert_eq!(merged, Span::new(input, 1, 11).unwrap());
    }

    // Spans that share a region merge into one.
    #[test]
    fn merge_overlapping() {
        let input = "abc\ndef\nghi";
        let first = Span::new(input, 1, 7).unwrap();
        let second = Span::new(input, 5, 11).unwrap();
        let merged = merge_spans(&first, &second).unwrap();

        assert_eq!(merged, Span::new(input, 1, 11).unwrap());
    }

    // Spans separated by a gap cannot be merged.
    #[test]
    fn merge_non_contiguous() {
        let input = "abc\ndef\nghi";
        let first = Span::new(input, 1, 7).unwrap();
        let second = Span::new(input, 8, 11).unwrap();
        let merged = merge_spans(&first, &second);

        assert!(merged.is_none());
    }
}