nom_language/precedence/
mod.rs

1//! Combinators to parse expressions with operator precedence.
2
3#[cfg(test)]
4mod tests;
5
6use nom::error::{ErrorKind, FromExternalError, ParseError};
7use nom::{Check, Err, IResult, Input, Mode, OutputM, OutputMode, Parser};
8
9/// An unary operator.
10pub struct Unary<V, Q: Ord + Copy> {
11  value: V,
12  precedence: Q,
13}
14
15/// A binary operator.
16pub struct Binary<V, Q: Ord + Copy> {
17  value: V,
18  precedence: Q,
19  assoc: Assoc,
20}
21
22/// A single evaluation step.
23pub enum Operation<P1, P2, P3, O> {
24  /// A prefix operation.
25  Prefix(P1, O),
26  /// A postfix operation.
27  Postfix(O, P2),
28  /// A binary operation.
29  Binary(O, P3, O),
30}
31
32/// Associativity for binary operators.
33#[derive(Copy, Clone, PartialEq, Eq)]
34pub enum Assoc {
35  /// Left associative.
36  Left,
37  /// Right associative.
38  Right,
39}
40
41/// Element for operator stack.
42enum Operator<P1, P2, P3, Q: Ord + Copy> {
43  Prefix(P1, Q),
44  Postfix(P2, Q),
45  Binary(P3, Q, Assoc),
46}
47
48impl<P1, P2, P3, Q> Operator<P1, P2, P3, Q>
49where
50  Q: Ord + Copy,
51{
52  fn precedence(&self) -> Q {
53    match self {
54      Operator::Prefix(_, p) => *p,
55      Operator::Postfix(_, p) => *p,
56      Operator::Binary(_, p, _) => *p,
57    }
58  }
59
60  fn is_postfix(&self) -> bool {
61    match self {
62      Operator::Postfix(_, _) => true,
63      _ => false,
64    }
65  }
66}
67
68/// Runs the inner parser and transforms the result into an unary operator with the given precedence.
69///
70/// Intended for use with [precedence].
71/// # Arguments
72/// * `precedence` The precedence of the operator.
73/// * `parser` The parser to apply.
74pub fn unary_op<I, O, E, P, Q>(
75  precedence: Q,
76  mut parser: P,
77) -> impl FnMut(I) -> IResult<I, Unary<O, Q>, E>
78where
79  P: Parser<I, Output = O, Error = E>,
80  Q: Ord + Copy,
81{
82  move |input| match parser.parse(input) {
83    Ok((i, value)) => Ok((i, Unary { value, precedence })),
84    Err(e) => Err(e),
85  }
86}
87
88/// Runs the inner parser and transforms the result into a binary operator with the given precedence and associativity.
89///
90/// Intended for use with [precedence].
91/// # Arguments
92/// * `precedence` The precedence of the operator.
93/// * `assoc` The associativity of the operator.
94/// * `parser` The parser to apply.
95pub fn binary_op<I, O, E, P, Q>(
96  precedence: Q,
97  assoc: Assoc,
98  mut parser: P,
99) -> impl FnMut(I) -> IResult<I, Binary<O, Q>, E>
100where
101  P: Parser<I, Output = O, Error = E>,
102  Q: Ord + Copy,
103{
104  move |input| match parser.parse(input) {
105    Ok((i, value)) => Ok((
106      i,
107      Binary {
108        value,
109        precedence,
110        assoc,
111      },
112    )),
113    Err(e) => Err(e),
114  }
115}
116
117/// Parses an expression with operator precedence.
118///
119/// Supports prefix, postfix and binary operators. Operators are applied in ascending precedence.
120///
121/// The parser will track its current position inside the expression and call the respective
122/// operand/operator parsers. The prefix and postfix parsers are called repeatedly until they fail before
123/// execution moves on to the operand or binary parser.
124///
125/// Expressions are folded as soon as possible. The result will be reused as another operand. After the
126/// expression has been read completely any remaining operations are folded and the resulting, single
127/// operand is returned as the result.
128///
129/// It will return `Err(Err:Error((_, ErrorKind::Precedence)))` if:
130/// * the `fold` function returns an `Err`.
131/// * more than one or no operands remain after the expression has been evaluated completely.
132/// * the input does not match the pattern: `prefix* operand postfix* (binary prefix* operand postfix*)*`
133///
134/// # Arguments
135/// * `prefix` Parser for prefix unary operators.
136/// * `postfix` Parser for postfix unary operators.
137/// * `binary` Parser for binary operators.
138/// * `operand` Parser for operands.
139/// * `fold` Function that evaluates a single operation and returns the result.
140///
141/// # Example
142/// ```rust
143/// # use nom::{Err, error::{Error, ErrorKind}, IResult};
144/// use nom_language::precedence::{precedence, unary_op, binary_op, Assoc, Operation};
145/// use nom::character::complete::digit1;
146/// use nom::combinator::{map_res, fail};
147/// use nom::sequence::delimited;
148/// use nom::bytes::complete::tag;
149/// use nom::branch::alt;
150///
151/// fn parser(i: &str) -> IResult<&str, i64> {
152///   precedence(
153///     unary_op(1, tag("-")),
154///     fail(),
155///     alt((
156///       binary_op(2, Assoc::Left, tag("*")),
157///       binary_op(2, Assoc::Left, tag("/")),
158///       binary_op(3, Assoc::Left, tag("+")),
159///       binary_op(3, Assoc::Left, tag("-")),
160///     )),
161///     alt((
162///       map_res(digit1, |s: &str| s.parse::<i64>()),
163///       delimited(tag("("), parser, tag(")")),
164///     )),
165///     |op: Operation<&str, &str, &str, i64>| {
166///       use nom_language::precedence::Operation::*;
167///       match op {
168///         Prefix("-", o) => Ok(-o),
169///         Binary(lhs, "*", rhs) => Ok(lhs * rhs),
170///         Binary(lhs, "/", rhs) => Ok(lhs / rhs),
171///         Binary(lhs, "+", rhs) => Ok(lhs + rhs),
172///         Binary(lhs, "-", rhs) => Ok(lhs - rhs),
173///         _ => Err("Invalid combination"),
174///       }
175///     }
176///   )(i)
177/// }
178///
179/// assert_eq!(parser("8-2*2"), Ok(("", 4)));
180/// assert_eq!(parser("4-(2+2)"), Ok(("", 0)));
181/// assert_eq!(parser("3-(2*3)+7+2*2-(2*(2+4))"), Ok(("", -4)));
182/// ```
183///
184/// # Evaluation order
185/// This parser reads expressions from left to right and folds operations as soon as possible. This
186/// behaviour is only important when using an operator grammar that allows for ambigious expressions.
187///
188/// For example, the expression `-a++**b` is ambigious with the following precedence.
189///
190/// | Operator | Position | Precedence | Associativity |
191/// |----------|----------|------------|---------------|
192/// | **       | Binary   | 1          | Right         |
193/// | -        | Prefix   | 2          | N/A           |
194/// | ++       | Postfix  | 3          | N/A           |
195///
196/// The expression can be parsed in two ways: `-((a++)**b)` or `((-a)++)**b`. This parser will always
197/// parse it as the latter because of how it evaluates expressions:
198/// * It reads, left-to-right, the first two operators `-a++`.
199/// * Because the minus takes precedence over the increment it is evaluated immediately `(-a)++`.
200/// * It then reads the remaining input and evaluates the increment next in order to preserve its
201/// position in the expression \
202/// `((-a)++)**b`.
203pub fn precedence<I, O, E, E2, F, G, H1, H3, H2, P1, P2, P3, Q>(
204  mut prefix: H1,
205  mut postfix: H2,
206  mut binary: H3,
207  mut operand: F,
208  mut fold: G,
209) -> impl FnMut(I) -> IResult<I, O, E>
210where
211  I: Clone + PartialEq,
212  E: ParseError<I> + FromExternalError<I, E2>,
213  F: Parser<I, Output = O, Error = E>,
214  G: FnMut(Operation<P1, P2, P3, O>) -> Result<O, E2>,
215  H1: Parser<I, Output = Unary<P1, Q>, Error = E>,
216  H2: Parser<I, Output = Unary<P2, Q>, Error = E>,
217  H3: Parser<I, Output = Binary<P3, Q>, Error = E>,
218  Q: Ord + Copy,
219{
220  move |mut i| {
221    let mut operands = Vec::new();
222    let mut operators = Vec::new();
223    let mut i1 = i.clone();
224
225    'main: loop {
226      'prefix: loop {
227        match prefix.parse(i1.clone()) {
228          Err(Err::Error(_)) => break 'prefix,
229          Err(e) => return Err(e),
230          Ok((i2, o)) => {
231            // infinite loop check: the parser must always consume
232            if i2 == i1 {
233              return Err(Err::Error(E::from_error_kind(i1, ErrorKind::Precedence)));
234            }
235            i1 = i2;
236            operators.push(Operator::Prefix(o.value, o.precedence));
237          }
238        }
239      }
240
241      let (i2, o) = match operand.parse(i1.clone()) {
242        Ok((i, o)) => (i, o),
243        Err(Err::Error(e)) => return Err(Err::Error(E::append(i, ErrorKind::Precedence, e))),
244        Err(e) => return Err(e),
245      };
246      i1 = i2;
247      operands.push(o);
248
249      'postfix: loop {
250        match postfix.parse(i1.clone()) {
251          Err(Err::Error(_)) => break 'postfix,
252          Err(e) => return Err(e),
253          Ok((i2, o)) => {
254            // infinite loop check: the parser must always consume
255            if i2 == i1 {
256              return Err(Err::Error(E::from_error_kind(i1, ErrorKind::Precedence)));
257            }
258
259            while operators
260              .last()
261              .map(|op| op.precedence() <= o.precedence)
262              .unwrap_or(false)
263            {
264              let value = operands.pop().unwrap();
265              let operation = match operators.pop().unwrap() {
266                Operator::Prefix(op, _) => Operation::Prefix(op, value),
267                Operator::Postfix(op, _) => Operation::Postfix(value, op),
268                Operator::Binary(op, _, _) => match operands.pop() {
269                  Some(lhs) => Operation::Binary(lhs, op, value),
270                  None => return Err(Err::Error(E::from_error_kind(i1, ErrorKind::Precedence))),
271                },
272              };
273              let result = match fold(operation) {
274                Err(e) => {
275                  return Err(Err::Error(E::from_external_error(
276                    i,
277                    ErrorKind::Precedence,
278                    e,
279                  )))
280                }
281                Ok(r) => r,
282              };
283              operands.push(result);
284            }
285            i1 = i2;
286            operators.push(Operator::Postfix(o.value, o.precedence));
287          }
288        }
289      }
290
291      match binary.parse(i1.clone()) {
292        Err(Err::Error(_)) => break 'main,
293        Err(e) => return Err(e),
294        Ok((i2, o)) => {
295          while operators
296            .last()
297            .map(|op| {
298              op.precedence() < o.precedence
299                || (o.assoc == Assoc::Left && op.precedence() == o.precedence)
300                || (op.is_postfix())
301            })
302            .unwrap_or(false)
303          {
304            let value = operands.pop().unwrap();
305            let operation = match operators.pop().unwrap() {
306              Operator::Prefix(op, _) => Operation::Prefix(op, value),
307              Operator::Postfix(op, _) => Operation::Postfix(value, op),
308              Operator::Binary(op, _, _) => match operands.pop() {
309                Some(lhs) => Operation::Binary(lhs, op, value),
310                None => return Err(Err::Error(E::from_error_kind(i1, ErrorKind::Precedence))),
311              },
312            };
313            let result = match fold(operation) {
314              Err(e) => {
315                return Err(Err::Error(E::from_external_error(
316                  i,
317                  ErrorKind::Precedence,
318                  e,
319                )))
320              }
321              Ok(r) => r,
322            };
323            operands.push(result);
324          }
325          operators.push(Operator::Binary(o.value, o.precedence, o.assoc));
326          i1 = i2;
327        }
328      }
329
330      // infinite loop check: either operand or operator must consume input
331      if i == i1 {
332        return Err(Err::Error(E::from_error_kind(i, ErrorKind::Precedence)));
333      }
334      i = i1.clone();
335    }
336
337    while operators.len() > 0 {
338      let value = match operands.pop() {
339        Some(o) => o,
340        None => return Err(Err::Error(E::from_error_kind(i, ErrorKind::Precedence))),
341      };
342      let operation = match operators.pop().unwrap() {
343        Operator::Prefix(op, _) => Operation::Prefix(op, value),
344        Operator::Postfix(op, _) => Operation::Postfix(value, op),
345        Operator::Binary(op, _, _) => match operands.pop() {
346          Some(lhs) => Operation::Binary(lhs, op, value),
347          None => return Err(Err::Error(E::from_error_kind(i, ErrorKind::Precedence))),
348        },
349      };
350      let result = match fold(operation) {
351        Ok(r) => r,
352        Err(e) => {
353          return Err(Err::Error(E::from_external_error(
354            i,
355            ErrorKind::Precedence,
356            e,
357          )))
358        }
359      };
360      operands.push(result);
361    }
362
363    if operands.len() == 1 {
364      return Ok((i1, operands.pop().unwrap()));
365    } else {
366      return Err(Err::Error(E::from_error_kind(i, ErrorKind::Precedence)));
367    }
368  }
369}
370
371/// Applies a parser multiple times separated by another parser.
372///
373/// It is similar to [`separated_list1`][nom::multi::separated_list1] but instead of collecting
374/// into a vector, you have a callback to build the output.
375///
376/// In a LALR grammar a left recursive operator is usually built with a rule syntax such as:
377///  * A := A op B | B
378///
379/// If you try to parse that wth [`alt`][nom::branch::alt] it will fail with a stack overflow
380/// because the recusion is unlimited. This function solves this problem by converting the recusion
381/// into an iteration.
382///
383/// Compare with a right recursive operator, that in LALR would be:
384///  * A := B op A | B
385/// Or equivalently:
386///  * A := B (op A)?
387///
388///  That can be written in `nom` trivially.
389///
390/// This stops when either parser returns an error and returns the last built value. to instead chain an error up, see
391/// [`cut`][nom::combinator::cut].
392///
393/// # Arguments
394/// * `child` The parser to apply.
395/// * `operator` Parses the operator between argument.
396/// * `init` A function returning the initial value.
397/// * `fold` The function that combines a result of `f` with
398///       the current accumulator.
399/// ```rust
400/// # #[macro_use] extern crate nom;
401/// # use nom::{Err, error::ErrorKind, Needed, IResult, Parser};
402/// use nom_language::precedence::left_assoc;
403/// use nom::branch::alt;
404/// use nom::sequence::delimited;
405/// use nom::character::complete::{char, digit1};
406///
407/// fn add(i: &str) -> IResult<&str, String> {
408///     left_assoc(mult, char('+'), |a, o, b| format!("{o}{a}{b}")).parse(i)
409/// }
410/// fn mult(i: &str) -> IResult<&str, String> {
411///     left_assoc(single, char('*'), |a, o, b| format!("{o}{a}{b}")).parse(i)
412/// }
413/// fn single(i: &str) -> IResult<&str, String> {
414///     alt((
415///         digit1.map(|x: &str| x.to_string()),
416///         delimited(char('('), add, char(')'))
417///     )).parse(i)
418/// }
419///
420/// assert_eq!(single("(1+2*3)"), Ok(("", String::from("+1*23"))));
421/// assert_eq!(single("((1+2)*3)"), Ok(("", String::from("*+123"))));
422/// assert_eq!(single("(1*2+3)"), Ok(("", String::from("+*123"))));
423/// assert_eq!(single("((1+2*3)+4)"), Ok(("", String::from("++1*234"))));
424/// assert_eq!(single("(1+(2*3+4))"), Ok(("", String::from("+1+*234"))));
425/// ```
426pub fn left_assoc<I, E, O, OP, G, F, B>(
427  child: F,
428  operator: G,
429  builder: B,
430) -> impl Parser<I, Output = O, Error = E>
431where
432  I: Clone + Input,
433  E: ParseError<I>,
434  F: Parser<I, Output = O, Error = E>,
435  G: Parser<I, Output = OP, Error = E>,
436  B: FnMut(O, OP, O) -> O,
437{
438  LeftAssoc {
439    child,
440    operator,
441    builder,
442  }
443}
444
445/// Parser implementation for the [`separated_list1`][nom::multi::separated_list1] combinator
446pub struct LeftAssoc<F, G, B> {
447  child: F,
448  operator: G,
449  builder: B,
450}
451
452impl<I, E, O, OP, G, F, B> Parser<I> for LeftAssoc<F, G, B>
453where
454  I: Clone + Input,
455  E: ParseError<I>,
456  F: Parser<I, Output = O, Error = E>,
457  G: Parser<I, Output = OP, Error = E>,
458  B: FnMut(O, OP, O) -> O,
459{
460  type Output = O;
461  type Error = E;
462
463  fn process<OM: OutputMode>(
464    &mut self,
465    mut i: I,
466  ) -> nom::PResult<OM, I, Self::Output, Self::Error> {
467    let (i1, mut res) = self.child.process::<OM>(i)?;
468    i = i1;
469
470    loop {
471      let len = i.input_len();
472      match self
473        .operator
474        .process::<OutputM<OM::Output, Check, OM::Incomplete>>(i.clone())
475      {
476        Err(Err::Error(_)) => return Ok((i, res)),
477        Err(Err::Failure(e)) => return Err(Err::Failure(e)),
478        Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)),
479        Ok((i1, op)) => {
480          match self
481            .child
482            .process::<OutputM<OM::Output, Check, OM::Incomplete>>(i1.clone())
483          {
484            Err(Err::Error(_)) => return Ok((i, res)),
485            Err(Err::Failure(e)) => return Err(Err::Failure(e)),
486            Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)),
487            Ok((i2, rhs)) => {
488              // infinite loop check: the parser must always consume
489              if i2.input_len() == len {
490                return Err(Err::Error(OM::Error::bind(|| {
491                  <F as Parser<I>>::Error::from_error_kind(i, ErrorKind::SeparatedList)
492                })));
493              }
494              // there is no combine() with 3 arguments, fake it with a tuple and two calls
495              let op_rhs = OM::Output::combine(op, rhs, |op, rhs| (op, rhs));
496              res = OM::Output::combine(res, op_rhs, |lhs, (op, rhs)| (self.builder)(lhs, op, rhs));
497              i = i2;
498            }
499          }
500        }
501      }
502    }
503  }
504}