nom_language/precedence/mod.rs
1//! Combinators to parse expressions with operator precedence.
2
3#[cfg(test)]
4mod tests;
5
6use nom::error::{ErrorKind, FromExternalError, ParseError};
7use nom::{Check, Err, IResult, Input, Mode, OutputM, OutputMode, Parser};
8
9/// An unary operator.
10pub struct Unary<V, Q: Ord + Copy> {
11 value: V,
12 precedence: Q,
13}
14
15/// A binary operator.
16pub struct Binary<V, Q: Ord + Copy> {
17 value: V,
18 precedence: Q,
19 assoc: Assoc,
20}
21
22/// A single evaluation step.
23pub enum Operation<P1, P2, P3, O> {
24 /// A prefix operation.
25 Prefix(P1, O),
26 /// A postfix operation.
27 Postfix(O, P2),
28 /// A binary operation.
29 Binary(O, P3, O),
30}
31
32/// Associativity for binary operators.
33#[derive(Copy, Clone, PartialEq, Eq)]
34pub enum Assoc {
35 /// Left associative.
36 Left,
37 /// Right associative.
38 Right,
39}
40
41/// Element for operator stack.
42enum Operator<P1, P2, P3, Q: Ord + Copy> {
43 Prefix(P1, Q),
44 Postfix(P2, Q),
45 Binary(P3, Q, Assoc),
46}
47
48impl<P1, P2, P3, Q> Operator<P1, P2, P3, Q>
49where
50 Q: Ord + Copy,
51{
52 fn precedence(&self) -> Q {
53 match self {
54 Operator::Prefix(_, p) => *p,
55 Operator::Postfix(_, p) => *p,
56 Operator::Binary(_, p, _) => *p,
57 }
58 }
59
60 fn is_postfix(&self) -> bool {
61 match self {
62 Operator::Postfix(_, _) => true,
63 _ => false,
64 }
65 }
66}
67
68/// Runs the inner parser and transforms the result into an unary operator with the given precedence.
69///
70/// Intended for use with [precedence].
71/// # Arguments
72/// * `precedence` The precedence of the operator.
73/// * `parser` The parser to apply.
74pub fn unary_op<I, O, E, P, Q>(
75 precedence: Q,
76 mut parser: P,
77) -> impl FnMut(I) -> IResult<I, Unary<O, Q>, E>
78where
79 P: Parser<I, Output = O, Error = E>,
80 Q: Ord + Copy,
81{
82 move |input| match parser.parse(input) {
83 Ok((i, value)) => Ok((i, Unary { value, precedence })),
84 Err(e) => Err(e),
85 }
86}
87
88/// Runs the inner parser and transforms the result into a binary operator with the given precedence and associativity.
89///
90/// Intended for use with [precedence].
91/// # Arguments
92/// * `precedence` The precedence of the operator.
93/// * `assoc` The associativity of the operator.
94/// * `parser` The parser to apply.
95pub fn binary_op<I, O, E, P, Q>(
96 precedence: Q,
97 assoc: Assoc,
98 mut parser: P,
99) -> impl FnMut(I) -> IResult<I, Binary<O, Q>, E>
100where
101 P: Parser<I, Output = O, Error = E>,
102 Q: Ord + Copy,
103{
104 move |input| match parser.parse(input) {
105 Ok((i, value)) => Ok((
106 i,
107 Binary {
108 value,
109 precedence,
110 assoc,
111 },
112 )),
113 Err(e) => Err(e),
114 }
115}
116
117/// Parses an expression with operator precedence.
118///
119/// Supports prefix, postfix and binary operators. Operators are applied in ascending precedence.
120///
121/// The parser will track its current position inside the expression and call the respective
122/// operand/operator parsers. The prefix and postfix parsers are called repeatedly until they fail before
123/// execution moves on to the operand or binary parser.
124///
125/// Expressions are folded as soon as possible. The result will be reused as another operand. After the
126/// expression has been read completely any remaining operations are folded and the resulting, single
127/// operand is returned as the result.
128///
129/// It will return `Err(Err:Error((_, ErrorKind::Precedence)))` if:
130/// * the `fold` function returns an `Err`.
131/// * more than one or no operands remain after the expression has been evaluated completely.
132/// * the input does not match the pattern: `prefix* operand postfix* (binary prefix* operand postfix*)*`
133///
134/// # Arguments
135/// * `prefix` Parser for prefix unary operators.
136/// * `postfix` Parser for postfix unary operators.
137/// * `binary` Parser for binary operators.
138/// * `operand` Parser for operands.
139/// * `fold` Function that evaluates a single operation and returns the result.
140///
141/// # Example
142/// ```rust
143/// # use nom::{Err, error::{Error, ErrorKind}, IResult};
144/// use nom_language::precedence::{precedence, unary_op, binary_op, Assoc, Operation};
145/// use nom::character::complete::digit1;
146/// use nom::combinator::{map_res, fail};
147/// use nom::sequence::delimited;
148/// use nom::bytes::complete::tag;
149/// use nom::branch::alt;
150///
151/// fn parser(i: &str) -> IResult<&str, i64> {
152/// precedence(
153/// unary_op(1, tag("-")),
154/// fail(),
155/// alt((
156/// binary_op(2, Assoc::Left, tag("*")),
157/// binary_op(2, Assoc::Left, tag("/")),
158/// binary_op(3, Assoc::Left, tag("+")),
159/// binary_op(3, Assoc::Left, tag("-")),
160/// )),
161/// alt((
162/// map_res(digit1, |s: &str| s.parse::<i64>()),
163/// delimited(tag("("), parser, tag(")")),
164/// )),
165/// |op: Operation<&str, &str, &str, i64>| {
166/// use nom_language::precedence::Operation::*;
167/// match op {
168/// Prefix("-", o) => Ok(-o),
169/// Binary(lhs, "*", rhs) => Ok(lhs * rhs),
170/// Binary(lhs, "/", rhs) => Ok(lhs / rhs),
171/// Binary(lhs, "+", rhs) => Ok(lhs + rhs),
172/// Binary(lhs, "-", rhs) => Ok(lhs - rhs),
173/// _ => Err("Invalid combination"),
174/// }
175/// }
176/// )(i)
177/// }
178///
179/// assert_eq!(parser("8-2*2"), Ok(("", 4)));
180/// assert_eq!(parser("4-(2+2)"), Ok(("", 0)));
181/// assert_eq!(parser("3-(2*3)+7+2*2-(2*(2+4))"), Ok(("", -4)));
182/// ```
183///
184/// # Evaluation order
185/// This parser reads expressions from left to right and folds operations as soon as possible. This
186/// behaviour is only important when using an operator grammar that allows for ambigious expressions.
187///
188/// For example, the expression `-a++**b` is ambigious with the following precedence.
189///
190/// | Operator | Position | Precedence | Associativity |
191/// |----------|----------|------------|---------------|
192/// | ** | Binary | 1 | Right |
193/// | - | Prefix | 2 | N/A |
194/// | ++ | Postfix | 3 | N/A |
195///
196/// The expression can be parsed in two ways: `-((a++)**b)` or `((-a)++)**b`. This parser will always
197/// parse it as the latter because of how it evaluates expressions:
198/// * It reads, left-to-right, the first two operators `-a++`.
199/// * Because the minus takes precedence over the increment it is evaluated immediately `(-a)++`.
200/// * It then reads the remaining input and evaluates the increment next in order to preserve its
201/// position in the expression \
202/// `((-a)++)**b`.
203pub fn precedence<I, O, E, E2, F, G, H1, H3, H2, P1, P2, P3, Q>(
204 mut prefix: H1,
205 mut postfix: H2,
206 mut binary: H3,
207 mut operand: F,
208 mut fold: G,
209) -> impl FnMut(I) -> IResult<I, O, E>
210where
211 I: Clone + PartialEq,
212 E: ParseError<I> + FromExternalError<I, E2>,
213 F: Parser<I, Output = O, Error = E>,
214 G: FnMut(Operation<P1, P2, P3, O>) -> Result<O, E2>,
215 H1: Parser<I, Output = Unary<P1, Q>, Error = E>,
216 H2: Parser<I, Output = Unary<P2, Q>, Error = E>,
217 H3: Parser<I, Output = Binary<P3, Q>, Error = E>,
218 Q: Ord + Copy,
219{
220 move |mut i| {
221 let mut operands = Vec::new();
222 let mut operators = Vec::new();
223 let mut i1 = i.clone();
224
225 'main: loop {
226 'prefix: loop {
227 match prefix.parse(i1.clone()) {
228 Err(Err::Error(_)) => break 'prefix,
229 Err(e) => return Err(e),
230 Ok((i2, o)) => {
231 // infinite loop check: the parser must always consume
232 if i2 == i1 {
233 return Err(Err::Error(E::from_error_kind(i1, ErrorKind::Precedence)));
234 }
235 i1 = i2;
236 operators.push(Operator::Prefix(o.value, o.precedence));
237 }
238 }
239 }
240
241 let (i2, o) = match operand.parse(i1.clone()) {
242 Ok((i, o)) => (i, o),
243 Err(Err::Error(e)) => return Err(Err::Error(E::append(i, ErrorKind::Precedence, e))),
244 Err(e) => return Err(e),
245 };
246 i1 = i2;
247 operands.push(o);
248
249 'postfix: loop {
250 match postfix.parse(i1.clone()) {
251 Err(Err::Error(_)) => break 'postfix,
252 Err(e) => return Err(e),
253 Ok((i2, o)) => {
254 // infinite loop check: the parser must always consume
255 if i2 == i1 {
256 return Err(Err::Error(E::from_error_kind(i1, ErrorKind::Precedence)));
257 }
258
259 while operators
260 .last()
261 .map(|op| op.precedence() <= o.precedence)
262 .unwrap_or(false)
263 {
264 let value = operands.pop().unwrap();
265 let operation = match operators.pop().unwrap() {
266 Operator::Prefix(op, _) => Operation::Prefix(op, value),
267 Operator::Postfix(op, _) => Operation::Postfix(value, op),
268 Operator::Binary(op, _, _) => match operands.pop() {
269 Some(lhs) => Operation::Binary(lhs, op, value),
270 None => return Err(Err::Error(E::from_error_kind(i1, ErrorKind::Precedence))),
271 },
272 };
273 let result = match fold(operation) {
274 Err(e) => {
275 return Err(Err::Error(E::from_external_error(
276 i,
277 ErrorKind::Precedence,
278 e,
279 )))
280 }
281 Ok(r) => r,
282 };
283 operands.push(result);
284 }
285 i1 = i2;
286 operators.push(Operator::Postfix(o.value, o.precedence));
287 }
288 }
289 }
290
291 match binary.parse(i1.clone()) {
292 Err(Err::Error(_)) => break 'main,
293 Err(e) => return Err(e),
294 Ok((i2, o)) => {
295 while operators
296 .last()
297 .map(|op| {
298 op.precedence() < o.precedence
299 || (o.assoc == Assoc::Left && op.precedence() == o.precedence)
300 || (op.is_postfix())
301 })
302 .unwrap_or(false)
303 {
304 let value = operands.pop().unwrap();
305 let operation = match operators.pop().unwrap() {
306 Operator::Prefix(op, _) => Operation::Prefix(op, value),
307 Operator::Postfix(op, _) => Operation::Postfix(value, op),
308 Operator::Binary(op, _, _) => match operands.pop() {
309 Some(lhs) => Operation::Binary(lhs, op, value),
310 None => return Err(Err::Error(E::from_error_kind(i1, ErrorKind::Precedence))),
311 },
312 };
313 let result = match fold(operation) {
314 Err(e) => {
315 return Err(Err::Error(E::from_external_error(
316 i,
317 ErrorKind::Precedence,
318 e,
319 )))
320 }
321 Ok(r) => r,
322 };
323 operands.push(result);
324 }
325 operators.push(Operator::Binary(o.value, o.precedence, o.assoc));
326 i1 = i2;
327 }
328 }
329
330 // infinite loop check: either operand or operator must consume input
331 if i == i1 {
332 return Err(Err::Error(E::from_error_kind(i, ErrorKind::Precedence)));
333 }
334 i = i1.clone();
335 }
336
337 while operators.len() > 0 {
338 let value = match operands.pop() {
339 Some(o) => o,
340 None => return Err(Err::Error(E::from_error_kind(i, ErrorKind::Precedence))),
341 };
342 let operation = match operators.pop().unwrap() {
343 Operator::Prefix(op, _) => Operation::Prefix(op, value),
344 Operator::Postfix(op, _) => Operation::Postfix(value, op),
345 Operator::Binary(op, _, _) => match operands.pop() {
346 Some(lhs) => Operation::Binary(lhs, op, value),
347 None => return Err(Err::Error(E::from_error_kind(i, ErrorKind::Precedence))),
348 },
349 };
350 let result = match fold(operation) {
351 Ok(r) => r,
352 Err(e) => {
353 return Err(Err::Error(E::from_external_error(
354 i,
355 ErrorKind::Precedence,
356 e,
357 )))
358 }
359 };
360 operands.push(result);
361 }
362
363 if operands.len() == 1 {
364 return Ok((i1, operands.pop().unwrap()));
365 } else {
366 return Err(Err::Error(E::from_error_kind(i, ErrorKind::Precedence)));
367 }
368 }
369}
370
371/// Applies a parser multiple times separated by another parser.
372///
373/// It is similar to [`separated_list1`][nom::multi::separated_list1] but instead of collecting
374/// into a vector, you have a callback to build the output.
375///
376/// In a LALR grammar a left recursive operator is usually built with a rule syntax such as:
377/// * A := A op B | B
378///
379/// If you try to parse that wth [`alt`][nom::branch::alt] it will fail with a stack overflow
380/// because the recusion is unlimited. This function solves this problem by converting the recusion
381/// into an iteration.
382///
383/// Compare with a right recursive operator, that in LALR would be:
384/// * A := B op A | B
385/// Or equivalently:
386/// * A := B (op A)?
387///
388/// That can be written in `nom` trivially.
389///
390/// This stops when either parser returns an error and returns the last built value. to instead chain an error up, see
391/// [`cut`][nom::combinator::cut].
392///
393/// # Arguments
394/// * `child` The parser to apply.
395/// * `operator` Parses the operator between argument.
396/// * `init` A function returning the initial value.
397/// * `fold` The function that combines a result of `f` with
398/// the current accumulator.
399/// ```rust
400/// # #[macro_use] extern crate nom;
401/// # use nom::{Err, error::ErrorKind, Needed, IResult, Parser};
402/// use nom_language::precedence::left_assoc;
403/// use nom::branch::alt;
404/// use nom::sequence::delimited;
405/// use nom::character::complete::{char, digit1};
406///
407/// fn add(i: &str) -> IResult<&str, String> {
408/// left_assoc(mult, char('+'), |a, o, b| format!("{o}{a}{b}")).parse(i)
409/// }
410/// fn mult(i: &str) -> IResult<&str, String> {
411/// left_assoc(single, char('*'), |a, o, b| format!("{o}{a}{b}")).parse(i)
412/// }
413/// fn single(i: &str) -> IResult<&str, String> {
414/// alt((
415/// digit1.map(|x: &str| x.to_string()),
416/// delimited(char('('), add, char(')'))
417/// )).parse(i)
418/// }
419///
420/// assert_eq!(single("(1+2*3)"), Ok(("", String::from("+1*23"))));
421/// assert_eq!(single("((1+2)*3)"), Ok(("", String::from("*+123"))));
422/// assert_eq!(single("(1*2+3)"), Ok(("", String::from("+*123"))));
423/// assert_eq!(single("((1+2*3)+4)"), Ok(("", String::from("++1*234"))));
424/// assert_eq!(single("(1+(2*3+4))"), Ok(("", String::from("+1+*234"))));
425/// ```
426pub fn left_assoc<I, E, O, OP, G, F, B>(
427 child: F,
428 operator: G,
429 builder: B,
430) -> impl Parser<I, Output = O, Error = E>
431where
432 I: Clone + Input,
433 E: ParseError<I>,
434 F: Parser<I, Output = O, Error = E>,
435 G: Parser<I, Output = OP, Error = E>,
436 B: FnMut(O, OP, O) -> O,
437{
438 LeftAssoc {
439 child,
440 operator,
441 builder,
442 }
443}
444
445/// Parser implementation for the [`separated_list1`][nom::multi::separated_list1] combinator
446pub struct LeftAssoc<F, G, B> {
447 child: F,
448 operator: G,
449 builder: B,
450}
451
452impl<I, E, O, OP, G, F, B> Parser<I> for LeftAssoc<F, G, B>
453where
454 I: Clone + Input,
455 E: ParseError<I>,
456 F: Parser<I, Output = O, Error = E>,
457 G: Parser<I, Output = OP, Error = E>,
458 B: FnMut(O, OP, O) -> O,
459{
460 type Output = O;
461 type Error = E;
462
463 fn process<OM: OutputMode>(
464 &mut self,
465 mut i: I,
466 ) -> nom::PResult<OM, I, Self::Output, Self::Error> {
467 let (i1, mut res) = self.child.process::<OM>(i)?;
468 i = i1;
469
470 loop {
471 let len = i.input_len();
472 match self
473 .operator
474 .process::<OutputM<OM::Output, Check, OM::Incomplete>>(i.clone())
475 {
476 Err(Err::Error(_)) => return Ok((i, res)),
477 Err(Err::Failure(e)) => return Err(Err::Failure(e)),
478 Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)),
479 Ok((i1, op)) => {
480 match self
481 .child
482 .process::<OutputM<OM::Output, Check, OM::Incomplete>>(i1.clone())
483 {
484 Err(Err::Error(_)) => return Ok((i, res)),
485 Err(Err::Failure(e)) => return Err(Err::Failure(e)),
486 Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)),
487 Ok((i2, rhs)) => {
488 // infinite loop check: the parser must always consume
489 if i2.input_len() == len {
490 return Err(Err::Error(OM::Error::bind(|| {
491 <F as Parser<I>>::Error::from_error_kind(i, ErrorKind::SeparatedList)
492 })));
493 }
494 // there is no combine() with 3 arguments, fake it with a tuple and two calls
495 let op_rhs = OM::Output::combine(op, rhs, |op, rhs| (op, rhs));
496 res = OM::Output::combine(res, op_rhs, |lhs, (op, rhs)| (self.builder)(lhs, op, rhs));
497 i = i2;
498 }
499 }
500 }
501 }
502 }
503 }
504}