nom/lib.rs
1//! # nom, eating data byte by byte
2//!
3//! nom is a parser combinator library with a focus on safe parsing,
4//! streaming patterns, and as much as possible zero copy.
5//!
6//! ## Example
7//!
8//! ```rust
9//! use nom::{
10//! IResult,
11//! Parser,
12//! bytes::complete::{tag, take_while_m_n},
13//! combinator::map_res
14//! };
15//!
16//! #[derive(Debug,PartialEq)]
17//! pub struct Color {
18//! pub red: u8,
19//! pub green: u8,
20//! pub blue: u8,
21//! }
22//!
23//! fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> {
24//! u8::from_str_radix(input, 16)
25//! }
26//!
27//! fn is_hex_digit(c: char) -> bool {
28//! c.is_digit(16)
29//! }
30//!
31//! fn hex_primary(input: &str) -> IResult<&str, u8> {
32//! map_res(
33//! take_while_m_n(2, 2, is_hex_digit),
34//! from_hex
35//! ).parse(input)
36//! }
37//!
38//! fn hex_color(input: &str) -> IResult<&str, Color> {
39//! let (input, _) = tag("#")(input)?;
40//! let (input, (red, green, blue)) = (hex_primary, hex_primary, hex_primary).parse(input)?;
41//!
42//! Ok((input, Color { red, green, blue }))
43//! }
44//!
45//! fn main() {
46//! assert_eq!(hex_color("#2F14DF"), Ok(("", Color {
47//! red: 47,
48//! green: 20,
49//! blue: 223,
50//! })));
51//! }
52//! ```
53//!
54//! The code is available on [GitHub](https://github.com/rust-bakery/nom)
55//!
56//! There are a few [guides](https://github.com/rust-bakery/nom/tree/main/doc) with more details
57//! about [how to write parsers](https://github.com/rust-bakery/nom/blob/main/doc/making_a_new_parser_from_scratch.md),
58//! or the [error management system](https://github.com/rust-bakery/nom/blob/main/doc/error_management.md).
59//! You can also check out the [recipes] module that contains examples of common patterns.
60//!
61//! **Looking for a specific combinator? Read the
62//! ["choose a combinator" guide](https://github.com/rust-bakery/nom/blob/main/doc/choosing_a_combinator.md)**
63//!
64//! If you are upgrading to nom 5.0, please read the
65//! [migration document](https://github.com/rust-bakery/nom/blob/main/doc/upgrading_to_nom_5.md).
66//!
67//! ## Parser combinators
68//!
69//! Parser combinators are an approach to parsers that is very different from
70//! software like [lex](https://en.wikipedia.org/wiki/Lex_(software)) and
71//! [yacc](https://en.wikipedia.org/wiki/Yacc). Instead of writing the grammar
72//! in a separate syntax and generating the corresponding code, you use very small
73//! functions with very specific purposes, like "take 5 bytes", or "recognize the
74//! word 'HTTP'", and assemble them in meaningful patterns like "recognize
75//! 'HTTP', then a space, then a version".
76//! The resulting code is small, and looks like the grammar you would have
77//! written with other parser approaches.
78//!
79//! This gives us a few advantages:
80//!
81//! - The parsers are small and easy to write
//! - The parser components are easy to reuse (if they're general enough, please add them to nom!)
//! - The parser components are easy to test separately (unit tests and property-based tests)
84//! - The parser combination code looks close to the grammar you would have written
85//! - You can build partial parsers, specific to the data you need at the moment, and ignore the rest
86//!
87//! Here is an example of one such parser, to recognize text between parentheses:
88//!
89//! ```rust
90//! use nom::{
91//! IResult,
92//! Parser,
93//! sequence::delimited,
94//! // see the "streaming/complete" paragraph lower for an explanation of these submodules
95//! character::complete::char,
96//! bytes::complete::is_not
97//! };
98//!
99//! fn parens(input: &str) -> IResult<&str, &str> {
100//! delimited(char('('), is_not(")"), char(')')).parse(input)
101//! }
102//! ```
103//!
104//! It defines a function named `parens` which will recognize a sequence of the
105//! character `(`, the longest byte array not containing `)`, then the character
106//! `)`, and will return the byte array in the middle.
107//!
108//! Here is another parser, written without using nom's combinators this time:
109//!
110//! ```rust
111//! use nom::{IResult, Err, Needed};
112//!
113//! # fn main() {
114//! fn take4(i: &[u8]) -> IResult<&[u8], &[u8]>{
115//! if i.len() < 4 {
116//! Err(Err::Incomplete(Needed::new(4)))
117//! } else {
118//! Ok((&i[4..], &i[0..4]))
119//! }
120//! }
121//! # }
122//! ```
123//!
124//! This function takes a byte array as input, and tries to consume 4 bytes.
125//! Writing all the parsers manually, like this, is dangerous, despite Rust's
126//! safety features. There are still a lot of mistakes one can make. That's why
127//! nom provides a list of functions to help in developing parsers.
128//!
129//! With functions, you would write it like this:
130//!
131//! ```rust
132//! use nom::{IResult, bytes::streaming::take};
133//! fn take4(input: &str) -> IResult<&str, &str> {
134//! take(4u8)(input)
135//! }
136//! ```
137//!
138//! A parser in nom is a function which, for an input type `I`, an output type `O`
139//! and an optional error type `E`, will have the following signature:
140//!
141//! ```rust,compile_fail
142//! fn parser(input: I) -> IResult<I, O, E>;
143//! ```
144//!
//! Or like this, if you don't want to specify a custom error type (it will be `Error<I>` by default):
146//!
147//! ```rust,compile_fail
148//! fn parser(input: I) -> IResult<I, O>;
149//! ```
150//!
151//! `IResult` is an alias for the `Result` type:
152//!
153//! ```rust
154//! use nom::{Needed, error::Error};
155//!
156//! type IResult<I, O, E = Error<I>> = Result<(I, O), Err<E>>;
157//!
158//! enum Err<E> {
159//! Incomplete(Needed),
160//! Error(E),
161//! Failure(E),
162//! }
163//! ```
164//!
165//! It can have the following values:
166//!
167//! - A correct result `Ok((I,O))` with the first element being the remaining of the input (not parsed yet), and the second the output value;
168//! - An error `Err(Err::Error(c))` with `c` an error that can be built from the input position and a parser specific error
169//! - An error `Err(Err::Incomplete(Needed))` indicating that more input is necessary. `Needed` can indicate how much data is needed
170//! - An error `Err(Err::Failure(c))`. It works like the `Error` case, except it indicates an unrecoverable error: We cannot backtrack and test another parser
171//!
172//! Please refer to the ["choose a combinator" guide](https://github.com/rust-bakery/nom/blob/main/doc/choosing_a_combinator.md) for an exhaustive list of parsers.
173//! See also the rest of the documentation [here](https://github.com/rust-bakery/nom/tree/main/doc).
174//!
175//! ## Making new parsers with function combinators
176//!
177//! nom is based on functions that generate parsers, with a signature like
178//! this: `(arguments) -> impl Fn(Input) -> IResult<Input, Output, Error>`.
179//! The arguments of a combinator can be direct values (like `take` which uses
180//! a number of bytes or character as argument) or even other parsers (like
181//! `delimited` which takes as argument 3 parsers, and returns the result of
182//! the second one if all are successful).
183//!
184//! Here are some examples:
185//!
186//! ```rust
187//! use nom::IResult;
188//! use nom::bytes::complete::{tag, take};
189//! fn abcd_parser(i: &str) -> IResult<&str, &str> {
190//! tag("abcd")(i) // will consume bytes if the input begins with "abcd"
191//! }
192//!
193//! fn take_10(i: &[u8]) -> IResult<&[u8], &[u8]> {
194//! take(10u8)(i) // will consume and return 10 bytes of input
195//! }
196//! ```
197//!
198//! ## Combining parsers
199//!
200//! There are higher level patterns, like the **`alt`** combinator, which
201//! provides a choice between multiple parsers. If one branch fails, it tries
202//! the next, and returns the result of the first parser that succeeds:
203//!
204//! ```rust
205//! use nom::{IResult, Parser};
206//! use nom::branch::alt;
207//! use nom::bytes::complete::tag;
208//!
209//! let mut alt_tags = alt((tag("abcd"), tag("efgh")));
210//!
211//! assert_eq!(alt_tags.parse(&b"abcdxxx"[..]), Ok((&b"xxx"[..], &b"abcd"[..])));
212//! assert_eq!(alt_tags.parse(&b"efghxxx"[..]), Ok((&b"xxx"[..], &b"efgh"[..])));
//! assert_eq!(alt_tags.parse(&b"ijklxxx"[..]), Err(nom::Err::Error(nom::error::Error::new(&b"ijklxxx"[..], nom::error::ErrorKind::Tag))));
214//! ```
215//!
216//! The **`opt`** combinator makes a parser optional. If the child parser returns
217//! an error, **`opt`** will still succeed and return None:
218//!
219//! ```rust
220//! use nom::{IResult, Parser, combinator::opt, bytes::complete::tag};
221//! fn abcd_opt(i: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
222//! opt(tag("abcd")).parse(i)
223//! }
224//!
225//! assert_eq!(abcd_opt(&b"abcdxxx"[..]), Ok((&b"xxx"[..], Some(&b"abcd"[..]))));
226//! assert_eq!(abcd_opt(&b"efghxxx"[..]), Ok((&b"efghxxx"[..], None)));
227//! ```
228//!
229//! **`many0`** applies a parser 0 or more times, and returns a vector of the aggregated results:
230//!
231//! ```rust
232//! # #[cfg(feature = "alloc")]
233//! # fn main() {
234//! use nom::{IResult, Parser, multi::many0, bytes::complete::tag};
235//! use std::str;
236//!
237//! fn multi(i: &str) -> IResult<&str, Vec<&str>> {
238//! many0(tag("abcd")).parse(i)
239//! }
240//!
241//! let a = "abcdef";
242//! let b = "abcdabcdef";
243//! let c = "azerty";
244//! assert_eq!(multi(a), Ok(("ef", vec!["abcd"])));
245//! assert_eq!(multi(b), Ok(("ef", vec!["abcd", "abcd"])));
246//! assert_eq!(multi(c), Ok(("azerty", Vec::new())));
247//! # }
248//! # #[cfg(not(feature = "alloc"))]
249//! # fn main() {}
250//! ```
251//!
252//! Here are some basic combinators available:
253//!
254//! - **`opt`**: Will make the parser optional (if it returns the `O` type, the new parser returns `Option<O>`)
255//! - **`many0`**: Will apply the parser 0 or more times (if it returns the `O` type, the new parser returns `Vec<O>`)
256//! - **`many1`**: Will apply the parser 1 or more times
257//!
258//! There are more complex (and more useful) parsers like tuples, which are
259//! used to apply a series of parsers then assemble their results.
260//!
261//! Example with a tuple of parsers:
262//!
263//! ```rust
264//! # fn main() {
265//! use nom::{
266//! error::ErrorKind,
267//! Needed,
268//! Parser,
269//! number::streaming::be_u16,
270//! bytes::streaming::{tag, take}};
271//!
272//! let mut tpl = (be_u16, take(3u8), tag("fg"));
273//!
274//! assert_eq!(
275//! tpl.parse(&b"abcdefgh"[..]),
276//! Ok((
277//! &b"h"[..],
278//! (0x6162u16, &b"cde"[..], &b"fg"[..])
279//! ))
280//! );
281//! assert_eq!(tpl.parse(&b"abcde"[..]), Err(nom::Err::Incomplete(Needed::new(2))));
282//! let input = &b"abcdejk"[..];
//! assert_eq!(tpl.parse(input), Err(nom::Err::Error(nom::error::Error::new(&input[5..], ErrorKind::Tag))));
284//! # }
285//! ```
286//!
287//! But you can also use a sequence of combinators written in imperative style,
288//! thanks to the `?` operator:
289//!
290//! ```rust
291//! # fn main() {
292//! use nom::{IResult, bytes::complete::tag};
293//!
294//! #[derive(Debug, PartialEq)]
295//! struct A {
296//! a: u8,
297//! b: u8
298//! }
299//!
300//! fn ret_int1(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,1)) }
301//! fn ret_int2(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,2)) }
302//!
303//! fn f(i: &[u8]) -> IResult<&[u8], A> {
304//! // if successful, the parser returns `Ok((remaining_input, output_value))` that we can destructure
305//! let (i, _) = tag("abcd")(i)?;
306//! let (i, a) = ret_int1(i)?;
307//! let (i, _) = tag("efgh")(i)?;
308//! let (i, b) = ret_int2(i)?;
309//!
310//! Ok((i, A { a, b }))
311//! }
312//!
313//! let r = f(b"abcdefghX");
314//! assert_eq!(r, Ok((&b"X"[..], A{a: 1, b: 2})));
315//! # }
316//! ```
317//!
318//! ## Streaming / Complete
319//!
320//! Some of nom's modules have `streaming` or `complete` submodules. They hold
321//! different variants of the same combinators.
322//!
323//! A streaming parser assumes that we might not have all of the input data.
324//! This can happen with some network protocol or large file parsers, where the
325//! input buffer can be full and need to be resized or refilled.
326//!
327//! A complete parser assumes that we already have all of the input data.
328//! This will be the common case with small files that can be read entirely to
329//! memory.
330//!
331//! Here is how it works in practice:
332//!
333//! ```rust
334//! use nom::{IResult, Err, Needed, error::{Error, ErrorKind}, bytes, character};
335//!
336//! fn take_streaming(i: &[u8]) -> IResult<&[u8], &[u8]> {
337//! bytes::streaming::take(4u8)(i)
338//! }
339//!
340//! fn take_complete(i: &[u8]) -> IResult<&[u8], &[u8]> {
341//! bytes::complete::take(4u8)(i)
342//! }
343//!
344//! // both parsers will take 4 bytes as expected
345//! assert_eq!(take_streaming(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..])));
346//! assert_eq!(take_complete(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..])));
347//!
348//! // if the input is smaller than 4 bytes, the streaming parser
349//! // will return `Incomplete` to indicate that we need more data
350//! assert_eq!(take_streaming(&b"abc"[..]), Err(Err::Incomplete(Needed::new(1))));
351//!
352//! // but the complete parser will return an error
353//! assert_eq!(take_complete(&b"abc"[..]), Err(Err::Error(Error::new(&b"abc"[..], ErrorKind::Eof))));
354//!
355//! // the alpha0 function recognizes 0 or more alphabetic characters
356//! fn alpha0_streaming(i: &str) -> IResult<&str, &str> {
357//! character::streaming::alpha0(i)
358//! }
359//!
360//! fn alpha0_complete(i: &str) -> IResult<&str, &str> {
361//! character::complete::alpha0(i)
362//! }
363//!
364//! // if there's a clear limit to the recognized characters, both parsers work the same way
365//! assert_eq!(alpha0_streaming("abcd;"), Ok((";", "abcd")));
366//! assert_eq!(alpha0_complete("abcd;"), Ok((";", "abcd")));
367//!
368//! // but when there's no limit, the streaming version returns `Incomplete`, because it cannot
369//! // know if more input data should be recognized. The whole input could be "abcd;", or
370//! // "abcde;"
371//! assert_eq!(alpha0_streaming("abcd"), Err(Err::Incomplete(Needed::new(1))));
372//!
373//! // while the complete version knows that all of the data is there
374//! assert_eq!(alpha0_complete("abcd"), Ok(("", "abcd")));
375//! ```
376//! **Going further:** Read the [guides](https://github.com/rust-bakery/nom/tree/main/doc),
377//! check out the [recipes]!
// Crate-wide configuration and lints.
#![cfg_attr(not(feature = "std"), no_std)] // build for no_std targets when "std" is disabled
#![cfg_attr(feature = "docsrs", feature(doc_cfg))] // nightly-only: feature badges on docs.rs
#![allow(clippy::doc_markdown)]
#![deny(missing_docs)] // every public item must carry documentation
// Pull in the `alloc` crate (Vec, String, Box, ...) for no_std-with-allocator builds.
#[cfg(feature = "alloc")]
#[macro_use]
extern crate alloc;
#[cfg(doctest)]
extern crate doc_comment;

// Compile the README's code samples as doctests so they cannot drift out of date.
#[cfg(doctest)]
doc_comment::doctest!("../README.md");
390
/// Lib module to re-export everything needed from `std` or `core`/`alloc`. This is how `serde` does
/// it, albeit there it is not public.
pub mod lib {
  /// `std` facade allowing `std`/`core` to be interchangeable. Reexports `alloc` crate optionally,
  /// as well as `core` or `std`
  #[cfg(not(feature = "std"))]
  /// internal std exports for no_std compatibility
  pub mod std {
    // Without an allocator, only the non-allocating parts of `borrow` are available.
    #[doc(hidden)]
    #[cfg(not(feature = "alloc"))]
    pub use core::borrow;

    // With the `alloc` feature, prefer the alloc crate's modules: they include the
    // allocating types (`String`, `Vec`, `Box`) that `core` lacks.
    #[cfg(feature = "alloc")]
    #[doc(hidden)]
    pub use alloc::{borrow, boxed, string, vec};

    // Allocation-free modules always come from `core`.
    #[doc(hidden)]
    pub use core::{cmp, convert, fmt, iter, mem, num, ops, option, result, slice, str};

    /// internal reproduction of std prelude
    #[doc(hidden)]
    pub mod prelude {
      pub use core::prelude as v1;
    }
  }

  #[cfg(feature = "std")]
  /// internal std exports for no_std compatibility
  pub mod std {
    // With std available, forward everything to the real standard library.
    #[doc(hidden)]
    pub use std::{
      alloc, borrow, boxed, cmp, collections, convert, fmt, hash, iter, mem, num, ops, option,
      result, slice, str, string, vec,
    };

    /// internal reproduction of std prelude
    #[doc(hidden)]
    pub mod prelude {
      pub use std::prelude as v1;
    }
  }
}
433
// Flatten the core machinery (IResult, Err, Needed, Parser, input traits)
// into the crate root so users can write `nom::IResult`, `nom::Parser`, etc.
pub use self::internal::*;
pub use self::traits::*;

// `macros` and `error` are declared first: `#[macro_use]` makes their macros
// visible to the modules declared after them.
#[macro_use]
mod macros;
#[macro_use]
pub mod error;

// Generic combinator families.
pub mod branch;
pub mod combinator;
mod internal;
pub mod multi;
pub mod sequence;
mod traits;

// Input-kind-specific parsers.
pub mod bits;
pub mod bytes;

pub mod character;

mod str;

pub mod number;
457
// Documentation-only module: its entire content is `doc/nom_recipes.md`, pulled
// in via `doc = include_str!` so it is rendered in rustdoc and its examples are
// compiled as doctests. Only built when generating docs (requires "std").
#[cfg(all(feature = "std", any(doc, doctest, feature = "docsrs")))]
#[cfg_attr(any(doc, doctest, feature = "docsrs"), doc = include_str!("../doc/nom_recipes.md"))]
pub mod recipes {}