nom/bytes/
complete.rs

//! Parsers recognizing byte streams, complete input version
2
3use core::marker::PhantomData;
4
5use crate::error::ParseError;
6use crate::internal::{IResult, Parser};
7use crate::traits::{Compare, FindSubstring, FindToken, ToUsize};
8use crate::Complete;
9use crate::Emit;
10use crate::Input;
11use crate::OutputM;
12
13/// Recognizes a pattern
14///
15/// The input data will be compared to the tag combinator's argument and will return the part of
16/// the input that matches the argument
17///
18/// It will return `Err(Err::Error((_, ErrorKind::Tag)))` if the input doesn't match the pattern
19/// # Example
20/// ```rust
21/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
22/// use nom::bytes::complete::tag;
23///
24/// fn parser(s: &str) -> IResult<&str, &str> {
25///   tag("Hello")(s)
26/// }
27///
28/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello")));
29/// assert_eq!(parser("Something"), Err(Err::Error(Error::new("Something", ErrorKind::Tag))));
30/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
31/// ```
32pub fn tag<T, I, Error: ParseError<I>>(tag: T) -> impl Fn(I) -> IResult<I, I, Error>
33where
34  I: Input + Compare<T>,
35  T: Input + Clone,
36{
37  move |i: I| {
38    let mut parser = super::Tag {
39      tag: tag.clone(),
40      e: PhantomData,
41    };
42
43    parser.process::<OutputM<Emit, Emit, Complete>>(i)
44  }
45}
46
47/// Recognizes a case insensitive pattern.
48///
49/// The input data will be compared to the tag combinator's argument and will return the part of
50/// the input that matches the argument with no regard to case.
51///
52/// It will return `Err(Err::Error((_, ErrorKind::Tag)))` if the input doesn't match the pattern.
53/// # Example
54/// ```rust
55/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
56/// use nom::bytes::complete::tag_no_case;
57///
58/// fn parser(s: &str) -> IResult<&str, &str> {
59///   tag_no_case("hello")(s)
60/// }
61///
62/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello")));
63/// assert_eq!(parser("hello, World!"), Ok((", World!", "hello")));
64/// assert_eq!(parser("HeLlO, World!"), Ok((", World!", "HeLlO")));
65/// assert_eq!(parser("Something"), Err(Err::Error(Error::new("Something", ErrorKind::Tag))));
66/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
67/// ```
68pub fn tag_no_case<T, I, Error: ParseError<I>>(tag: T) -> impl Fn(I) -> IResult<I, I, Error>
69where
70  I: Input + Compare<T>,
71  T: Input + Clone,
72{
73  move |i: I| {
74    let mut parser = super::TagNoCase {
75      tag: tag.clone(),
76      e: PhantomData,
77    };
78
79    parser.process::<OutputM<Emit, Emit, Complete>>(i)
80  }
81}
82
83/// Parse till certain characters are met.
84///
85/// The parser will return the longest slice till one of the characters of the combinator's argument are met.
86///
87/// It doesn't consume the matched character.
88///
89/// It will return a `Err::Error(("", ErrorKind::IsNot))` if the pattern wasn't met.
90/// # Example
91/// ```rust
92/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
93/// use nom::bytes::complete::is_not;
94///
95/// fn not_space(s: &str) -> IResult<&str, &str> {
96///   is_not(" \t\r\n")(s)
97/// }
98///
99/// assert_eq!(not_space("Hello, World!"), Ok((" World!", "Hello,")));
100/// assert_eq!(not_space("Sometimes\t"), Ok(("\t", "Sometimes")));
101/// assert_eq!(not_space("Nospace"), Ok(("", "Nospace")));
102/// assert_eq!(not_space(""), Err(Err::Error(Error::new("", ErrorKind::IsNot))));
103/// ```
104pub fn is_not<T, I, Error: ParseError<I>>(arr: T) -> impl FnMut(I) -> IResult<I, I, Error>
105where
106  I: Input,
107  T: FindToken<<I as Input>::Item>,
108{
109  let mut parser = super::is_not(arr);
110
111  move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
112}
113
114/// Returns the longest slice of the matches the pattern.
115///
116/// The parser will return the longest slice consisting of the characters in provided in the
117/// combinator's argument.
118///
119/// It will return a `Err(Err::Error((_, ErrorKind::IsA)))` if the pattern wasn't met.
120/// # Example
121/// ```rust
122/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
123/// use nom::bytes::complete::is_a;
124///
125/// fn hex(s: &str) -> IResult<&str, &str> {
126///   is_a("1234567890ABCDEF")(s)
127/// }
128///
129/// assert_eq!(hex("123 and voila"), Ok((" and voila", "123")));
130/// assert_eq!(hex("DEADBEEF and others"), Ok((" and others", "DEADBEEF")));
131/// assert_eq!(hex("BADBABEsomething"), Ok(("something", "BADBABE")));
132/// assert_eq!(hex("D15EA5E"), Ok(("", "D15EA5E")));
133/// assert_eq!(hex(""), Err(Err::Error(Error::new("", ErrorKind::IsA))));
134/// ```
135pub fn is_a<T, I, Error: ParseError<I>>(arr: T) -> impl FnMut(I) -> IResult<I, I, Error>
136where
137  I: Input,
138  T: FindToken<<I as Input>::Item>,
139{
140  let mut parser = super::is_a(arr);
141
142  move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
143}
144
145/// Returns the longest input slice (if any) that matches the predicate.
146///
147/// The parser will return the longest slice that matches the given predicate *(a function that
148/// takes the input and returns a bool)*.
149/// # Example
150/// ```rust
151/// # use nom::{Err, error::ErrorKind, Needed, IResult};
152/// use nom::bytes::complete::take_while;
153/// use nom::AsChar;
154///
155/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> {
156///   take_while(AsChar::is_alpha)(s)
157/// }
158///
159/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..])));
160/// assert_eq!(alpha(b"12345"), Ok((&b"12345"[..], &b""[..])));
161/// assert_eq!(alpha(b"latin"), Ok((&b""[..], &b"latin"[..])));
162/// assert_eq!(alpha(b""), Ok((&b""[..], &b""[..])));
163/// ```
164pub fn take_while<F, I, Error: ParseError<I>>(cond: F) -> impl FnMut(I) -> IResult<I, I, Error>
165where
166  I: Input,
167  F: Fn(<I as Input>::Item) -> bool,
168{
169  let mut parser = super::take_while(cond);
170
171  move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
172}
173
174/// Returns the longest (at least 1) input slice that matches the predicate.
175///
176/// The parser will return the longest slice that matches the given predicate *(a function that
177/// takes the input and returns a bool)*.
178///
179/// It will return an `Err(Err::Error((_, ErrorKind::TakeWhile1)))` if the pattern wasn't met.
180/// # Example
181/// ```rust
182/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
183/// use nom::bytes::complete::take_while1;
184/// use nom::AsChar;
185///
186/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> {
187///   take_while1(AsChar::is_alpha)(s)
188/// }
189///
190/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..])));
191/// assert_eq!(alpha(b"latin"), Ok((&b""[..], &b"latin"[..])));
192/// assert_eq!(alpha(b"12345"), Err(Err::Error(Error::new(&b"12345"[..], ErrorKind::TakeWhile1))));
193/// ```
194pub fn take_while1<F, I, Error: ParseError<I>>(cond: F) -> impl FnMut(I) -> IResult<I, I, Error>
195where
196  I: Input,
197  F: Fn(<I as Input>::Item) -> bool,
198{
199  let mut parser = super::take_while1(cond);
200
201  move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
202}
203
204/// Returns the longest (m <= len <= n) input slice that matches the predicate.
205///
206/// The parser will return the longest slice that matches the given predicate *(a function that
207/// takes the input and returns a bool)*.
208///
209/// It will return an `Err::Error((_, ErrorKind::TakeWhileMN))` if the pattern wasn't met or is out
210/// of range (m <= len <= n).
211/// # Example
212/// ```rust
213/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
214/// use nom::bytes::complete::take_while_m_n;
215/// use nom::AsChar;
216///
217/// fn short_alpha(s: &[u8]) -> IResult<&[u8], &[u8]> {
218///   take_while_m_n(3, 6, AsChar::is_alpha)(s)
219/// }
220///
221/// assert_eq!(short_alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..])));
222/// assert_eq!(short_alpha(b"lengthy"), Ok((&b"y"[..], &b"length"[..])));
223/// assert_eq!(short_alpha(b"latin"), Ok((&b""[..], &b"latin"[..])));
224/// assert_eq!(short_alpha(b"ed"), Err(Err::Error(Error::new(&b"ed"[..], ErrorKind::TakeWhileMN))));
225/// assert_eq!(short_alpha(b"12345"), Err(Err::Error(Error::new(&b"12345"[..], ErrorKind::TakeWhileMN))));
226/// ```
227pub fn take_while_m_n<F, I, Error: ParseError<I>>(
228  m: usize,
229  n: usize,
230  cond: F,
231) -> impl FnMut(I) -> IResult<I, I, Error>
232where
233  I: Input,
234  F: Fn(<I as Input>::Item) -> bool,
235{
236  let mut parser = super::take_while_m_n(m, n, cond);
237
238  move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
239}
240
241/// Returns the longest input slice (if any) till a predicate is met.
242///
243/// The parser will return the longest slice till the given predicate *(a function that
244/// takes the input and returns a bool)*.
245/// # Example
246/// ```rust
247/// # use nom::{Err, error::ErrorKind, Needed, IResult};
248/// use nom::bytes::complete::take_till;
249///
250/// fn till_colon(s: &str) -> IResult<&str, &str> {
251///   take_till(|c| c == ':')(s)
252/// }
253///
254/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin")));
255/// assert_eq!(till_colon(":empty matched"), Ok((":empty matched", ""))); //allowed
256/// assert_eq!(till_colon("12345"), Ok(("", "12345")));
257/// assert_eq!(till_colon(""), Ok(("", "")));
258/// ```
259#[allow(clippy::redundant_closure)]
260pub fn take_till<F, I, Error: ParseError<I>>(cond: F) -> impl FnMut(I) -> IResult<I, I, Error>
261where
262  I: Input,
263  F: Fn(<I as Input>::Item) -> bool,
264{
265  let mut parser = super::take_till(cond);
266
267  move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
268}
269
270/// Returns the longest (at least 1) input slice till a predicate is met.
271///
272/// The parser will return the longest slice till the given predicate *(a function that
273/// takes the input and returns a bool)*.
274///
275/// It will return `Err(Err::Error((_, ErrorKind::TakeTill1)))` if the input is empty or the
276/// predicate matches the first input.
277/// # Example
278/// ```rust
279/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
280/// use nom::bytes::complete::take_till1;
281///
282/// fn till_colon(s: &str) -> IResult<&str, &str> {
283///   take_till1(|c| c == ':')(s)
284/// }
285///
286/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin")));
287/// assert_eq!(till_colon(":empty matched"), Err(Err::Error(Error::new(":empty matched", ErrorKind::TakeTill1))));
288/// assert_eq!(till_colon("12345"), Ok(("", "12345")));
289/// assert_eq!(till_colon(""), Err(Err::Error(Error::new("", ErrorKind::TakeTill1))));
290/// ```
291#[allow(clippy::redundant_closure)]
292pub fn take_till1<F, I, Error: ParseError<I>>(cond: F) -> impl FnMut(I) -> IResult<I, I, Error>
293where
294  I: Input,
295  F: Fn(<I as Input>::Item) -> bool,
296{
297  let mut parser = super::take_till1(cond);
298
299  move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
300}
301
302/// Returns an input slice containing the first N input elements (Input[..N]).
303///
304/// It will return `Err(Err::Error((_, ErrorKind::Eof)))` if the input is shorter than the argument.
305/// # Example
306/// ```rust
307/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
308/// use nom::bytes::complete::take;
309///
310/// fn take6(s: &str) -> IResult<&str, &str> {
311///   take(6usize)(s)
312/// }
313///
314/// assert_eq!(take6("1234567"), Ok(("7", "123456")));
315/// assert_eq!(take6("things"), Ok(("", "things")));
316/// assert_eq!(take6("short"), Err(Err::Error(Error::new("short", ErrorKind::Eof))));
317/// assert_eq!(take6(""), Err(Err::Error(Error::new("", ErrorKind::Eof))));
318/// ```
319///
320/// The units that are taken will depend on the input type. For example, for a
321/// `&str` it will take a number of `char`'s, whereas for a `&[u8]` it will
322/// take that many `u8`'s:
323///
324/// ```rust
325/// use nom::error::Error;
326/// use nom::bytes::complete::take;
327///
328/// assert_eq!(take::<_, _, Error<_>>(1usize)("💙"), Ok(("", "💙")));
329/// assert_eq!(take::<_, _, Error<_>>(1usize)("💙".as_bytes()), Ok((b"\x9F\x92\x99".as_ref(), b"\xF0".as_ref())));
330/// ```
331pub fn take<C, I, Error: ParseError<I>>(count: C) -> impl FnMut(I) -> IResult<I, I, Error>
332where
333  I: Input,
334  C: ToUsize,
335{
336  let mut parser = super::take(count);
337
338  move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
339}
340
341/// Returns the input slice up to the first occurrence of the pattern.
342///
343/// It doesn't consume the pattern. It will return `Err(Err::Error((_, ErrorKind::TakeUntil)))`
344/// if the pattern wasn't met.
345/// # Example
346/// ```rust
347/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
348/// use nom::bytes::complete::take_until;
349///
350/// fn until_eof(s: &str) -> IResult<&str, &str> {
351///   take_until("eof")(s)
352/// }
353///
354/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world")));
355/// assert_eq!(until_eof("hello, world"), Err(Err::Error(Error::new("hello, world", ErrorKind::TakeUntil))));
356/// assert_eq!(until_eof(""), Err(Err::Error(Error::new("", ErrorKind::TakeUntil))));
357/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1")));
358/// ```
359pub fn take_until<T, I, Error: ParseError<I>>(tag: T) -> impl FnMut(I) -> IResult<I, I, Error>
360where
361  I: Input + FindSubstring<T>,
362  T: Input + Clone,
363{
364  let mut parser = super::take_until(tag);
365
366  move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
367}
368
369/// Returns the non empty input slice up to the first occurrence of the pattern.
370///
371/// It doesn't consume the pattern. It will return `Err(Err::Error((_, ErrorKind::TakeUntil)))`
372/// if the pattern wasn't met.
373/// # Example
374/// ```rust
375/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
376/// use nom::bytes::complete::take_until1;
377///
378/// fn until_eof(s: &str) -> IResult<&str, &str> {
379///   take_until1("eof")(s)
380/// }
381///
382/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world")));
383/// assert_eq!(until_eof("hello, world"), Err(Err::Error(Error::new("hello, world", ErrorKind::TakeUntil))));
384/// assert_eq!(until_eof(""), Err(Err::Error(Error::new("", ErrorKind::TakeUntil))));
385/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1")));
386/// assert_eq!(until_eof("eof"), Err(Err::Error(Error::new("eof", ErrorKind::TakeUntil))));
387/// ```
388pub fn take_until1<T, I, Error: ParseError<I>>(tag: T) -> impl FnMut(I) -> IResult<I, I, Error>
389where
390  I: Input + FindSubstring<T>,
391  T: Input + Clone,
392{
393  let mut parser = super::take_until1(tag);
394
395  move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
396}
397
398/// Matches a byte string with escaped characters.
399///
400/// * The first argument matches the normal characters (it must not accept the control character)
401/// * The second argument is the control character (like `\` in most languages)
402/// * The third argument matches the escaped characters
403/// # Example
404/// ```
405/// # use nom::{Err, error::ErrorKind, Needed, IResult};
406/// # use nom::character::complete::digit1;
407/// use nom::bytes::complete::escaped;
408/// use nom::character::complete::one_of;
409///
410/// fn esc(s: &str) -> IResult<&str, &str> {
411///   escaped(digit1, '\\', one_of(r#""n\"#))(s)
412/// }
413///
414/// assert_eq!(esc("123;"), Ok((";", "123")));
415/// assert_eq!(esc(r#"12\"34;"#), Ok((";", r#"12\"34"#)));
416/// ```
417///
418pub fn escaped<'a, I, Error, F, G>(
419  normal: F,
420  control_char: char,
421  escapable: G,
422) -> impl FnMut(I) -> IResult<I, I, Error>
423where
424  I: Clone + crate::traits::Offset + Input + 'a,
425  <I as Input>::Item: crate::traits::AsChar,
426  F: Parser<I, Error = Error>,
427  G: Parser<I, Error = Error>,
428  Error: ParseError<I>,
429{
430  let mut parser = super::escaped(normal, control_char, escapable);
431
432  move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
433}
434
/// Recognizes a string that may contain escape sequences and rewrites those sequences.
///
/// * `normal` matches the ordinary characters (it must not match the control character)
/// * `control_char` is the character that introduces an escape (like `\` in most languages)
/// * `transform` matches whatever follows the control character and yields its replacement
///
/// The control character is consumed together with the escape it introduces. For
/// instance, with a suitable `transform`, the input `abc\tdef` could become
/// `abc    def`.
///
/// ```
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// # use std::str::from_utf8;
/// use nom::bytes::complete::{escaped_transform, tag};
/// use nom::character::complete::alpha1;
/// use nom::branch::alt;
/// use nom::combinator::value;
///
/// fn parser(input: &str) -> IResult<&str, String> {
///   escaped_transform(
///     alpha1,
///     '\\',
///     alt((
///       value("\\", tag("\\")),
///       value("\"", tag("\"")),
///       value("\n", tag("n")),
///     ))
///   )(input)
/// }
///
/// assert_eq!(parser("ab\\\"cd"), Ok(("", String::from("ab\"cd"))));
/// assert_eq!(parser("ab\\ncd"), Ok(("", String::from("ab\ncd"))));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn escaped_transform<I, Error, F, G, O1, O2, ExtendItem, Output>(
  normal: F,
  control_char: char,
  transform: G,
) -> impl FnMut(I) -> IResult<I, Output, Error>
where
  I: Clone + crate::traits::Offset + Input,
  I: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
  O1: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
  O2: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
  <I as Input>::Item: crate::traits::AsChar,
  F: Parser<I, Output = O1, Error = Error>,
  G: Parser<I, Output = O2, Error = Error>,
  Error: ParseError<I>,
{
  // Build the mode-generic parser once; the closure pins it to complete-input mode.
  let mut inner = super::escaped_transform(normal, control_char, transform);

  move |input: I| {
    inner.process::<OutputM<Emit, Emit, Complete>>(input)
  }
}
487
#[cfg(test)]
mod tests {
  use super::*;
  use crate::error::ErrorKind;
  use crate::AsChar;
  use crate::Err;

  // Whole input matches and is shorter than the upper bound: everything is consumed.
  #[test]
  fn complete_take_while_m_n_utf8_all_matching() {
    let parsed: IResult<&str, &str> = take_while_m_n(1, 4, |c: char| c.is_alphabetic())("øn");
    assert_eq!(parsed, Ok(("", "øn")));
  }

  // Whole input matches but the upper bound is one: only the first (multi-byte)
  // character is consumed.
  #[test]
  fn complete_take_while_m_n_utf8_all_matching_substring() {
    let parsed: IResult<&str, &str> = take_while_m_n(1, 1, |c: char| c.is_alphabetic())("øn");
    assert_eq!(parsed, Ok(("n", "ø")));
  }

  // Helper for issue #1336 "escaped hangs if normal parser accepts empty":
  // `alpha0` is a `normal` parser that succeeds without consuming anything.
  fn escaped_string(input: &str) -> IResult<&str, &str> {
    use crate::character::complete::{alpha0, one_of};
    escaped(alpha0, '\\', one_of("n"))(input)
  }

  // issue #1336 "escaped hangs if normal parser accepts empty"
  #[test]
  fn escaped_hang() {
    for input in ["7", "a7"] {
      escaped_string(input).unwrap();
    }
  }

  // Helper for issue #1118 "escaped does not work with empty string":
  // parses a double-quoted string whose body may be empty.
  fn unquote(input: &str) -> IResult<&str, &str> {
    use crate::bytes::complete::*;
    use crate::character::complete::*;
    use crate::combinator::opt;
    use crate::sequence::delimited;

    delimited(
      char('"'),
      escaped(opt(none_of(r#"\""#)), '\\', one_of(r#"\"rnt"#)),
      char('"'),
    )
    .parse(input)
  }

  // issue #1118 escaped does not work with empty string
  #[test]
  fn escaped_hang_1118() {
    assert_eq!(unquote(r#""""#), Ok(("", "")));
  }

  // issue #1630 take_while_m_n is invalid for multi-byte UTF-8 characters
  #[test]
  fn complete_take_while_m_n_multibyte() {
    use crate::error::Error;

    // Accepts only characters whose `AsChar::len` is greater than one.
    fn take_wide(s: &str, min: usize, max: usize) -> IResult<&str, &str> {
      take_while_m_n(min, max, |c: char| c.len() > 1)(s)
    }

    assert_eq!(take_wide("€ latin", 0, 64), Ok((" latin", "€")));
    assert_eq!(take_wide("𝄠 latin", 0, 1), Ok((" latin", "𝄠")));
    assert_eq!(take_wide("باب latin", 0, 64), Ok((" latin", "باب")));
    assert_eq!(take_wide("💣💢ᾠ latin", 3, 3), Ok((" latin", "💣💢ᾠ")));
    assert_eq!(take_wide("latin", 0, 64), Ok(("latin", "")));
    assert_eq!(take_wide("باب", 1, 3), Ok(("", "باب")));
    assert_eq!(take_wide("باب", 1, 2), Ok(("ب", "با")));
    assert_eq!(
      take_wide("latin", 1, 64),
      Err(Err::Error(Error::new("latin", ErrorKind::TakeWhileMN)))
    );
  }
}