nom/bytes/complete.rs
//! Parsers recognizing byte streams, complete input version
2
3use core::marker::PhantomData;
4
5use crate::error::ParseError;
6use crate::internal::{IResult, Parser};
7use crate::traits::{Compare, FindSubstring, FindToken, ToUsize};
8use crate::Complete;
9use crate::Emit;
10use crate::Input;
11use crate::OutputM;
12
13/// Recognizes a pattern
14///
15/// The input data will be compared to the tag combinator's argument and will return the part of
16/// the input that matches the argument
17///
18/// It will return `Err(Err::Error((_, ErrorKind::Tag)))` if the input doesn't match the pattern
19/// # Example
20/// ```rust
21/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
22/// use nom::bytes::complete::tag;
23///
24/// fn parser(s: &str) -> IResult<&str, &str> {
25/// tag("Hello")(s)
26/// }
27///
28/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello")));
29/// assert_eq!(parser("Something"), Err(Err::Error(Error::new("Something", ErrorKind::Tag))));
30/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
31/// ```
32pub fn tag<T, I, Error: ParseError<I>>(tag: T) -> impl Fn(I) -> IResult<I, I, Error>
33where
34 I: Input + Compare<T>,
35 T: Input + Clone,
36{
37 move |i: I| {
38 let mut parser = super::Tag {
39 tag: tag.clone(),
40 e: PhantomData,
41 };
42
43 parser.process::<OutputM<Emit, Emit, Complete>>(i)
44 }
45}
46
47/// Recognizes a case insensitive pattern.
48///
49/// The input data will be compared to the tag combinator's argument and will return the part of
50/// the input that matches the argument with no regard to case.
51///
52/// It will return `Err(Err::Error((_, ErrorKind::Tag)))` if the input doesn't match the pattern.
53/// # Example
54/// ```rust
55/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
56/// use nom::bytes::complete::tag_no_case;
57///
58/// fn parser(s: &str) -> IResult<&str, &str> {
59/// tag_no_case("hello")(s)
60/// }
61///
62/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello")));
63/// assert_eq!(parser("hello, World!"), Ok((", World!", "hello")));
64/// assert_eq!(parser("HeLlO, World!"), Ok((", World!", "HeLlO")));
65/// assert_eq!(parser("Something"), Err(Err::Error(Error::new("Something", ErrorKind::Tag))));
66/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
67/// ```
68pub fn tag_no_case<T, I, Error: ParseError<I>>(tag: T) -> impl Fn(I) -> IResult<I, I, Error>
69where
70 I: Input + Compare<T>,
71 T: Input + Clone,
72{
73 move |i: I| {
74 let mut parser = super::TagNoCase {
75 tag: tag.clone(),
76 e: PhantomData,
77 };
78
79 parser.process::<OutputM<Emit, Emit, Complete>>(i)
80 }
81}
82
83/// Parse till certain characters are met.
84///
85/// The parser will return the longest slice till one of the characters of the combinator's argument are met.
86///
87/// It doesn't consume the matched character.
88///
89/// It will return a `Err::Error(("", ErrorKind::IsNot))` if the pattern wasn't met.
90/// # Example
91/// ```rust
92/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
93/// use nom::bytes::complete::is_not;
94///
95/// fn not_space(s: &str) -> IResult<&str, &str> {
96/// is_not(" \t\r\n")(s)
97/// }
98///
99/// assert_eq!(not_space("Hello, World!"), Ok((" World!", "Hello,")));
100/// assert_eq!(not_space("Sometimes\t"), Ok(("\t", "Sometimes")));
101/// assert_eq!(not_space("Nospace"), Ok(("", "Nospace")));
102/// assert_eq!(not_space(""), Err(Err::Error(Error::new("", ErrorKind::IsNot))));
103/// ```
104pub fn is_not<T, I, Error: ParseError<I>>(arr: T) -> impl FnMut(I) -> IResult<I, I, Error>
105where
106 I: Input,
107 T: FindToken<<I as Input>::Item>,
108{
109 let mut parser = super::is_not(arr);
110
111 move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
112}
113
114/// Returns the longest slice of the matches the pattern.
115///
116/// The parser will return the longest slice consisting of the characters in provided in the
117/// combinator's argument.
118///
119/// It will return a `Err(Err::Error((_, ErrorKind::IsA)))` if the pattern wasn't met.
120/// # Example
121/// ```rust
122/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
123/// use nom::bytes::complete::is_a;
124///
125/// fn hex(s: &str) -> IResult<&str, &str> {
126/// is_a("1234567890ABCDEF")(s)
127/// }
128///
129/// assert_eq!(hex("123 and voila"), Ok((" and voila", "123")));
130/// assert_eq!(hex("DEADBEEF and others"), Ok((" and others", "DEADBEEF")));
131/// assert_eq!(hex("BADBABEsomething"), Ok(("something", "BADBABE")));
132/// assert_eq!(hex("D15EA5E"), Ok(("", "D15EA5E")));
133/// assert_eq!(hex(""), Err(Err::Error(Error::new("", ErrorKind::IsA))));
134/// ```
135pub fn is_a<T, I, Error: ParseError<I>>(arr: T) -> impl FnMut(I) -> IResult<I, I, Error>
136where
137 I: Input,
138 T: FindToken<<I as Input>::Item>,
139{
140 let mut parser = super::is_a(arr);
141
142 move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
143}
144
145/// Returns the longest input slice (if any) that matches the predicate.
146///
147/// The parser will return the longest slice that matches the given predicate *(a function that
148/// takes the input and returns a bool)*.
149/// # Example
150/// ```rust
151/// # use nom::{Err, error::ErrorKind, Needed, IResult};
152/// use nom::bytes::complete::take_while;
153/// use nom::AsChar;
154///
155/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> {
156/// take_while(AsChar::is_alpha)(s)
157/// }
158///
159/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..])));
160/// assert_eq!(alpha(b"12345"), Ok((&b"12345"[..], &b""[..])));
161/// assert_eq!(alpha(b"latin"), Ok((&b""[..], &b"latin"[..])));
162/// assert_eq!(alpha(b""), Ok((&b""[..], &b""[..])));
163/// ```
164pub fn take_while<F, I, Error: ParseError<I>>(cond: F) -> impl FnMut(I) -> IResult<I, I, Error>
165where
166 I: Input,
167 F: Fn(<I as Input>::Item) -> bool,
168{
169 let mut parser = super::take_while(cond);
170
171 move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
172}
173
174/// Returns the longest (at least 1) input slice that matches the predicate.
175///
176/// The parser will return the longest slice that matches the given predicate *(a function that
177/// takes the input and returns a bool)*.
178///
179/// It will return an `Err(Err::Error((_, ErrorKind::TakeWhile1)))` if the pattern wasn't met.
180/// # Example
181/// ```rust
182/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
183/// use nom::bytes::complete::take_while1;
184/// use nom::AsChar;
185///
186/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> {
187/// take_while1(AsChar::is_alpha)(s)
188/// }
189///
190/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..])));
191/// assert_eq!(alpha(b"latin"), Ok((&b""[..], &b"latin"[..])));
192/// assert_eq!(alpha(b"12345"), Err(Err::Error(Error::new(&b"12345"[..], ErrorKind::TakeWhile1))));
193/// ```
194pub fn take_while1<F, I, Error: ParseError<I>>(cond: F) -> impl FnMut(I) -> IResult<I, I, Error>
195where
196 I: Input,
197 F: Fn(<I as Input>::Item) -> bool,
198{
199 let mut parser = super::take_while1(cond);
200
201 move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
202}
203
204/// Returns the longest (m <= len <= n) input slice that matches the predicate.
205///
206/// The parser will return the longest slice that matches the given predicate *(a function that
207/// takes the input and returns a bool)*.
208///
209/// It will return an `Err::Error((_, ErrorKind::TakeWhileMN))` if the pattern wasn't met or is out
210/// of range (m <= len <= n).
211/// # Example
212/// ```rust
213/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
214/// use nom::bytes::complete::take_while_m_n;
215/// use nom::AsChar;
216///
217/// fn short_alpha(s: &[u8]) -> IResult<&[u8], &[u8]> {
218/// take_while_m_n(3, 6, AsChar::is_alpha)(s)
219/// }
220///
221/// assert_eq!(short_alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..])));
222/// assert_eq!(short_alpha(b"lengthy"), Ok((&b"y"[..], &b"length"[..])));
223/// assert_eq!(short_alpha(b"latin"), Ok((&b""[..], &b"latin"[..])));
224/// assert_eq!(short_alpha(b"ed"), Err(Err::Error(Error::new(&b"ed"[..], ErrorKind::TakeWhileMN))));
225/// assert_eq!(short_alpha(b"12345"), Err(Err::Error(Error::new(&b"12345"[..], ErrorKind::TakeWhileMN))));
226/// ```
227pub fn take_while_m_n<F, I, Error: ParseError<I>>(
228 m: usize,
229 n: usize,
230 cond: F,
231) -> impl FnMut(I) -> IResult<I, I, Error>
232where
233 I: Input,
234 F: Fn(<I as Input>::Item) -> bool,
235{
236 let mut parser = super::take_while_m_n(m, n, cond);
237
238 move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
239}
240
241/// Returns the longest input slice (if any) till a predicate is met.
242///
243/// The parser will return the longest slice till the given predicate *(a function that
244/// takes the input and returns a bool)*.
245/// # Example
246/// ```rust
247/// # use nom::{Err, error::ErrorKind, Needed, IResult};
248/// use nom::bytes::complete::take_till;
249///
250/// fn till_colon(s: &str) -> IResult<&str, &str> {
251/// take_till(|c| c == ':')(s)
252/// }
253///
254/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin")));
255/// assert_eq!(till_colon(":empty matched"), Ok((":empty matched", ""))); //allowed
256/// assert_eq!(till_colon("12345"), Ok(("", "12345")));
257/// assert_eq!(till_colon(""), Ok(("", "")));
258/// ```
259#[allow(clippy::redundant_closure)]
260pub fn take_till<F, I, Error: ParseError<I>>(cond: F) -> impl FnMut(I) -> IResult<I, I, Error>
261where
262 I: Input,
263 F: Fn(<I as Input>::Item) -> bool,
264{
265 let mut parser = super::take_till(cond);
266
267 move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
268}
269
270/// Returns the longest (at least 1) input slice till a predicate is met.
271///
272/// The parser will return the longest slice till the given predicate *(a function that
273/// takes the input and returns a bool)*.
274///
275/// It will return `Err(Err::Error((_, ErrorKind::TakeTill1)))` if the input is empty or the
276/// predicate matches the first input.
277/// # Example
278/// ```rust
279/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
280/// use nom::bytes::complete::take_till1;
281///
282/// fn till_colon(s: &str) -> IResult<&str, &str> {
283/// take_till1(|c| c == ':')(s)
284/// }
285///
286/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin")));
287/// assert_eq!(till_colon(":empty matched"), Err(Err::Error(Error::new(":empty matched", ErrorKind::TakeTill1))));
288/// assert_eq!(till_colon("12345"), Ok(("", "12345")));
289/// assert_eq!(till_colon(""), Err(Err::Error(Error::new("", ErrorKind::TakeTill1))));
290/// ```
291#[allow(clippy::redundant_closure)]
292pub fn take_till1<F, I, Error: ParseError<I>>(cond: F) -> impl FnMut(I) -> IResult<I, I, Error>
293where
294 I: Input,
295 F: Fn(<I as Input>::Item) -> bool,
296{
297 let mut parser = super::take_till1(cond);
298
299 move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
300}
301
302/// Returns an input slice containing the first N input elements (Input[..N]).
303///
304/// It will return `Err(Err::Error((_, ErrorKind::Eof)))` if the input is shorter than the argument.
305/// # Example
306/// ```rust
307/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
308/// use nom::bytes::complete::take;
309///
310/// fn take6(s: &str) -> IResult<&str, &str> {
311/// take(6usize)(s)
312/// }
313///
314/// assert_eq!(take6("1234567"), Ok(("7", "123456")));
315/// assert_eq!(take6("things"), Ok(("", "things")));
316/// assert_eq!(take6("short"), Err(Err::Error(Error::new("short", ErrorKind::Eof))));
317/// assert_eq!(take6(""), Err(Err::Error(Error::new("", ErrorKind::Eof))));
318/// ```
319///
320/// The units that are taken will depend on the input type. For example, for a
321/// `&str` it will take a number of `char`'s, whereas for a `&[u8]` it will
322/// take that many `u8`'s:
323///
324/// ```rust
325/// use nom::error::Error;
326/// use nom::bytes::complete::take;
327///
328/// assert_eq!(take::<_, _, Error<_>>(1usize)("💙"), Ok(("", "💙")));
329/// assert_eq!(take::<_, _, Error<_>>(1usize)("💙".as_bytes()), Ok((b"\x9F\x92\x99".as_ref(), b"\xF0".as_ref())));
330/// ```
331pub fn take<C, I, Error: ParseError<I>>(count: C) -> impl FnMut(I) -> IResult<I, I, Error>
332where
333 I: Input,
334 C: ToUsize,
335{
336 let mut parser = super::take(count);
337
338 move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
339}
340
341/// Returns the input slice up to the first occurrence of the pattern.
342///
343/// It doesn't consume the pattern. It will return `Err(Err::Error((_, ErrorKind::TakeUntil)))`
344/// if the pattern wasn't met.
345/// # Example
346/// ```rust
347/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
348/// use nom::bytes::complete::take_until;
349///
350/// fn until_eof(s: &str) -> IResult<&str, &str> {
351/// take_until("eof")(s)
352/// }
353///
354/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world")));
355/// assert_eq!(until_eof("hello, world"), Err(Err::Error(Error::new("hello, world", ErrorKind::TakeUntil))));
356/// assert_eq!(until_eof(""), Err(Err::Error(Error::new("", ErrorKind::TakeUntil))));
357/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1")));
358/// ```
359pub fn take_until<T, I, Error: ParseError<I>>(tag: T) -> impl FnMut(I) -> IResult<I, I, Error>
360where
361 I: Input + FindSubstring<T>,
362 T: Input + Clone,
363{
364 let mut parser = super::take_until(tag);
365
366 move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
367}
368
369/// Returns the non empty input slice up to the first occurrence of the pattern.
370///
371/// It doesn't consume the pattern. It will return `Err(Err::Error((_, ErrorKind::TakeUntil)))`
372/// if the pattern wasn't met.
373/// # Example
374/// ```rust
375/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
376/// use nom::bytes::complete::take_until1;
377///
378/// fn until_eof(s: &str) -> IResult<&str, &str> {
379/// take_until1("eof")(s)
380/// }
381///
382/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world")));
383/// assert_eq!(until_eof("hello, world"), Err(Err::Error(Error::new("hello, world", ErrorKind::TakeUntil))));
384/// assert_eq!(until_eof(""), Err(Err::Error(Error::new("", ErrorKind::TakeUntil))));
385/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1")));
386/// assert_eq!(until_eof("eof"), Err(Err::Error(Error::new("eof", ErrorKind::TakeUntil))));
387/// ```
388pub fn take_until1<T, I, Error: ParseError<I>>(tag: T) -> impl FnMut(I) -> IResult<I, I, Error>
389where
390 I: Input + FindSubstring<T>,
391 T: Input + Clone,
392{
393 let mut parser = super::take_until1(tag);
394
395 move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
396}
397
398/// Matches a byte string with escaped characters.
399///
400/// * The first argument matches the normal characters (it must not accept the control character)
401/// * The second argument is the control character (like `\` in most languages)
402/// * The third argument matches the escaped characters
403/// # Example
404/// ```
405/// # use nom::{Err, error::ErrorKind, Needed, IResult};
406/// # use nom::character::complete::digit1;
407/// use nom::bytes::complete::escaped;
408/// use nom::character::complete::one_of;
409///
410/// fn esc(s: &str) -> IResult<&str, &str> {
411/// escaped(digit1, '\\', one_of(r#""n\"#))(s)
412/// }
413///
414/// assert_eq!(esc("123;"), Ok((";", "123")));
415/// assert_eq!(esc(r#"12\"34;"#), Ok((";", r#"12\"34"#)));
416/// ```
417///
418pub fn escaped<'a, I, Error, F, G>(
419 normal: F,
420 control_char: char,
421 escapable: G,
422) -> impl FnMut(I) -> IResult<I, I, Error>
423where
424 I: Clone + crate::traits::Offset + Input + 'a,
425 <I as Input>::Item: crate::traits::AsChar,
426 F: Parser<I, Error = Error>,
427 G: Parser<I, Error = Error>,
428 Error: ParseError<I>,
429{
430 let mut parser = super::escaped(normal, control_char, escapable);
431
432 move |i: I| parser.process::<OutputM<Emit, Emit, Complete>>(i)
433}
434
/// Matches a byte string containing escaped characters and rewrites the escapes.
///
/// * `normal` matches the ordinary characters (it must not match the control character)
/// * `control_char` is the control character (like `\` in most languages)
/// * `transform` matches the escaped characters and transforms them
///
/// As an example, the chain `abc\tdef` could be `abc    def` (it also consumes the control character)
///
/// ```
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// # use std::str::from_utf8;
/// use nom::bytes::complete::{escaped_transform, tag};
/// use nom::character::complete::alpha1;
/// use nom::branch::alt;
/// use nom::combinator::value;
///
/// fn parser(input: &str) -> IResult<&str, String> {
///   escaped_transform(
///     alpha1,
///     '\\',
///     alt((
///       value("\\", tag("\\")),
///       value("\"", tag("\"")),
///       value("\n", tag("n")),
///     ))
///   )(input)
/// }
///
/// assert_eq!(parser("ab\\\"cd"), Ok(("", String::from("ab\"cd"))));
/// assert_eq!(parser("ab\\ncd"), Ok(("", String::from("ab\ncd"))));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn escaped_transform<I, Error, F, G, O1, O2, ExtendItem, Output>(
  normal: F,
  control_char: char,
  transform: G,
) -> impl FnMut(I) -> IResult<I, Output, Error>
where
  I: Clone + crate::traits::Offset + Input,
  I: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
  O1: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
  O2: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
  <I as Input>::Item: crate::traits::AsChar,
  F: Parser<I, Output = O1, Error = Error>,
  G: Parser<I, Output = O2, Error = Error>,
  Error: ParseError<I>,
{
  // Build the shared parser once, then run it in complete mode on every call.
  let mut inner = super::escaped_transform(normal, control_char, transform);

  move |input: I| inner.process::<OutputM<Emit, Emit, Complete>>(input)
}
487
#[cfg(test)]
mod tests {
  use super::*;
  use crate::error::ErrorKind;
  use crate::AsChar;
  use crate::Err;

  // Whole input is alphabetic and shorter than the upper bound.
  #[test]
  fn complete_take_while_m_n_utf8_all_matching() {
    let parsed: IResult<&str, &str> = take_while_m_n(1, 4, |c: char| c.is_alphabetic())("øn");
    assert_eq!(parsed, Ok(("", "øn")));
  }

  // Whole input is alphabetic, but the upper bound cuts the match after one char.
  #[test]
  fn complete_take_while_m_n_utf8_all_matching_substring() {
    let parsed: IResult<&str, &str> = take_while_m_n(1, 1, |c: char| c.is_alphabetic())("øn");
    assert_eq!(parsed, Ok(("n", "ø")));
  }

  // issue #1336 "escaped hangs if normal parser accepts empty"
  fn escaped_string(input: &str) -> IResult<&str, &str> {
    use crate::character::complete::{alpha0, one_of};

    let mut parser = escaped(alpha0, '\\', one_of("n"));
    parser(input)
  }

  // issue #1336 "escaped hangs if normal parser accepts empty"
  #[test]
  fn escaped_hang() {
    for input in ["7", "a7"] {
      escaped_string(input).unwrap();
    }
  }

  // issue #1118 escaped does not work with empty string
  fn unquote(input: &str) -> IResult<&str, &str> {
    use crate::bytes::complete::*;
    use crate::character::complete::*;
    use crate::combinator::opt;
    use crate::sequence::delimited;

    let body = escaped(opt(none_of(r#"\""#)), '\\', one_of(r#"\"rnt"#));

    delimited(char('"'), body, char('"')).parse(input)
  }

  #[test]
  fn escaped_hang_1118() {
    assert_eq!(unquote(r#""""#), Ok(("", "")));
  }

  // issue #1630 take_while_m_n is invalid for multi-byte UTF-8 characters
  #[test]
  fn complete_take_while_m_n_multibyte() {
    use crate::error::Error;

    // Accepts only characters whose encoded length is more than one byte.
    fn multi_byte_chars(s: &str, m: usize, n: usize) -> IResult<&str, &str> {
      take_while_m_n(m, n, |c: char| c.len() > 1)(s)
    }

    assert_eq!(multi_byte_chars("€ latin", 0, 64), Ok((" latin", "€")));
    assert_eq!(multi_byte_chars("𝄠 latin", 0, 1), Ok((" latin", "𝄠")));
    assert_eq!(multi_byte_chars("باب latin", 0, 64), Ok((" latin", "باب")));
    assert_eq!(
      multi_byte_chars("💣💢ᾠ latin", 3, 3),
      Ok((" latin", "💣💢ᾠ"))
    );
    assert_eq!(multi_byte_chars("latin", 0, 64), Ok(("latin", "")));
    assert_eq!(multi_byte_chars("باب", 1, 3), Ok(("", "باب")));
    assert_eq!(multi_byte_chars("باب", 1, 2), Ok(("ب", "با")));
    assert_eq!(
      multi_byte_chars("latin", 1, 64),
      Err(Err::Error(Error::new("latin", ErrorKind::TakeWhileMN)))
    );
  }
}
566}