json5format/
parser.rs

1// Copyright (c) 2020 Google LLC All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#![deny(missing_docs)]
6use {
7    crate::{content::*, error::*},
8    lazy_static::lazy_static,
9    regex::{CaptureLocations, Match, Regex},
10    std::cell::RefCell,
11    std::rc::Rc,
12};
13
/// Anchors `regex` to the start of the input by prefixing it with "^".
///
/// The tokenizer regexes in this module consume from the front of the remaining characters in
/// the input buffer. Building them through this helper makes the required leading "^" explicit
/// at each declaration.
fn from_start(regex: &str) -> String {
    format!("^{}", regex)
}
21
/// Wraps `regex` between "^" and "$" so that it matches *only* when it matches the entire
/// string.
fn exact_match(regex: &str) -> String {
    format!("^{}$", regex)
}
26
lazy_static! {

    // Each `*_PATTERN` below contributes exactly one capturing group to `NEXT_TOKEN`. The
    // `usize` constant that follows a pattern is that group's index within `NEXT_TOKEN`, so the
    // constants must stay in step with the order of the patterns joined in `NEXT_TOKEN`.

    /// Any unicode whitespace except newline.
    static ref WHITESPACE_PATTERN: &'static str = r#"([\s&&[^\n]]+)"#;
    /// `NEXT_TOKEN` capture group index for `WHITESPACE_PATTERN`.
    static ref WHITESPACE: usize = 1;

    /// Match a newline (except newlines in multiline strings and block comments).
    static ref NEWLINE_PATTERN: &'static str = r#"(\n)"#;
    /// `NEXT_TOKEN` capture group index for `NEWLINE_PATTERN`.
    static ref NEWLINE: usize = 2;

    /// Match two slashes before capturing the line comment. Additional slashes and leading spaces
    /// are considered part of the content, so they will be accurately restored by the formatter.
    static ref LINE_COMMENT_SLASHES_PATTERN: &'static str = r#"(//)"#;
    /// `NEXT_TOKEN` capture group index for `LINE_COMMENT_SLASHES_PATTERN`.
    static ref LINE_COMMENT_SLASHES: usize = 3;

    /// Match the start of a block comment.
    static ref OPEN_BLOCK_COMMENT_PATTERN: &'static str = r#"(/\*)"#;
    /// `NEXT_TOKEN` capture group index for `OPEN_BLOCK_COMMENT_PATTERN`.
    static ref OPEN_BLOCK_COMMENT: usize = 4;

    /// Any non-string primitive (Number, Boolean, 'null').
    ///
    /// Only the outermost group captures; every inner group is non-capturing (`(?:...)`), which
    /// keeps the capture group indexes of the patterns that follow stable.
    static ref NON_STRING_PRIMITIVE_PATTERN: &'static str =
        r#"((?x) # ignore whitespace and allow '#' comments

            # Capture null, true, or false (lowercase only, as in the ECMAScript keywords).
            # End with a word boundary ('\b' marker) to ensure the pattern does not match if
            # it is followed by a word ('\w') character; for example, 'nullify' is a valid
            # identifier (depending on the context) and must not match the 'null' value.

            (?:(?:null|true|false)\b)|

            # Capture all number formats. Every variant is allowed an optional '-' or '+' prefix.

            (?:[-+]?(?:

                # All of the following variants end in a word character. Use '\b' to prevent
                # matching numbers immediately followed by another word character, for example,
                # 'NaNo', 'Infinity_', or '0xadef1234ghi'.

                (?:(?:
                    NaN|
                    Infinity|

                    # hexadecimal notation
                    (?:0[xX][0-9a-fA-F]+)|

                    # decimal exponent notation
                    (?:(?:0|(?:[1-9][0-9]*))?\.[0-9]+[eE][+-]?[0-9]+)|

                    # integer exponent notation with optional trailing decimal point
                    (?:(?:0|(?:[1-9][0-9]*))\.?[eE][+-]?[0-9]+)|

                    # decimal notation
                    (?:(?:0|(?:[1-9][0-9]*))?\.[0-9]+)
                )\b)|

                # Capture integers, with an optional trailing decimal point.
                # If the value ends in a digit (no trailing decimal point), apply `\b` to prevent
                # matching integers immediatly followed by a word character (for example, 1200PDT).
                # But if the integer has a trailing decimal, the '\b' does not apply. (Since '.' is
                # not itself a '\w' word character, the '\b' would have the opposite affect,
                # matching only if the next character is a word character, unless there is no next
                # character.)

                (?:
                    (?:0|(?:[1-9][0-9]*))(?:\.|\b)
                )
            ))
        )"#;
    /// `NEXT_TOKEN` capture group index for `NON_STRING_PRIMITIVE_PATTERN`.
    static ref NON_STRING_PRIMITIVE: usize = 5;

    /// Property name without quotes.
    static ref UNQUOTED_PROPERTY_NAME_PATTERN: &'static str = r#"[\$\w&&[^\d]][\$\w]*"#;
    /// Matches a string only if the whole string is a valid unquoted property name.
    static ref UNQUOTED_PROPERTY_NAME_REGEX: Regex =
        Regex::new(&exact_match(&*UNQUOTED_PROPERTY_NAME_PATTERN)).unwrap();

    /// An unquoted property name (captured), followed by optional non-newline whitespace and
    /// the colon separator (both consumed, but not captured).
    static ref UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN_STRING: String =
        r#"(?:("#.to_owned() + *UNQUOTED_PROPERTY_NAME_PATTERN + r#")[\s&&[^\n]]*:)"#;
    /// The `&str` view of `UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN_STRING`, so it can be joined
    /// with the other `&'static str` patterns in `NEXT_TOKEN`.
    static ref UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN: &'static str =
        &UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN_STRING;
    /// `NEXT_TOKEN` capture group index for `UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN`.
    static ref UNQUOTED_PROPERTY_NAME_AND_COLON: usize = 6;

    /// Initial quote for a single or double quote string.
    static ref OPEN_QUOTE_PATTERN: &'static str = r#"(["'])"#;
    /// `NEXT_TOKEN` capture group index for `OPEN_QUOTE_PATTERN`.
    static ref OPEN_QUOTE: usize = 7;

    /// An opening or closing curly brace or square brace.
    static ref BRACE_PATTERN: &'static str = r#"([{}\[\]])"#;
    /// `NEXT_TOKEN` capture group index for `BRACE_PATTERN`.
    static ref BRACE: usize = 8;

    /// Match a comma, separating object properties and array items
    static ref COMMA_PATTERN: &'static str = r#"(,)"#;
    /// `NEXT_TOKEN` capture group index for `COMMA_PATTERN`.
    static ref COMMA: usize = 9;

    /// Capture any of the above tokens. These regular expressions are designed for an exclusive
    /// match, so only one of the tokens should match a valid JSON 5 document fragment, when
    /// applied.
    ///
    /// The patterns are joined in the same order as their capture group index constants (1
    /// through 9), inside a single non-capturing group anchored to the start of the input.
    static ref NEXT_TOKEN: Regex = Regex::new(
        &from_start(&(r#"(?:"#.to_owned()
        + &vec![
            *WHITESPACE_PATTERN,
            *NEWLINE_PATTERN,
            *LINE_COMMENT_SLASHES_PATTERN,
            *OPEN_BLOCK_COMMENT_PATTERN,
            *NON_STRING_PRIMITIVE_PATTERN,
            *UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN,
            *OPEN_QUOTE_PATTERN,
            *BRACE_PATTERN,
            *COMMA_PATTERN,
        ].join("|")
        + r#")"#))
    ).unwrap();

    /// Capture the contents of a line comment.
    static ref LINE_COMMENT: Regex = Regex::new(
        &from_start(r#"([^\n]*)"#)
    ).unwrap();

    /// Capture the contents of a block comment.
    static ref BLOCK_COMMENT: Regex = Regex::new(
        &from_start(r#"((?:.|\n)*?)\*/"#)
    ).unwrap();

    /// Capture the string, without quotes.
    static ref SINGLE_QUOTED: Regex = Regex::new(
        &from_start(r#"((?:(?:\\\\)|(?:\\')|(?:\\\n)|(?:[^'\n]))*)(?:')"#)
    ).unwrap();

    /// Capture the string, without quotes.
    static ref DOUBLE_QUOTED: Regex = Regex::new(
        &from_start(r#"((?:(?:\\\\)|(?:\\")|(?:\\\n)|(?:[^"\n]))*)(?:")"#)
    ).unwrap();

    /// Quoted property names are captured using the same regex as quoted string primitives, and
    /// unlike `UNQUOTED_PROPERTY_NAME_AND_COLON`, the property name separator (colon with optional
    /// whitespace) is not automatically consumed. Use this regex to consume the separator after
    /// encountering a quoted string in the property name position.
    static ref COLON: Regex = Regex::new(
        &from_start(r#"([\s&&[^\n]]*:)"#)
    ).unwrap();
}
167
168fn matches_unquoted_property_name(strval: &str) -> bool {
169    const KEYWORDS: &'static [&'static str] = &["true", "false", "null"];
170    UNQUOTED_PROPERTY_NAME_REGEX.is_match(strval) && !KEYWORDS.contains(&strval)
171}
172
173struct Capturer {
174    regex: &'static Regex,
175    overall_match: Option<String>,
176    locations: CaptureLocations,
177}
178
179impl Capturer {
180    fn new(regex: &'static Regex) -> Self {
181        Self { regex, overall_match: None, locations: regex.capture_locations() }
182    }
183
184    fn capture<'a>(&mut self, text: &'a str) -> Option<Match<'a>> {
185        let captures = self.regex.captures_read(&mut self.locations, text);
186        if let Some(captures) = &captures {
187            self.overall_match = Some(text[0..captures.end()].to_string());
188        } else {
189            self.overall_match = None;
190        }
191        captures
192    }
193
194    fn overall_match<'a>(&'a self) -> Option<&'a str> {
195        self.overall_match.as_deref()
196    }
197
198    fn captured<'a>(&'a self, i: usize) -> Option<&'a str> {
199        if let (Some(overall_match), Some((start, end))) =
200            (&self.overall_match, self.locations.get(i))
201        {
202            Some(&overall_match[start..end])
203        } else {
204            None
205        }
206    }
207}
208
/// Everything needed to print the source line for a parser error: a contextually-relevant
/// portion of the line (possibly the whole line), the position on that line where the error
/// starts, and how many characters (1 or more) to highlight from that position.
struct ParserErrorContext {
    /// The error line to be printed with a parser error.
    line: String,

    /// Zero-based index of the first character of the error.
    indicator_start: usize,

    /// How many characters to highlight, counting the one at `indicator_start` (at least 1).
    indicator_len: usize,
}

impl ParserErrorContext {
    /// Creates the context.
    ///
    /// # Panics
    /// Panics if `indicator_len` is zero.
    fn new(line: String, indicator_start: usize, indicator_len: usize) -> Self {
        assert!(indicator_len >= 1);
        Self { line, indicator_start, indicator_len }
    }

    /// The line of text to print.
    fn line(&self) -> &str {
        self.line.as_str()
    }

    /// Builds the marker line printed under `line()`: `indicator_start` spaces, a caret (`^`),
    /// then one tilde (`~`) for each highlighted character after the first.
    fn indicator(&self) -> String {
        let padding = " ".repeat(self.indicator_start);
        let tail = "~".repeat(self.indicator_len.saturating_sub(1));
        format!("{}^{}", padding, tail)
    }
}
244
/// Parser state for one JSON5 input buffer: the unparsed remainder of the buffer, the
/// line/column bookkeeping used for error locations, and the stack of open containers.
pub(crate) struct Parser<'parser> {
    /// The remaining text in the input buffer since the last capture.
    remaining: &'parser str,

    /// The input filename, if any.
    filename: &'parser Option<String>,

    /// The text of the current line being parsed.
    current_line: &'parser str,

    /// The text of the next line to be parsed after parsing the last capture.
    next_line: &'parser str,

    /// The current line number (from 1) while parsing the input buffer.
    line_number: usize,

    /// The current column number (from 1) while parsing the input buffer.
    column_number: usize,

    /// The line number of the next token to be parsed.
    next_line_number: usize,

    /// The column number of the next token to be parsed.
    next_column_number: usize,

    /// The top of the stack is the current Object or Array whose content is being parsed.
    /// The next item in the stack is the Object or Array that contains the current one,
    /// and so on.
    scope_stack: Vec<Rc<RefCell<Value>>>,

    /// To avoid accidentally overflowing the program stack, limit the number of
    /// nested scopes and generate an error if it is exceeded.
    nesting_limit: usize,

    /// Captures a colon token when expected.
    colon_capturer: Capturer,
}
282
283impl<'parser> Parser<'parser> {
    /// The default limit of nested scopes when parsing a JSON5 document. `new()` installs this
    /// value; `set_nesting_limit()` overrides it.
    pub const DEFAULT_NESTING_LIMIT: usize = 1000;
286
287    pub fn new(filename: &'parser Option<String>) -> Self {
288        let remaining = "";
289        let current_line = &remaining;
290        Self {
291            remaining,
292            filename,
293            current_line,
294            next_line: current_line,
295            line_number: 1,
296            column_number: 1,
297            next_line_number: 1,
298            next_column_number: 1,
299            scope_stack: Vec::default(),
300            nesting_limit: Self::DEFAULT_NESTING_LIMIT,
301            colon_capturer: Capturer::new(&COLON),
302        }
303    }
304
    /// To avoid accidentally overflowing the program stack, there is a mutable
    /// limit on the number of nested scopes allowed. If this limit is exceeded
    /// while parsing a document, a parser error is generated.
    ///
    /// # Arguments
    ///   * `new_limit` - the limit that replaces the current one
    pub fn set_nesting_limit(&mut self, new_limit: usize) {
        self.nesting_limit = new_limit;
    }
311
312    fn current_scope(&self) -> Rc<RefCell<Value>> {
313        assert!(self.scope_stack.len() > 0);
314        self.scope_stack.last().unwrap().clone()
315    }
316
317    fn with_container<F, T>(&self, f: F) -> Result<T, Error>
318    where
319        F: FnOnce(&mut dyn Container) -> Result<T, Error>,
320    {
321        match &mut *self.current_scope().borrow_mut() {
322            Value::Array { val, .. } => f(val),
323            Value::Object { val, .. } => f(val),
324            unexpected => Err(Error::internal(
325                self.location(),
326                format!(
327                    "Current scope should be an Array or Object, but scope was {:?}",
328                    unexpected
329                ),
330            )),
331        }
332    }
333
334    fn with_array<F, T>(&self, f: F) -> Result<T, Error>
335    where
336        F: FnOnce(&mut Array) -> Result<T, Error>,
337    {
338        match &mut *self.current_scope().borrow_mut() {
339            Value::Array { val, .. } => f(val),
340            unexpected => Err(self.error(format!(
341                "Invalid Array token found while parsing an {:?} (mismatched braces?)",
342                unexpected
343            ))),
344        }
345    }
346
347    fn with_object<F, T>(&self, f: F) -> Result<T, Error>
348    where
349        F: FnOnce(&mut Object) -> Result<T, Error>,
350    {
351        match &mut *self.current_scope().borrow_mut() {
352            Value::Object { val, .. } => f(val),
353            unexpected => Err(self.error(format!(
354                "Invalid Object token found while parsing an {:?} (mismatched braces?)",
355                unexpected
356            ))),
357        }
358    }
359
360    fn is_in_array(&self) -> bool {
361        (*self.current_scope().borrow()).is_array()
362    }
363
    /// Returns true if the current scope is not an Array. This is computed as the negation of
    /// `is_in_array()`, not by inspecting the scope for an Object.
    fn is_in_object(&self) -> bool {
        !self.is_in_array()
    }
367
368    fn add_value(&mut self, value: Value) -> Result<(), Error> {
369        let is_container = value.is_object() || value.is_array();
370        let value_ref = Rc::new(RefCell::new(value));
371        self.with_container(|container| container.add_value(value_ref.clone(), self))?;
372        if is_container {
373            self.scope_stack.push(value_ref.clone());
374            if self.scope_stack.len() > self.nesting_limit {
375                return Err(self.error(format!(
376                    "The given JSON5 document exceeds the parser's nesting limit of {}",
377                    self.nesting_limit
378                )));
379            }
380        }
381        Ok(())
382    }
383
    /// Notifies the current container that a newline token was captured.
    fn on_newline(&mut self) -> Result<(), Error> {
        self.with_container(|container| container.on_newline())
    }
387
388    /// Adds a standalone line comment to the current container, or adds an end-of-line comment
389    /// to the current container's current value.
390    ///
391    /// # Arguments
392    ///   * `captured`: the line comment content (including leading spaces)
393    ///   * `pending_new_line_comment_block` - If true and the comment is not an
394    ///     end-of-line comment, the container should insert a line_comment_break before inserting
395    ///     the next line comment. This should only be true if this standalone line comment was
396    ///     preceded by one or more standalone line comments and one or more blank lines.
397    ///
398    /// # Returns
399    ///   true if the line comment is standalone, that is, not an end-of-line comment
400    fn add_line_comment(
401        &self,
402        captured: Option<&str>,
403        pending_new_line_comment_block: bool,
404    ) -> Result<bool, Error> {
405        match captured {
406            Some(content) => {
407                let content = content.trim_end();
408                self.with_container(|container| {
409                    container.add_line_comment(
410                        content,
411                        self.column_number,
412                        pending_new_line_comment_block,
413                    )
414                })
415            }
416            None => Err(Error::internal(
417                self.location(),
418                "Line comment regex should support empty line comment",
419            )),
420        }
421    }
422
423    fn add_block_comment(&self, captured: Option<&str>) -> Result<(), Error> {
424        match captured {
425            Some(content) => {
426                // `indent_count` subtracts 2 characters for the "/*" prefix on the firt line of
427                // the block comment, and 2 spaces on subsequent lines, assuming the line content is
428                // meant to be vertically aligned.
429                let indent_count = self.column_number - 3;
430                let indent = " ".repeat(indent_count);
431                if content
432                    .lines()
433                    .enumerate()
434                    .find(|(index, line)| {
435                        *index > 0 && !line.starts_with(&indent) && line.trim() != ""
436                    })
437                    .is_some()
438                {
439                    self.with_container(|container| {
440                        container.add_block_comment(Comment::Block {
441                            lines: content.lines().map(|line| line.to_owned()).collect(),
442                            align: false,
443                        })
444                    })
445                } else {
446                    // All block comment lines are indented at least beyond the "/*", so strip the
447                    // indent and re-indent when formatting.
448                    let trimmed_lines = content
449                        .lines()
450                        .enumerate()
451                        .map(|(index, line)| {
452                            if index == 0 {
453                                line
454                            } else if line.trim().len() == 0 {
455                                ""
456                            } else {
457                                &line[indent_count..]
458                            }
459                        })
460                        .collect::<Vec<&str>>();
461                    self.with_container(|container| {
462                        container.add_block_comment(Comment::Block {
463                            lines: trimmed_lines.iter().map(|line| line.to_string()).collect(),
464                            align: true,
465                        })
466                    })
467                }
468            }
469            None => return Err(self.error("Block comment started without closing \"*/\"")),
470        }
471    }
472
    /// Takes the pending comments from the current container and returns them, so they can
    /// be attached to the value about to be created.
    fn take_pending_comments(&mut self) -> Result<Vec<Comment>, Error> {
        self.with_container(|container| Ok(container.take_pending_comments()))
    }
476
    /// The given property name was parsed. Once its value is also parsed, the property will be
    /// added to the current `Object`.
    ///
    /// # Arguments
    ///   * name - the property name, possibly quoted
    ///
    /// # Returns
    ///   a parse error if the current scope is not an Object, or any error from the Object
    fn set_pending_property(&self, name: &str) -> Result<(), Error> {
        self.with_object(|object| object.set_pending_property(name.to_string(), self))
    }
485
486    /// Adds a primitive string value or quoted property name, depending on the current context.
487    ///
488    /// For property names that meet the requirements for unquoted property names, the unnecessary
489    /// quotes are removed; otherwise, the original quotes are retained since the content of the
490    /// string may depend on the type of quote. For instance:
491    ///
492    /// ```json
493    ///   'JSON string "with double quotes" wrapped in single quotes'
494    /// ```
495    ///
496    /// As long as the single quotes are restored as-is (and not replaced with double-quotes)
497    /// the formatter can restore the original representation of the string without additional
498    /// (and perhaps less-readable) escaping of internal quotes.
499    fn add_quoted_string(&mut self, quote: &str, captured: Option<&str>) -> Result<(), Error> {
500        match captured {
501            Some(unquoted) => {
502                if self.is_in_object()
503                    && !self.with_object(|object| object.has_pending_property())?
504                {
505                    let captured = self.colon_capturer.capture(self.remaining);
506                    if self.consume_if_matched(captured) {
507                        if matches_unquoted_property_name(&unquoted) {
508                            self.set_pending_property(unquoted)
509                        } else {
510                            self.set_pending_property(&format!("{}{}{}", quote, &unquoted, quote))
511                        }
512                    } else {
513                        return Err(self.error("Property name separator (:) missing"));
514                    }
515                } else {
516                    let comments = self.take_pending_comments()?;
517                    self.add_value(Primitive::new(
518                        format!("{}{}{}", quote, &unquoted, quote),
519                        comments,
520                    ))
521                }
522            }
523            None => return Err(self.error("Unclosed string")),
524        }
525    }
526
    /// Adds a non-string primitive to the current container, exactly as captured, together
    /// with any comments pending in that container.
    fn add_non_string_primitive(&mut self, non_string_primitive: &str) -> Result<(), Error> {
        let comments = self.take_pending_comments()?;
        self.add_value(Primitive::new(non_string_primitive.to_string(), comments))
    }
531
    /// Dispatches a captured brace token to the matching open or close handler.
    ///
    /// # Arguments
    ///   * `brace` - one of "{", "}", "[", or "]"; any other string produces an internal error
    fn on_brace(&mut self, brace: &str) -> Result<(), Error> {
        match brace {
            "{" => self.open_object(),
            "}" => self.close_object(),
            "[" => self.open_array(),
            "]" => self.close_array(),
            unexpected => Err(Error::internal(
                self.location(),
                format!("regex returned unexpected brace string: {}", unexpected),
            )),
        }
    }
544
    /// Adds a new Object, carrying the current container's pending comments, to the current
    /// container. `add_value()` pushes container values onto the scope stack.
    fn open_object(&mut self) -> Result<(), Error> {
        let comments = self.take_pending_comments()?;
        self.add_value(Object::new(comments))
    }
549
550    fn exit_scope(&mut self) -> Result<(), Error> {
551        self.scope_stack.pop();
552        if self.scope_stack.is_empty() {
553            Err(self.error("Closing brace without a matching opening brace"))
554        } else {
555            Ok(())
556        }
557    }
558
    /// Closes the current Object and then exits its scope. Returns a parse error if the
    /// current scope is not an Object.
    fn close_object(&mut self) -> Result<(), Error> {
        self.with_object(|object| object.close(self))?;
        self.exit_scope()
    }
563
    /// Adds a new Array, carrying the current container's pending comments, to the current
    /// container. `add_value()` pushes container values onto the scope stack.
    fn open_array(&mut self) -> Result<(), Error> {
        let comments = self.take_pending_comments()?;
        self.add_value(Array::new(comments))
    }
568
    /// Closes the current Array and then exits its scope. Returns a parse error if the
    /// current scope is not an Array.
    fn close_array(&mut self) -> Result<(), Error> {
        self.with_array(|array| array.close(self))?;
        self.exit_scope()
    }
573
    /// Notifies the current container that its current value has ended. The parse loop calls
    /// this when a comma token is captured.
    fn end_value(&self) -> Result<(), Error> {
        self.with_container(|container| container.end_value(self))
    }
577
    /// Returns the parser's current location: the filename (if any) with the current line and
    /// column numbers. The result is always `Some`.
    pub fn location(&self) -> Option<Location> {
        Some(Location::new(self.filename.clone(), self.line_number, self.column_number))
    }
581
    /// Builds a parse error at the current location. The error text is `err`, followed by a
    /// colon, then the error context line, then the indicator line that marks the error
    /// position on it.
    ///
    /// # Arguments
    ///   * `err` - the description of what went wrong
    pub fn error(&self, err: impl std::fmt::Display) -> Error {
        // Passed to `get_error_context()` as `max_error_line_len`.
        const MAX_ERROR_LINE_LEN: usize = 200;
        // Passed to `get_error_context()` as `min_context_len`.
        const MIN_CONTEXT_LEN: usize = 10;
        // The single-character horizontal ellipsis, passed as `ellipsis`.
        const ELLIPSIS: &str = "\u{2026}";
        let error_context = self.get_error_context(MAX_ERROR_LINE_LEN, MIN_CONTEXT_LEN, ELLIPSIS);
        Error::parse(
            self.location(),
            format!("{}:\n{}\n{}", err, error_context.line(), error_context.indicator()),
        )
    }
592
    /// Moves the current position up to the pending "next" position and, if `matched` is
    /// `Some`, consumes the matched text from `remaining` and advances the "next" position
    /// past it.
    ///
    /// The current line and column are updated whether or not there was a match, so an error
    /// reported after a failed match points at the text that failed to match. Columns are
    /// counted in characters, not bytes.
    ///
    /// # Arguments
    ///   * `matched` - a match against `self.remaining`, or `None` if the regex did not match
    ///
    /// # Returns
    ///   true if `matched` was `Some`
    fn consume_if_matched<'a>(&mut self, matched: Option<Match<'a>>) -> bool {
        self.column_number = self.next_column_number;
        if self.line_number < self.next_line_number {
            self.line_number = self.next_line_number;
            self.current_line = self.next_line;
        }
        if let Some(matched) = matched {
            let matched_and_remaining = &self.remaining[matched.start()..];
            self.remaining = &self.remaining[matched.end()..];

            // If `matched` contains newlines, advance the `next_line` and column, for printing the
            // location of the next syntax element, in error messages, for example.
            //
            // The line iterator is created lazily, on the first newline, and skips the line the
            // match started on; each further newline pulls the following line from it.
            let mut some_matched_lines = None;
            for c in matched.as_str().chars() {
                if c == '\n' {
                    let matched_lines = some_matched_lines
                        .get_or_insert_with(|| matched_and_remaining.lines().skip(1));
                    // Fall back to the current line when no text follows the newline.
                    self.next_line = matched_lines.next().unwrap_or(self.current_line);
                    self.next_line_number += 1;
                    self.next_column_number = 1;
                } else {
                    self.next_column_number += 1;
                }
            }
            true
        } else {
            false
        }
    }
622
    /// Applies `capturer` to the remaining input and consumes the match, if any.
    ///
    /// # Returns
    ///   true if the capturer's regex matched
    fn capture(&mut self, capturer: &mut Capturer) -> bool {
        self.consume_if_matched(capturer.capture(self.remaining))
    }
626
    /// Applies `capturer` to the remaining input, consumes the match, and returns the text of
    /// the capturer's first capture group.
    ///
    /// # Returns
    ///   `None` if the regex did not match, or if group 1 did not participate in the match
    fn consume<'a>(&mut self, capturer: &'a mut Capturer) -> Option<&'a str> {
        if self.capture(capturer) {
            capturer.captured(1)
        } else {
            None
        }
    }
634
    /// Parse the given document string as a JSON5 document containing Array
    /// elements (with implicit outer braces). Document locations (used in, for
    /// example, error messages), are 1-based and start at line 1, column 1.
    pub fn parse(&mut self, buffer: &'parser str) -> Result<Array, Error> {
        self.parse_from_location(buffer, 1, 1)
    }
641
    /// Parse the given document string as a JSON5 document containing Array
    /// elements (with implicit outer braces), and use the given 1-based line
    /// and column numbers when referring to document locations.
    ///
    /// # Arguments
    ///   * `buffer` - the document text to parse
    ///   * `starting_line_number` - the line number to report for the first line of `buffer`
    ///   * `starting_column_number` - the column number to report for the first character of
    ///     `buffer`
    ///
    /// # Returns
    ///   the implicit outer Array holding the parsed document, or the first error encountered
    ///
    /// # Panics
    /// Panics if `starting_line_number` is zero.
    pub fn parse_from_location(
        &mut self,
        buffer: &'parser str,
        starting_line_number: usize,
        starting_column_number: usize,
    ) -> Result<Array, Error> {
        self.remaining = buffer;
        self.current_line = &self.remaining;

        assert!(starting_line_number > 0, "document line numbers are 1-based");
        self.next_line_number = starting_line_number;
        self.next_column_number = starting_column_number;

        // The current position starts one behind the next position. The first call to
        // `consume_if_matched()` then sees `line_number < next_line_number` and moves the
        // current position onto the first token.
        self.next_line = self.current_line;
        self.line_number = self.next_line_number - 1;
        self.column_number = self.next_column_number - 1;
        // The document itself is the implicit outer Array, at the bottom of the scope stack.
        self.scope_stack = vec![Rc::new(RefCell::new(Array::new(vec![])))];

        let mut next_token = Capturer::new(&NEXT_TOKEN);
        let mut single_quoted = Capturer::new(&SINGLE_QUOTED);
        let mut double_quoted = Capturer::new(&DOUBLE_QUOTED);
        let mut line_comment = Capturer::new(&LINE_COMMENT);
        let mut block_comment = Capturer::new(&BLOCK_COMMENT);

        // Blocks of adjacent line comments should be kept together as a "line comment block", but
        // adjacent line comment blocks separated by one or more newlines must be maintained as
        // separate blocks.
        //
        // These booleans, along with `reset_line_comment_break_check`, update state information as
        // line comments and blank lines are parsed.
        let mut just_captured_line_comment = false;
        let mut pending_blank_line = false;
        let mut pending_new_line_comment_block = false;

        while self.remaining.len() > 0 {
            // See comment above regarding "line comment blocks".
            let mut reset_line_comment_break_check = true;

            if self.capture(&mut next_token) {
                // Since this has to be done as an if-else-if-... check the most common
                // occurrences first.
                if let Some(_) = next_token.captured(*WHITESPACE) {
                    reset_line_comment_break_check = false;
                    Ok(()) // ignore all whitespace not in a string or comment
                } else if let Some(_) = next_token.captured(*NEWLINE) {
                    reset_line_comment_break_check = false;
                    // After a standalone line comment, the first newline ends the comment's own
                    // line; a second newline means a blank line followed it.
                    if just_captured_line_comment {
                        if pending_blank_line {
                            pending_new_line_comment_block = true;
                            pending_blank_line = false;
                        } else if !pending_new_line_comment_block {
                            pending_blank_line = true;
                        }
                    }
                    self.on_newline()
                } else if let Some(_) = next_token.captured(*COMMA) {
                    self.end_value()
                } else if let Some(brace) = next_token.captured(*BRACE) {
                    self.on_brace(&brace)
                } else if let Some(non_string_primitive) =
                    next_token.captured(*NON_STRING_PRIMITIVE)
                {
                    self.add_non_string_primitive(&non_string_primitive)
                } else if let Some(quote) = next_token.captured(*OPEN_QUOTE) {
                    // Only the opening quote has been consumed; consume the rest of the string
                    // with the regex for that kind of quote.
                    let quoted_string = if quote == "'" {
                        self.consume(&mut single_quoted)
                    } else {
                        self.consume(&mut double_quoted)
                    };
                    self.add_quoted_string(&quote, quoted_string)
                } else if let Some(unquoted_property_name) =
                    next_token.captured(*UNQUOTED_PROPERTY_NAME_AND_COLON)
                {
                    self.set_pending_property(unquoted_property_name)
                } else if let Some(_line_comment_start) = next_token.captured(*LINE_COMMENT_SLASHES)
                {
                    reset_line_comment_break_check = false;
                    pending_blank_line = false;
                    let line_comment = self.consume(&mut line_comment);
                    if self.add_line_comment(line_comment, pending_new_line_comment_block)? {
                        // standalone line comment
                        just_captured_line_comment = true;
                        pending_new_line_comment_block = false;
                    } // else it was an end-of-line comment
                    Ok(())
                } else if let Some(_block_comment_start) = next_token.captured(*OPEN_BLOCK_COMMENT)
                {
                    let block_comment = self.consume(&mut block_comment);
                    self.add_block_comment(block_comment)
                } else {
                    Err(Error::internal(
                        self.location(),
                        format!(
                            "NEXT_TOKEN matched an unexpected capture group: {}",
                            next_token.overall_match().unwrap_or("")
                        ),
                    ))
                }
            } else {
                Err(self.error("Unexpected token"))
            }?;

            // Any token other than whitespace, a newline, or a line comment ends the current
            // run of line comments.
            if reset_line_comment_break_check {
                just_captured_line_comment = false;
                pending_blank_line = false;
                pending_new_line_comment_block = false;
            }
        }
        self.remaining = "";
        self.close_document()?;

        // Only the document Array should remain on the stack; take ownership of it.
        match Rc::try_unwrap(self.scope_stack.pop().unwrap())
            .map_err(|_| Error::internal(None, "Rc<> for document array could not be unwrapped."))?
            .into_inner()
        {
            Value::Array { val, .. } => Ok(val),
            unexpected => Err(Error::internal(
                self.location(),
                format!("Final scope should be an Array, but scope was {:?}", unexpected),
            )),
        }
    }
767
768    fn close_document(&mut self) -> Result<(), Error> {
769        if self.scope_stack.len() == 1 {
770            Ok(())
771        } else {
772            Err(self.error("Mismatched braces in the document"))
773        }
774    }
775
776    /// Returns the given `current_line` and an `indicator` line: spaces, followed
777    /// by a carat (`^`) that points at the given `column_number`, followed by
778    /// tilde's (`~`) as long as the error token.
779    ///
780    /// If the line is longer than a set maximum length, the line is trimmed and
781    /// the indicator positions are adjusted.
782    fn get_error_context(
783        &self,
784        max_error_line_len: usize,
785        min_context_len: usize,
786        ellipsis: &str,
787    ) -> ParserErrorContext {
788        let error_line_len = self.current_line.chars().count();
789
790        // `indicator_start` is a 0-based char position
791        let indicator_start = std::cmp::min(self.column_number - 1, error_line_len);
792
793        let indicator_len = if self.line_number == self.next_line_number {
794            std::cmp::max(
795                std::cmp::min(
796                    self.next_column_number - self.column_number,
797                    error_line_len - indicator_start,
798                ),
799                1,
800            )
801        } else {
802            1
803        };
804
805        if error_line_len <= max_error_line_len {
806            ParserErrorContext::new(self.current_line.to_owned(), indicator_start, indicator_len)
807        } else {
808            trim_error_line_and_indicator(
809                self.current_line,
810                indicator_start,
811                indicator_len,
812                error_line_len,
813                max_error_line_len,
814                min_context_len,
815                ellipsis,
816            )
817        }
818    }
819}
820
/// A range expressed in character positions (as opposed to byte positions) of a string.
struct CharRange {
    range: std::ops::Range<usize>,
}

impl CharRange {
    /// Wraps a range of 0-based character positions.
    fn new(range: std::ops::Range<usize>) -> Self {
        Self { range }
    }

    /// Converts the character range into the corresponding byte range of `from_string`,
    /// which can then be used to slice the string.
    ///
    /// A position equal to the number of characters in `from_string` maps to
    /// `from_string.len()`. Returns `None` if either bound is beyond that position.
    fn to_byte_range(self, from_string: &str) -> Option<std::ops::Range<usize>> {
        // Byte offset of the char at `char_pos`; the one-past-the-end position maps to
        // the byte length of the string.
        let byte_offset_of = |char_pos: usize| {
            from_string
                .char_indices()
                .map(|(byte_pos, _char)| byte_pos)
                .chain(std::iter::once(from_string.len()))
                .nth(char_pos)
        };
        let start_byte = byte_offset_of(self.range.start)?;
        let end_byte = byte_offset_of(self.range.end)?;
        Some(start_byte..end_byte)
    }
}
856
857fn trim_error_line_and_indicator(
858    error_line: &str,
859    indicator_start: usize,
860    mut indicator_len: usize,
861    error_line_len: usize,
862    max_error_line_len: usize,
863    min_context_len: usize,
864    ellipsis: &str,
865) -> ParserErrorContext {
866    let ellipsis_len = ellipsis.chars().count();
867
868    assert!(max_error_line_len > ellipsis_len);
869    assert!(max_error_line_len < error_line_len);
870    assert!(
871        indicator_start <= error_line_len,
872        "Error because indicator_start={} > error_line_len={}\n{}",
873        indicator_start,
874        error_line_len,
875        error_line
876    );
877    assert!(
878        indicator_len == 1 || (indicator_start + indicator_len) <= error_line_len,
879        "Error because indicator_start={}, indicator_len={}, error_line_len={}\n{}",
880        indicator_start,
881        indicator_len,
882        error_line_len,
883        error_line
884    );
885
886    indicator_len = std::cmp::min(indicator_len, max_error_line_len);
887
888    let min_right_context_len = std::cmp::max(min_context_len, indicator_len);
889
890    let context_end =
891        std::cmp::min(indicator_start + min_right_context_len, error_line_len - ellipsis_len);
892    if context_end < max_error_line_len - ellipsis_len {
893        let slice_bytes = CharRange::new(0..(max_error_line_len - ellipsis_len))
894            .to_byte_range(error_line)
895            .expect("char indices should map to String bytes");
896        return ParserErrorContext::new(
897            error_line[slice_bytes].to_string() + ellipsis,
898            indicator_start,
899            indicator_len,
900        );
901    }
902
903    let context_start = indicator_start - std::cmp::min(indicator_start, min_context_len);
904    if error_line_len - context_start < max_error_line_len - ellipsis_len {
905        let start_char = error_line_len - (max_error_line_len - ellipsis_len);
906        let slice_bytes = CharRange::new(start_char..error_line_len)
907            .to_byte_range(error_line)
908            .expect("char indices should map to String bytes");
909        return ParserErrorContext::new(
910            ellipsis.to_owned() + &error_line[slice_bytes],
911            (indicator_start + ellipsis_len) - start_char,
912            indicator_len,
913        );
914    }
915
916    let margin_chars =
917        max_error_line_len - std::cmp::min(max_error_line_len, (ellipsis_len * 2) + indicator_len);
918    let right_margin = std::cmp::min(
919        error_line_len - std::cmp::min(error_line_len, indicator_start + indicator_len),
920        margin_chars / 2,
921    );
922    let left_margin = margin_chars - right_margin;
923    let mut start_char = indicator_start - left_margin;
924    let mut end_char =
925        std::cmp::min(indicator_start + indicator_len + right_margin, error_line_len);
926    let mut start_ellipsis = ellipsis;
927    let mut end_ellipsis = ellipsis;
928    if start_char == 0 {
929        start_ellipsis = "";
930        end_char += ellipsis_len;
931    } else if end_char == error_line_len {
932        end_ellipsis = "";
933        start_char -= ellipsis_len;
934    }
935
936    let slice_bytes = CharRange::new(start_char..end_char)
937        .to_byte_range(error_line)
938        .expect("char indices should map to String bytes");
939    ParserErrorContext::new(
940        start_ellipsis.to_owned() + &error_line[slice_bytes] + end_ellipsis,
941        (indicator_start + ellipsis_len) - start_char,
942        indicator_len,
943    )
944}
945
946#[cfg(test)]
947mod tests {
948    use {super::*, crate::test_error, proptest::prelude::*};
949
950    fn gen_error_line_test(
951        error_line: &str,
952        pattern: &str,
953        max_error_line_len: usize,
954        min_context_len: usize,
955        ellipsis: &str,
956        expected_errorline: &str,
957        expected_indicator: &str,
958    ) -> Result<(), String> {
959        let some_newline = pattern.find("\n");
960        let pattern_line1 =
961            if let Some(newline) = some_newline { &pattern[0..newline] } else { &pattern };
962        assert!(pattern_line1.len() > 0);
963        let indicator_start = error_line.find(pattern_line1).expect("pattern not found in line");
964        let end = indicator_start + pattern.len();
965        let indicator_len = end - indicator_start;
966        let error_context = if error_line.chars().count() <= max_error_line_len {
967            ParserErrorContext::new(error_line.to_owned(), indicator_start, indicator_len)
968        } else {
969            trim_error_line_and_indicator(
970                error_line,
971                indicator_start,
972                indicator_len,
973                error_line.chars().count(),
974                max_error_line_len,
975                min_context_len,
976                ellipsis,
977            )
978        };
979        let actual_errorline = error_context.line();
980        let actual_indicator = error_context.indicator();
981        let mut errors = String::new();
982        if expected_errorline != actual_errorline {
983            println!(
984                r#"
985expected_errorline: >>>{}<<< (charlen={})
986  actual_errorline: >>>{}<<< (charlen={} of {}, min context len={})"#,
987                expected_errorline,
988                expected_errorline.chars().count(),
989                actual_errorline,
990                actual_errorline.chars().count(),
991                max_error_line_len,
992                min_context_len,
993            );
994            errors.push_str("actual errorline does not match expected");
995        } else if expected_indicator != actual_indicator {
996            println!(
997                r#"
998                       {}"#,
999                actual_errorline,
1000            );
1001        }
1002        if expected_indicator != actual_indicator {
1003            if errors.len() > 0 {
1004                errors.push_str(" and ");
1005            }
1006            println!(
1007                r#"
1008expected_indicator:    {}
1009  actual_indicator:    {}"#,
1010                expected_indicator, actual_indicator,
1011            );
1012            errors.push_str("actual indicator does not match expected");
1013        } else if expected_errorline != actual_errorline {
1014            println!(
1015                r#"
1016                       {}"#,
1017                actual_indicator,
1018            );
1019        }
1020        if errors.len() > 0 {
1021            println!("{}", errors);
1022            Err(errors)
1023        } else {
1024            Ok(())
1025        }
1026    }
1027
1028    #[test]
1029    fn test_error_line1() {
1030        gen_error_line_test(
1031            "  good token, bad token;",
1032            "bad",
1033            30,
1034            10,
1035            " ... ",
1036            "  good token, bad token;",
1037            "              ^~~",
1038        )
1039        .expect("actual should match expected");
1040    }
1041
1042    #[test]
1043    fn test_error_line2() {
1044        gen_error_line_test(
1045            "  good token, bad token;",
1046            "token;",
1047            20,
1048            10,
1049            " ... ",
1050            " ... ken, bad token;",
1051            "              ^~~~~~",
1052        )
1053        .expect("actual should match expected");
1054    }
1055
1056    #[test]
1057    fn test_error_line2_short_ellipsis() {
1058        gen_error_line_test(
1059            "  good token, bad token;",
1060            "token;",
1061            20,
1062            10,
1063            "…",
1064            "…d token, bad token;",
1065            "              ^~~~~~",
1066        )
1067        .expect("actual should match expected");
1068    }
1069
1070    #[test]
1071    fn test_error_line3() {
1072        gen_error_line_test(
1073            "A good token, bad token;",
1074            "bad",
1075            20,
1076            10,
1077            " ... ",
1078            " ... en, bad to ... ",
1079            "         ^~~",
1080        )
1081        .expect("actual should match expected");
1082    }
1083
1084    #[test]
1085    fn test_error_line3_short_ellipsis() {
1086        gen_error_line_test(
1087            "A good token, bad token;",
1088            "bad",
1089            20,
1090            10,
1091            "…",
1092            "…d token, bad token;",
1093            "          ^~~",
1094        )
1095        .expect("actual should match expected");
1096    }
1097
1098    #[test]
1099    fn test_error_line3_escaped_unicode_ellipsis() {
1100        gen_error_line_test(
1101            "A good token, bad token;",
1102            "bad",
1103            20,
1104            10,
1105            "\u{2026}",
1106            "…d token, bad token;",
1107            "          ^~~",
1108        )
1109        .expect("actual should match expected");
1110    }
1111
1112    #[test]
1113    fn test_error_line4() {
1114        gen_error_line_test(
1115            "A good token, bad token;",
1116            "bad",
1117            10,
1118            10,
1119            " ... ",
1120            " ... bad ... ",
1121            "     ^~~",
1122        )
1123        .expect("actual should match expected");
1124    }
1125
1126    #[test]
1127    fn test_error_line4_short_context() {
1128        gen_error_line_test(
1129            "A good token, bad token;",
1130            "bad",
1131            10,
1132            5,
1133            " ... ",
1134            " ... bad ... ",
1135            "     ^~~",
1136        )
1137        .expect("actual should match expected");
1138    }
1139
1140    #[test]
1141    fn test_error_line4_long_pattern() {
1142        gen_error_line_test(
1143            "A good token, bad token;",
1144            "bad token",
1145            10,
1146            10,
1147            " ... ",
1148            " ... bad token ... ",
1149            "     ^~~~~~~~~",
1150        )
1151        .expect("actual should match expected");
1152    }
1153
1154    #[test]
1155    fn test_error_line4_long_pattern_short_context_big_ellipsis() {
1156        gen_error_line_test(
1157            "A good token, bad token;",
1158            "bad token",
1159            10,
1160            4,
1161            " ... ",
1162            " ... bad token ... ",
1163            "     ^~~~~~~~~",
1164        )
1165        .expect("actual should match expected");
1166    }
1167
1168    #[test]
1169    fn test_error_line4_long_pattern_short_context_short_ellipsis() {
1170        gen_error_line_test(
1171            "A good token, bad token;",
1172            "bad",
1173            10,
1174            4,
1175            "\u{2026}",
1176            "…n, bad t…",
1177            "    ^~~",
1178        )
1179        .expect("actual should match expected");
1180    }
1181
1182    #[test]
1183    fn test_error_line5() {
1184        gen_error_line_test(
1185            r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
1186            "a_prop",
1187            200,
1188            10,
1189            " ... ",
1190            r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
1191            r#"                                               ^~~~~~"#,
1192        ).expect("actual should match expected");
1193    }
1194
1195    #[test]
1196    fn test_error_line6() {
1197        gen_error_line_test(
1198            r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
1199            "a_prop",
1200            100,
1201            10,
1202            " ... ",
1203            r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#,
1204            r#"                                               ^~~~~~"#,
1205        ).expect("actual should match expected");
1206    }
1207
1208    #[test]
1209    fn test_error_line7() {
1210        gen_error_line_test(
1211            r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
1212            "a_prop",
1213            100,
1214            5,
1215            " ... ",
1216            r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#,
1217            r#"                                              ^~~~~~"#,
1218        ).expect("actual should match expected");
1219    }
1220
1221    #[test]
1222    fn test_error_line7_more_braces() {
1223        gen_error_line_test(
1224            r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
1225            "a_prop",
1226            100,
1227            10,
1228            " ... ",
1229            r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#,
1230            r#"                                                  ^~~~~~"#,
1231        ).expect("actual should match expected");
1232    }
1233
1234    #[test]
1235    fn test_error_line8() {
1236        gen_error_line_test(
1237            r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
1238            "a_prop",
1239            100,
1240            10,
1241            " ... ",
1242            r#" ... [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
1243            r#"                                           ^~~~~~"#,
1244        ).expect("actual should match expected");
1245    }
1246
1247    #[test]
1248    fn test_error_line9() {
1249        gen_error_line_test(
1250            r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
1251            "a_prop",
1252            100,
1253            10,
1254            " ... ",
1255            r#" ... [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#,
1256            r#"                                               ^~~~~~"#,
1257        ).expect("actual should match expected");
1258    }
1259
1260    lazy_static! {
1261        // With `ProptestConfig::failure_persistence` on by default, tests may generate the
1262        // following warnings:
1263        //
1264        //     proptest: Failed to find absolute path of source file...
1265        //     proptest: FileFailurePersistence::SourceParallel set, but no source file known
1266        //
1267        // To suppress these warnings, the following ProptestConfig overrides this behavior:
1268        static ref NO_PERSIST: ProptestConfig = ProptestConfig {
1269             failure_persistence: None,
1270             .. ProptestConfig::default()
1271        };
1272
1273        // Overrides the default number of test cases that must pass, from the default of 256.
1274        static ref EXTRA_CASES_NO_PERSIST: ProptestConfig = ProptestConfig {
1275             failure_persistence: None,
1276             cases: 1024,
1277             .. ProptestConfig::default()
1278        };
1279    }
1280
1281    struct RegexTest<'a> {
1282        error: Option<&'a str>,
1283        prefix: &'a str,
1284        matches: &'a str,
1285        suffix: &'a str,
1286        next_regex: Option<&'a Regex>,
1287        next_prefix: &'a str,
1288        next_matches: &'a str,
1289        next_suffix: &'a str,
1290        trailing: &'a str,
1291    }
1292
1293    impl<'a> Default for RegexTest<'a> {
1294        fn default() -> Self {
1295            RegexTest {
1296                error: None,
1297                prefix: "",
1298                matches: "",
1299                suffix: "",
1300                next_regex: None,
1301                next_prefix: "",
1302                next_matches: "",
1303                next_suffix: "",
1304                trailing: "",
1305            }
1306        }
1307    }
1308
1309    /// Validate a regex capture, and optional follow-up capture.
1310    /// If a test fails, the details of the tests are printed.
1311    ///
1312    /// To view the details of a successful tests (such as to validate your proptest patterns are
1313    /// generating anticipated sample values), run with the following options:
1314    ///
1315    ///   $ out/${OUT_SUBDIR}/host_x64/exe.unstripped/json5format_lib_test --show-output <test_name>
1316    fn try_capture(
1317        regex: &Regex,
1318        group_id: Option<usize>,
1319        test: RegexTest<'_>,
1320    ) -> Result<String, Error> {
1321        println!();
1322        println!("pattern: '{}'", regex.as_str());
1323
1324        let trailing = test.next_suffix.to_owned() + test.trailing;
1325        let test_string =
1326            test.prefix.to_owned() + test.matches + test.suffix + test.next_matches + &trailing;
1327        println!("capturing from: '{}'", test_string.escape_debug());
1328        println!(
1329            "                 {}{}{}{}",
1330            " ".repeat(test.prefix.len()),
1331            "^".repeat(test.matches.len()),
1332            " ".repeat(test.suffix.len()),
1333            "^".repeat(test.next_matches.len())
1334        );
1335
1336        let group_id = group_id.unwrap_or(1);
1337        println!("expected capture id: '{}'", group_id);
1338
1339        let capture = regex.captures(&test_string).ok_or_else(|| test_error!("capture failed"))?;
1340        let overall_match = capture.get(0).ok_or_else(|| test_error!("regex did not match"))?;
1341        println!(
1342            "overall match: '{}', length = {}",
1343            overall_match.as_str().escape_debug(),
1344            overall_match.end()
1345        );
1346
1347        let remaining = &test_string[overall_match.end()..];
1348        println!("remaining: '{}'", remaining.escape_debug());
1349
1350        const OVERALL_MATCH: usize = 0;
1351
1352        let mut capture_ids = vec![];
1353        for (index, subcapture) in capture.iter().enumerate() {
1354            if index != OVERALL_MATCH {
1355                if subcapture.is_some() {
1356                    capture_ids.push(index);
1357                }
1358            }
1359        }
1360        println!("capture ids = {:?}", capture_ids);
1361
1362        let captured_text = capture
1363            .get(group_id)
1364            .ok_or_else(|| test_error!(format!("capture group {} did not match", group_id)))?
1365            .as_str();
1366        println!("captured: '{}'", captured_text.escape_debug());
1367        assert_eq!(captured_text, test.matches);
1368        assert_eq!(capture_ids.len(), 1);
1369        assert_eq!(remaining, test.next_matches.to_owned() + &trailing);
1370
1371        match test.next_regex {
1372            Some(next_regex) => test_capture(
1373                &*next_regex,
1374                None,
1375                RegexTest {
1376                    prefix: test.next_prefix,
1377                    matches: test.next_matches,
1378                    suffix: test.next_suffix,
1379                    trailing: test.trailing,
1380                    ..Default::default()
1381                },
1382            ),
1383            None => Ok(captured_text.to_string()),
1384        }
1385    }
1386
1387    fn test_capture(
1388        regex: &Regex,
1389        group_id: Option<usize>,
1390        test: RegexTest<'_>,
1391    ) -> Result<String, Error> {
1392        let expected_error_str = test.error.clone();
1393        match try_capture(regex, group_id, test) {
1394            Ok(captured) => {
1395                println!("SUCCESSFUL CAPTURE! ... '{}'", captured);
1396                Ok(captured)
1397            }
1398            Err(actual_error) => match expected_error_str {
1399                Some(expected_error_str) => match &actual_error {
1400                    Error::TestFailure(_location, actual_error_str) => {
1401                        if expected_error_str == actual_error_str {
1402                            println!("EXPECTED FAILURE (GOOD NEWS)! ... '{}'", actual_error);
1403                            Ok(format!("{}", actual_error))
1404                        } else {
1405                            println!("{}", actual_error);
1406                            println!("expected: {}", expected_error_str);
1407                            println!("  actual: {}", actual_error_str);
1408                            Err(test_error!(
1409                                "Actual error string did not match expected error string."
1410                            ))
1411                        }
1412                    }
1413                    _unexpected_error_type => {
1414                        println!("expected: Test failure: {}", expected_error_str);
1415                        println!("  actual: {}", actual_error);
1416                        Err(test_error!(
1417                            "Actual error type did not match expected test failure type."
1418                        ))
1419                    }
1420                },
1421                None => Err(actual_error),
1422            },
1423        }
1424    }
1425
1426    fn test_regex(group_id: usize, test: RegexTest<'_>) -> Result<String, Error> {
1427        test_capture(&NEXT_TOKEN, Some(group_id), test)
1428    }
1429
1430    proptest! {
1431        #![proptest_config(NO_PERSIST)]
1432        #[test]
1433        fn test_whitespace_no_newlines(
1434            spaces in r#"[\s&&[^\n]]+"#,
1435            trailing_non_whitespace in r#"[^\s&&[^\n]]*"#,
1436        ) {
1437            test_regex(
1438                *WHITESPACE,
1439                RegexTest {
1440                    matches: &spaces,
1441                    trailing: &trailing_non_whitespace,
1442                    ..Default::default()
1443                }
1444            )
1445            .unwrap();
1446        }
1447    }
1448
1449    proptest! {
1450        #![proptest_config(NO_PERSIST)]
1451        #[test]
1452        fn test_whitespace_until_newline(
1453            spaces in r#"[\s&&[^\n]]+"#,
1454            trailing_non_whitespace in r#"\n[^\s&&[^\n]]*"#,
1455        ) {
1456            test_regex(
1457                *WHITESPACE,
1458                RegexTest {
1459                    matches: &spaces,
1460                    trailing: &trailing_non_whitespace,
1461                    ..Default::default()
1462                }
1463            )
1464            .unwrap();
1465        }
1466    }
1467
1468    proptest! {
1469        #![proptest_config(NO_PERSIST)]
1470        #[test]
1471        fn test_plain_ascii_whitespace_no_newline(
1472            spaces in r#"[ \t]+"#,
1473            trailing_non_whitespace in r#"[^\s&&[^\n]]*"#,
1474        ) {
1475            test_regex(
1476                *WHITESPACE,
1477                RegexTest {
1478                    matches: &spaces,
1479                    trailing: &trailing_non_whitespace,
1480                    ..Default::default()
1481                }
1482            )
1483            .unwrap();
1484        }
1485    }
1486
1487    proptest! {
1488        #![proptest_config(NO_PERSIST)]
1489        #[test]
1490        fn test_newline(
1491            newline in r#"\n"#,
1492            any_chars in r#"\PC*"#,
1493        ) {
1494            test_regex(
1495                *NEWLINE,
1496                RegexTest { matches: &newline, trailing: &any_chars, ..Default::default() },
1497            )
1498            .unwrap();
1499        }
1500    }
1501
1502    proptest! {
1503        #![proptest_config(NO_PERSIST)]
1504        #[test]
1505        fn test_line_comment(
1506            line_comment_prefix in r#"//"#,
1507            line_comment_content in r#"(|[^\n][^\n]*)"#,
1508            more_lines_or_eof in r#"(\n\PC*)?"#,
1509        ) {
1510            test_regex(
1511                *LINE_COMMENT_SLASHES,
1512                RegexTest {
1513                    matches: &line_comment_prefix,
1514                    next_regex: Some(&*LINE_COMMENT),
1515                    next_matches: &line_comment_content,
1516                    trailing: &more_lines_or_eof,
1517                    ..Default::default()
1518                },
1519            )
1520            .unwrap();
1521        }
1522    }
1523
1524    proptest! {
1525        #![proptest_config(NO_PERSIST)]
1526        #[test]
1527        fn test_empty_line_comment(
1528            line_comment_prefix in r#"//"#,
1529            more_lines_or_eof in r#"(\n\PC*)?"#,
1530        ) {
1531            test_regex(
1532                *LINE_COMMENT_SLASHES,
1533                RegexTest {
1534                    matches: &line_comment_prefix,
1535                    next_regex: Some(&*LINE_COMMENT),
1536                    next_matches: "",
1537                    trailing: &more_lines_or_eof,
1538                    ..Default::default()
1539                },
1540            )
1541            .unwrap();
1542        }
1543    }
1544
1545    proptest! {
1546        #![proptest_config(NO_PERSIST)]
1547        #[test]
1548        fn test_block_comment(
1549            block_comment_content in r#"([^*]|([*][^*/]))*"#,
1550            optional_trailing_content in r#"\PC*"#,
1551        ) {
1552            test_regex(
1553                *OPEN_BLOCK_COMMENT,
1554                RegexTest {
1555                    matches: "/*",
1556                    next_regex: Some(&*BLOCK_COMMENT),
1557                    next_matches: &block_comment_content,
1558                    next_suffix: "*/",
1559                    trailing: &optional_trailing_content,
1560
1561                    ..Default::default()
1562                },
1563            )
1564            .unwrap();
1565        }
1566    }
1567
1568    proptest! {
1569        #![proptest_config(NO_PERSIST)]
1570        #[test]
1571        fn test_empty_block_comment(
1572            optional_trailing_content in r#"\PC*"#,
1573        ) {
1574            test_regex(
1575                *OPEN_BLOCK_COMMENT,
1576                RegexTest {
1577                    matches: "/*",
1578                    next_regex: Some(&*BLOCK_COMMENT),
1579                    next_matches: "",
1580                    next_suffix: "*/",
1581                    trailing: &optional_trailing_content,
1582                    ..Default::default()
1583                },
1584            )
1585            .unwrap();
1586        }
1587    }
1588
1589    proptest! {
1590        #![proptest_config(NO_PERSIST)]
1591        #[test]
1592        fn test_property_name(
1593            propname in r#"[\w$&&[^\d]][\w$]*"#,
1594            whitespace_to_colon in r#"[\s&&[^\n]]*:"#,
1595            trailing_content in r#"\PC+"#,
1596        ) {
1597            test_regex(
1598                *UNQUOTED_PROPERTY_NAME_AND_COLON,
1599                RegexTest {
1600                    matches: &propname,
1601                    suffix: &whitespace_to_colon,
1602                    trailing: &trailing_content,
1603                    ..Default::default()
1604                },
1605            )
1606            .unwrap();
1607        }
1608    }
1609
1610    // Test two variations of invalid unquoted property name error handling, when expecting a match
1611    // against the regex `UNQUOTED_PROPERTY_NAME_AND_COLON` numbered capture group pattern:
1612    //
1613    // 1) No generated test candidates match any `NEXT_TOKEN` pattern.
    // 2) The leading digit is captured as a number literal, which does match a `NEXT_TOKEN`
    //    capture, but is an invalid property name.
1616    //
1617    // It's challenging to write a pattern for what does NOT constitute a valid property name since
1618    // the set of things not part of a given set is infinite. Unicode support also can make it hard
1619    // to define exhaustive patterns sometimes. So here are two tests for invalid unquoted property
1620    // names, both of which validate that a property name cannot start with a digit. The difference
1621    // between the two tests is:
1622    //
1623    //   * The first test generates candidate property names that will not match any pattern in the
1624    //     `NEXT_TOKEN` regex, generating a "capture failed" error.
1625    //   * The second test successfully captures a `NEXT_TOKEN`, but it captures a number literal,
1626    //     not an `UNQUOTED_PROPERTY_NAME_AND_COLON`, generating a different error message:
1627    //     "capture group {n} did not match" (where '{n}' is the capture group number for
1628    //     `UNQUOTED_PROPERTY_NAME_AND_COLON`).
1629    ////////////////////////////////////////////////////////////////////////////////////////////////
1630
1631    // Excluding 0-9, e & E, and x and X from the allowed pattern set for the second character
1632    // ensures the pattern generator will not generate strings with prefixes such as: `25`, `0X4`,
1633    // `0xf`, and `3E2`.
1634    proptest! {
1635        #![proptest_config(EXTRA_CASES_NO_PERSIST)]
1636        #[test]
1637        fn bad_property_name(
1638            propname in r#"[0-9][\w&&[^0-9eExX]][\w$]*"#,
1639            whitespace_to_colon in r#"[\s&&[^\n]]*:"#,
1640            trailing_content in r#"\PC+"#,
1641        ) {
1642            test_regex(
1643                *UNQUOTED_PROPERTY_NAME_AND_COLON,
1644                RegexTest {
1645                    error: Some("capture failed"),
1646                    matches: &propname,
1647                    suffix: &whitespace_to_colon,
1648                    trailing: &trailing_content,
1649                    ..Default::default()
1650                },
1651            )
1652            .unwrap();
1653        }
1654    }
1655
1656    // In this case, the second character is a dollar sign, which is legal for a property name,
1657    // but _not_ a "Word" character in the regex `\w` pattern set. The `\b` (word boundary) applies,
1658    // matching the digit as the `NEXT_TOKEN`, generating an error: "capture group {n} did not
1659    // match" (where '{n}' is the capture group number for `UNQUOTED_PROPERTY_NAME_AND_COLON`).
1660    proptest! {
1661        #![proptest_config(EXTRA_CASES_NO_PERSIST)]
1662        #[test]
1663        fn bad_property_name_captures_number_first(
1664            propname in r#"[0-9]\$[\w$]*"#,
1665            whitespace_to_colon in r#"[\s&&[^\n]]*:"#,
1666            trailing_content in r#"\PC+"#,
1667        ) {
1668            test_regex(
1669                *UNQUOTED_PROPERTY_NAME_AND_COLON,
1670                RegexTest {
1671                    error: Some(
1672                        &format!("capture group {} did not match",
1673                        *UNQUOTED_PROPERTY_NAME_AND_COLON)
1674                    ),
1675                    matches: &propname,
1676                    suffix: &whitespace_to_colon,
1677                    trailing: &trailing_content,
1678                    ..Default::default()
1679                },
1680            )
1681            .unwrap();
1682        }
1683    }
1684
1685    proptest! {
1686        #![proptest_config(NO_PERSIST)]
1687        #[test]
1688        fn test_single_quoted_string(
1689            single_quote in r#"'"#,
1690            single_quoted_string in r#"(([^'\\\n])|(\\')|(\\\n)|(\\\\))*"#,
1691            // comment inserted to balance closing braces [ and { for code editors
1692            non_literal_trailing_content in r#"\s*[,:/\]\}]"#,
1693        ) {
1694            test_regex(
1695                *OPEN_QUOTE,
1696                RegexTest {
1697                    matches: &single_quote,
1698                    next_regex: Some(&*SINGLE_QUOTED),
1699                    next_matches: &single_quoted_string,
1700                    next_suffix: &single_quote,
1701                    trailing: &non_literal_trailing_content,
1702                    ..Default::default()
1703                },
1704            )
1705            .unwrap();
1706        }
1707    }
1708
1709    proptest! {
1710        #![proptest_config(NO_PERSIST)]
1711        #[test]
1712        fn test_double_quoted_string(
1713            double_quote in r#"""#,
1714            double_quoted_string in r#"(([^"\\\n])|(\\")|(\\\n)|(\\\\))*"#,
1715            // comment inserted to balance closing braces [ and { for code editors
1716            non_literal_trailing_content in r#"\s*[,:/\]\}]?\PC*"#,
1717        ) {
1718            test_regex(
1719                *OPEN_QUOTE,
1720                RegexTest {
1721                    matches: &double_quote,
1722                    next_regex: Some(&*DOUBLE_QUOTED),
1723                    next_matches: &double_quoted_string,
1724                    next_suffix: &double_quote,
1725                    trailing: &non_literal_trailing_content,
1726                    ..Default::default()
1727                },
1728            )
1729            .unwrap();
1730        }
1731    }
1732
1733    proptest! {
1734        #![proptest_config(NO_PERSIST)]
1735        #[test]
1736        fn test_non_string_primitive(
1737            non_string_primitive in
1738                concat!(
1739                    r#"(null|true|false)|([-+]?(NaN|Infinity|(0[xX][0-9a-fA-F]+)"#,
1740                    r#"|((0|([1-9][0-9]*))?\.[0-9]+[eE][+-]?[0-9]+)|((0|([1-9][0-9]*))?\.[0-9]+)|((0|([1-9][0-9]*))\.?)))"#
1741                ),
1742            ends_non_string_primitive in r#"(|([\s,\]\}]\PC*))"#,
1743        ) {
1744            test_regex(
1745                *NON_STRING_PRIMITIVE,
1746                RegexTest {
1747                    matches: &non_string_primitive,
1748                    trailing: &ends_non_string_primitive,
1749                    ..Default::default()
1750                }
1751            )
1752            .unwrap();
1753        }
1754    }
1755
1756    proptest! {
1757        #![proptest_config(NO_PERSIST)]
1758        #[test]
1759        fn test_brace(
1760            brace in r#"[\[\{\}\]]"#,
1761            // comment inserted to add a closing " since VSCode thinks prior quote is still open.
1762            any_chars in r#"\PC*"#,
1763        ) {
1764            test_regex(
1765                *BRACE,
1766                RegexTest { matches: &brace, trailing: &any_chars, ..Default::default() },
1767            )
1768            .unwrap();
1769        }
1770    }
1771
1772    proptest! {
1773        #![proptest_config(NO_PERSIST)]
1774        #[test]
1775        fn test_comma(
1776            comma in r#","#,
1777            any_chars in r#"\PC*"#,
1778        ) {
1779            test_regex(
1780                *COMMA,
1781                RegexTest { matches: &comma, trailing: &any_chars, ..Default::default() },
1782            )
1783            .unwrap();
1784        }
1785    }
1786
1787    proptest! {
1788        #![proptest_config(NO_PERSIST)]
1789        #[test]
1790        fn test_colon(
1791            colon in r#":"#,
1792            any_chars in r#"\PC*"#,
1793        ) {
1794            test_capture(
1795                &*COLON,
1796                None,
1797                RegexTest { matches: &colon, trailing: &any_chars, ..Default::default() },
1798            )
1799            .unwrap();
1800        }
1801    }
1802
1803    #[test]
1804    fn test_regex_line_comment() {
1805        test_regex(
1806            *LINE_COMMENT_SLASHES,
1807            RegexTest {
1808                matches: "//",
1809                next_regex: Some(&*LINE_COMMENT),
1810                next_matches: " some line comment",
1811                trailing: "",
1812                ..Default::default()
1813            },
1814        )
1815        .unwrap();
1816
1817        test_regex(
1818            *LINE_COMMENT_SLASHES,
1819            RegexTest {
1820                matches: "//",
1821                next_regex: Some(&*LINE_COMMENT),
1822                next_matches: "    some line comment",
1823                trailing: "\n  more lines",
1824                ..Default::default()
1825            },
1826        )
1827        .unwrap();
1828
1829        test_regex(
1830            *LINE_COMMENT_SLASHES,
1831            RegexTest {
1832                matches: "//",
1833                next_regex: Some(&*LINE_COMMENT),
1834                trailing: "\nan empty line comment",
1835                ..Default::default()
1836            },
1837        )
1838        .unwrap();
1839
1840        test_regex(
1841            *LINE_COMMENT_SLASHES,
1842            RegexTest {
1843                matches: "//",
1844                next_regex: Some(&*LINE_COMMENT),
1845                next_matches: "/\t    some doc comment",
1846                trailing: "\nmultiple lines\nare here\n",
1847                ..Default::default()
1848            },
1849        )
1850        .unwrap();
1851    }
1852
1853    #[test]
1854    fn test_regex_block_comment() {
1855        test_regex(
1856            *OPEN_BLOCK_COMMENT,
1857            RegexTest {
1858                matches: "/*",
1859                next_regex: Some(&*BLOCK_COMMENT),
1860                next_matches: " this is a single line block comment ",
1861                next_suffix: "*/",
1862                trailing: "\n\nproperty: ignored",
1863                ..Default::default()
1864            },
1865        )
1866        .unwrap();
1867
1868        test_regex(
1869            *OPEN_BLOCK_COMMENT,
1870            RegexTest {
1871                matches: "/*",
1872                next_regex: Some(&*BLOCK_COMMENT),
1873                next_matches: " this is a
1874            multiline block comment",
1875                next_suffix: "*/",
1876                trailing: "\n\nproperty: ignored",
1877                ..Default::default()
1878            },
1879        )
1880        .unwrap();
1881
1882        test_regex(
1883            *OPEN_BLOCK_COMMENT,
1884            RegexTest {
1885                matches: "/*",
1886                next_regex: Some(&*BLOCK_COMMENT),
1887                next_matches: "",
1888                next_suffix: "*/",
1889                trailing: " to test an empty block comment",
1890                ..Default::default()
1891            },
1892        )
1893        .unwrap();
1894    }
1895
1896    #[test]
1897    fn test_regex_non_string_primitive() {
1898        test_regex(*NON_STRING_PRIMITIVE, RegexTest { matches: "null", ..Default::default() })
1899            .unwrap();
1900
1901        test_regex(
1902            *NON_STRING_PRIMITIVE,
1903            RegexTest { matches: "NULL", error: Some("capture failed"), ..Default::default() },
1904        )
1905        .unwrap();
1906
1907        test_regex(
1908            *NON_STRING_PRIMITIVE,
1909            RegexTest { matches: "nullify", error: Some("capture failed"), ..Default::default() },
1910        )
1911        .unwrap();
1912
1913        test_regex(*NON_STRING_PRIMITIVE, RegexTest { matches: "true", ..Default::default() })
1914            .unwrap();
1915
1916        test_regex(
1917            *NON_STRING_PRIMITIVE,
1918            RegexTest { matches: "True", error: Some("capture failed"), ..Default::default() },
1919        )
1920        .unwrap();
1921
1922        test_regex(
1923            *NON_STRING_PRIMITIVE,
1924            RegexTest { matches: "truest", error: Some("capture failed"), ..Default::default() },
1925        )
1926        .unwrap();
1927
1928        test_regex(*NON_STRING_PRIMITIVE, RegexTest { matches: "false", ..Default::default() })
1929            .unwrap();
1930
1931        for prefix in &["", "-", "+"] {
1932            for exp_prefix in &["", "-", "+"] {
1933                test_regex(
1934                    *NON_STRING_PRIMITIVE,
1935                    RegexTest {
1936                        matches: &(prefix.to_string() + "123e" + exp_prefix + "456"),
1937                        ..Default::default()
1938                    },
1939                )
1940                .unwrap();
1941
1942                test_regex(
1943                    *NON_STRING_PRIMITIVE,
1944                    RegexTest {
1945                        matches: &(prefix.to_string() + "123E" + exp_prefix + "456"),
1946                        ..Default::default()
1947                    },
1948                )
1949                .unwrap();
1950            }
1951
1952            test_regex(
1953                *NON_STRING_PRIMITIVE,
1954                RegexTest { matches: &(prefix.to_string() + "0x1a2b3e4f"), ..Default::default() },
1955            )
1956            .unwrap();
1957
1958            test_regex(
1959                *NON_STRING_PRIMITIVE,
1960                RegexTest { matches: &(prefix.to_string() + "0X1a2b3e4f"), ..Default::default() },
1961            )
1962            .unwrap();
1963
1964            test_regex(
1965                *NON_STRING_PRIMITIVE,
1966                RegexTest { matches: &(prefix.to_string() + "0x1A2B3E4F"), ..Default::default() },
1967            )
1968            .unwrap();
1969
1970            test_regex(
1971                *NON_STRING_PRIMITIVE,
1972                RegexTest { matches: &(prefix.to_string() + "0X1a2B3e4F"), ..Default::default() },
1973            )
1974            .unwrap();
1975
1976            test_regex(
1977                *NON_STRING_PRIMITIVE,
1978                RegexTest {
1979                    matches: &(prefix.to_string() + "0x1a2b3e4fg"),
1980                    error: Some("capture failed"),
1981                    ..Default::default()
1982                },
1983            )
1984            .unwrap();
1985
1986            test_regex(
1987                *NON_STRING_PRIMITIVE,
1988                RegexTest {
1989                    matches: &(prefix.to_string() + "0X"),
1990                    error: Some("capture failed"),
1991                    ..Default::default()
1992                },
1993            )
1994            .unwrap();
1995
1996            test_regex(*NON_STRING_PRIMITIVE, RegexTest { matches: "NaN", ..Default::default() })
1997                .unwrap();
1998
1999            test_regex(
2000                *NON_STRING_PRIMITIVE,
2001                RegexTest { matches: "NAN", error: Some("capture failed"), ..Default::default() },
2002            )
2003            .unwrap();
2004
2005            test_regex(
2006                *NON_STRING_PRIMITIVE,
2007                RegexTest { matches: "NaN0", error: Some("capture failed"), ..Default::default() },
2008            )
2009            .unwrap();
2010
2011            test_regex(
2012                *NON_STRING_PRIMITIVE,
2013                RegexTest { matches: "Infinity", ..Default::default() },
2014            )
2015            .unwrap();
2016
2017            test_regex(
2018                *NON_STRING_PRIMITIVE,
2019                RegexTest {
2020                    matches: "infinity",
2021                    error: Some("capture failed"),
2022                    ..Default::default()
2023                },
2024            )
2025            .unwrap();
2026
2027            test_regex(
2028                *NON_STRING_PRIMITIVE,
2029                RegexTest {
2030                    matches: "Infinity_",
2031                    error: Some("capture failed"),
2032                    ..Default::default()
2033                },
2034            )
2035            .unwrap();
2036
2037            test_regex(
2038                *NON_STRING_PRIMITIVE,
2039                RegexTest { matches: &(prefix.to_string() + "0"), ..Default::default() },
2040            )
2041            .unwrap();
2042
2043            test_regex(
2044                *NON_STRING_PRIMITIVE,
2045                RegexTest {
2046                    matches: &(prefix.to_string() + "1234567890123456789012345678901234567890"),
2047                    ..Default::default()
2048                },
2049            )
2050            .unwrap();
2051
2052            test_regex(
2053                *NON_STRING_PRIMITIVE,
2054                RegexTest { matches: &(prefix.to_string() + "12345.67890"), ..Default::default() },
2055            )
2056            .unwrap();
2057
2058            test_regex(
2059                *NON_STRING_PRIMITIVE,
2060                RegexTest { matches: &(prefix.to_string() + ".67890"), ..Default::default() },
2061            )
2062            .unwrap();
2063
2064            test_regex(
2065                *NON_STRING_PRIMITIVE,
2066                RegexTest { matches: &(prefix.to_string() + "12345."), ..Default::default() },
2067            )
2068            .unwrap();
2069        }
2070    }
2071
2072    #[test]
2073    fn test_regex_unquoted_property_name() {
2074        test_regex(
2075            *UNQUOTED_PROPERTY_NAME_AND_COLON,
2076            RegexTest {
2077                matches: "propname",
2078                suffix: ":",
2079                trailing: " 'some property value',",
2080                ..Default::default()
2081            },
2082        )
2083        .unwrap();
2084
2085        test_regex(
2086            *UNQUOTED_PROPERTY_NAME_AND_COLON,
2087            RegexTest {
2088                matches: "propname",
2089                suffix: "   :",
2090                trailing: " 'some property value',",
2091                ..Default::default()
2092            },
2093        )
2094        .unwrap();
2095
2096        test_regex(
2097            *UNQUOTED_PROPERTY_NAME_AND_COLON,
2098            RegexTest {
2099                error: Some("capture failed"),
2100                // error: Some(&format!(
2101                //     "capture group {} did not match",
2102                //     *UNQUOTED_PROPERTY_NAME_AND_COLON
2103                // )),
2104                matches: "99propname",
2105                suffix: ":",
2106                trailing: " 'property names do not start with digits,",
2107                ..Default::default()
2108            },
2109        )
2110        .unwrap();
2111    }
2112
2113    #[test]
2114    fn test_regex_string() {
2115        test_regex(
2116            *OPEN_QUOTE,
2117            RegexTest {
2118                matches: "'",
2119                next_regex: Some(&*SINGLE_QUOTED),
2120                next_matches: "this is a simple single-quoted string",
2121                next_suffix: "'",
2122                trailing: "",
2123                ..Default::default()
2124            },
2125        )
2126        .unwrap();
2127
2128        test_regex(
2129            *OPEN_QUOTE,
2130            RegexTest {
2131                matches: "'",
2132                next_regex: Some(&*SINGLE_QUOTED),
2133                next_matches: " this is a \\
2134            multiline \"text\" string",
2135                next_suffix: "'",
2136                trailing: ", end of value",
2137                ..Default::default()
2138            },
2139        )
2140        .unwrap();
2141
2142        test_regex(
2143            *OPEN_QUOTE,
2144            RegexTest {
2145                matches: "\"",
2146                next_regex: Some(&*DOUBLE_QUOTED),
2147                next_matches: "this is a simple double-quoted string",
2148                next_suffix: "\"",
2149                trailing: "",
2150                ..Default::default()
2151            },
2152        )
2153        .unwrap();
2154
2155        test_regex(
2156            *OPEN_QUOTE,
2157            RegexTest {
2158                matches: "\"",
2159                next_regex: Some(&*DOUBLE_QUOTED),
2160                next_matches: " this is a \\
2161            multiline 'text' string with escaped \\\" double-quote",
2162                next_suffix: "\"",
2163                trailing: ", end of value",
2164                ..Default::default()
2165            },
2166        )
2167        .unwrap();
2168
2169        test_regex(
2170            *OPEN_QUOTE,
2171            RegexTest {
2172                matches: "\"",
2173                next_regex: Some(&*DOUBLE_QUOTED),
2174                next_matches: "",
2175                next_suffix: "\"",
2176                trailing: ", to test empty string",
2177                ..Default::default()
2178            },
2179        )
2180        .unwrap();
2181    }
2182
2183    #[test]
2184    fn test_regex_braces() {
2185        test_regex(*BRACE, RegexTest { matches: "[", trailing: " 1234 ]", ..Default::default() })
2186            .unwrap();
2187
2188        test_regex(*BRACE, RegexTest { matches: "[", trailing: "true]", ..Default::default() })
2189            .unwrap();
2190
2191        test_regex(
2192            *BRACE,
2193            RegexTest { matches: "[", trailing: "\n  'item',\n  'item2'\n]", ..Default::default() },
2194        )
2195        .unwrap();
2196
2197        test_regex(*BRACE, RegexTest { matches: "]", trailing: ",[1234],", ..Default::default() })
2198            .unwrap();
2199
2200        test_regex(*BRACE, RegexTest { matches: "{", trailing: " 1234 }", ..Default::default() })
2201            .unwrap();
2202
2203        test_regex(*BRACE, RegexTest { matches: "{", trailing: "true}", ..Default::default() })
2204            .unwrap();
2205
2206        test_regex(
2207            *BRACE,
2208            RegexTest { matches: "{", trailing: "\n  'item',\n  'item2'\n}", ..Default::default() },
2209        )
2210        .unwrap();
2211
2212        test_regex(*BRACE, RegexTest { matches: "}", trailing: ",{1234},", ..Default::default() })
2213            .unwrap();
2214    }
2215
2216    #[test]
2217    fn test_regex_command_colon() {
2218        test_regex(
2219            *COMMA,
2220            RegexTest { matches: ",", trailing: "\n  'item',\n  'item2'\n}", ..Default::default() },
2221        )
2222        .unwrap();
2223
2224        test_regex(*COMMA, RegexTest { matches: ",", trailing: "{1234},", ..Default::default() })
2225            .unwrap();
2226
2227        test_capture(&*COLON, None, RegexTest { matches: ":", ..Default::default() }).unwrap();
2228
2229        test_capture(&*COLON, None, RegexTest { matches: "  \t :", ..Default::default() }).unwrap();
2230
2231        test_capture(
2232            &*COLON,
2233            None,
2234            RegexTest { error: Some("capture failed"), matches: " \n :", ..Default::default() },
2235        )
2236        .unwrap();
2237    }
2238
2239    #[test]
2240    fn test_enums() {
2241        let line_comment = Comment::Line("a line comment".to_owned());
2242        assert!(line_comment.is_line());
2243
2244        let block_comment =
2245            Comment::Block { lines: vec!["a block".into(), "comment".into()], align: true };
2246        assert!(block_comment.is_block());
2247
2248        let primitive_value = Primitive::new("l33t".to_owned(), vec![]);
2249        assert!(primitive_value.is_primitive());
2250
2251        let array_value = Array::new(vec![]);
2252        assert!(array_value.is_array());
2253
2254        let object_value = Object::new(vec![]);
2255        assert!(object_value.is_object());
2256    }
2257
2258    #[test]
2259    fn test_document_exceeds_nesting_limit() {
2260        let mut parser = Parser::new(&None);
2261        parser.set_nesting_limit(5);
2262        let good_buffer = r##"{
2263    list_of_lists_of_lists: [[[]]]
2264}"##;
2265        parser.parse_from_location(&good_buffer, 8, 15).expect("should NOT exceed nesting limit");
2266
2267        let bad_buffer = r##"{
2268    list_of_lists_of_lists: [[[[]]]]
2269}"##;
2270        let err = parser
2271            .parse_from_location(&bad_buffer, 8, 15)
2272            .expect_err("should exceed nesting limit");
2273        match err {
2274            Error::Parse(_, message) => {
2275                assert_eq!(
2276                    message,
2277                    r##"The given JSON5 document exceeds the parser's nesting limit of 5:
2278    list_of_lists_of_lists: [[[[]]]]
2279                               ^"##
2280                )
2281            }
2282            _ => panic!("expected a parser error"),
2283        }
2284    }
2285
2286    #[test]
2287    fn test_parse_from_location_error_location() {
2288        let filename = Some("mixed_content.md".to_string());
2289        let mixed_document = r##"
2290Mixed Content Doc
2291=================
2292
2293This is a document with embedded JSON5 content.
2294
2295```json5
2296json5_value = {
2297    // The next line should generate a parser error
2298    999,
2299}
2300```
2301
2302End of mixed content document.
2303"##;
2304        let json5_slice =
2305            &mixed_document[mixed_document.find("{").unwrap()..mixed_document.find("}").unwrap()];
2306        let mut parser = Parser::new(&filename);
2307        let err = parser
2308            .parse_from_location(json5_slice, 8, 15)
2309            .expect_err("check error message for location");
2310        match err {
2311            Error::Parse(Some(loc), message) => {
2312                assert_eq!(loc.file, Some("mixed_content.md".to_owned()));
2313                assert_eq!(loc.line, 10);
2314                assert_eq!(loc.col, 5);
2315                assert_eq!(
2316                    message,
2317                    r##"Object values require property names:
2318    999,
2319    ^~~"##
2320                )
2321            }
2322            _ => panic!("expected a parser error"),
2323        }
2324    }
2325
2326    #[test]
2327    fn test_doc_with_nulls() {
2328        let mut parser = Parser::new(&None);
2329        let buffer = "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[////[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]";
2330        let err = parser.parse(&buffer).expect_err("should fail");
2331        match err {
2332            Error::Parse(_, message) => {
2333                assert!(message.starts_with("Mismatched braces in the document:"));
2334            }
2335            _ => panic!("expected a parser error"),
2336        }
2337    }
2338}