1#![deny(missing_docs)]
6use {
7 crate::{content::*, error::*},
8 lazy_static::lazy_static,
9 regex::{CaptureLocations, Match, Regex},
10 std::cell::RefCell,
11 std::rc::Rc,
12};
13
/// Returns `regex` with a leading `^` anchor, so the resulting pattern can only match at the
/// very beginning of the text it is applied to.
fn from_start(regex: &str) -> String {
    format!("^{}", regex)
}
21
/// Returns `regex` wrapped in `^` ... `$` anchors, so the resulting pattern must match the
/// whole of the text it is applied to.
fn exact_match(regex: &str) -> String {
    format!("^{}$", regex)
}
26
lazy_static! {

    // Each `*_PATTERN` below is one capturing alternative of `NEXT_TOKEN`, and the `usize`
    // constant that follows it is the index of that alternative's capture group. The indices
    // (1 through 9) follow the order in which the patterns are joined in `NEXT_TOKEN`.

    // One or more whitespace characters, excluding newline.
    static ref WHITESPACE_PATTERN: &'static str = r#"([\s&&[^\n]]+)"#;
    static ref WHITESPACE: usize = 1;

    // A single newline character.
    static ref NEWLINE_PATTERN: &'static str = r#"(\n)"#;
    static ref NEWLINE: usize = 2;

    // The two slashes that open a line comment. The comment text itself is consumed
    // separately with `LINE_COMMENT`.
    static ref LINE_COMMENT_SLASHES_PATTERN: &'static str = r#"(//)"#;
    static ref LINE_COMMENT_SLASHES: usize = 3;

    // The opener of a block comment. The comment body and closer are consumed separately
    // with `BLOCK_COMMENT`.
    static ref OPEN_BLOCK_COMMENT_PATTERN: &'static str = r#"(/\*)"#;
    static ref OPEN_BLOCK_COMMENT: usize = 4;

    // `null`, `true`, `false`, and every supported numeric form. The pattern is written in
    // verbose (`(?x)`) mode and documents itself inline.
    static ref NON_STRING_PRIMITIVE_PATTERN: &'static str =
        r#"((?x) # ignore whitespace and allow '#' comments

 # Capture null, true, or false (lowercase only, as in the ECMAScript keywords).
 # End with a word boundary ('\b' marker) to ensure the pattern does not match if
 # it is followed by a word ('\w') character; for example, 'nullify' is a valid
 # identifier (depending on the context) and must not match the 'null' value.

 (?:(?:null|true|false)\b)|

 # Capture all number formats. Every variant is allowed an optional '-' or '+' prefix.

 (?:[-+]?(?:

 # All of the following variants end in a word character. Use '\b' to prevent
 # matching numbers immediately followed by another word character, for example,
 # 'NaNo', 'Infinity_', or '0xadef1234ghi'.

 (?:(?:
 NaN|
 Infinity|

 # hexadecimal notation
 (?:0[xX][0-9a-fA-F]+)|

 # decimal exponent notation
 (?:(?:0|(?:[1-9][0-9]*))?\.[0-9]+[eE][+-]?[0-9]+)|

 # integer exponent notation with optional trailing decimal point
 (?:(?:0|(?:[1-9][0-9]*))\.?[eE][+-]?[0-9]+)|

 # decimal notation
 (?:(?:0|(?:[1-9][0-9]*))?\.[0-9]+)
 )\b)|

 # Capture integers, with an optional trailing decimal point.
 # If the value ends in a digit (no trailing decimal point), apply `\b` to prevent
 # matching integers immediatly followed by a word character (for example, 1200PDT).
 # But if the integer has a trailing decimal, the '\b' does not apply. (Since '.' is
 # not itself a '\w' word character, the '\b' would have the opposite affect,
 # matching only if the next character is a word character, unless there is no next
 # character.)

 (?:
 (?:0|(?:[1-9][0-9]*))(?:\.|\b)
 )
 ))
 )"#;
    static ref NON_STRING_PRIMITIVE: usize = 5;

    // An identifier-like name: first character is `$` or a word character that is not a
    // digit, followed by any number of `$` or word characters. This pattern has no capture
    // group of its own; it is embedded in the two definitions that follow.
    static ref UNQUOTED_PROPERTY_NAME_PATTERN: &'static str = r#"[\$\w&&[^\d]][\$\w]*"#;
    // Whole-string form, used by `matches_unquoted_property_name()`.
    static ref UNQUOTED_PROPERTY_NAME_REGEX: Regex =
        Regex::new(&exact_match(&*UNQUOTED_PROPERTY_NAME_PATTERN)).unwrap();

    // The unquoted name (captured), optional non-newline whitespace, then a colon. The owned
    // `String` is built first so that a `&'static str` view of it can be listed alongside the
    // other patterns.
    static ref UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN_STRING: String =
        r#"(?:("#.to_owned() + *UNQUOTED_PROPERTY_NAME_PATTERN + r#")[\s&&[^\n]]*:)"#;
    static ref UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN: &'static str =
        &UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN_STRING;
    static ref UNQUOTED_PROPERTY_NAME_AND_COLON: usize = 6;

    // The opening quote of a string. The rest of the string is consumed with `SINGLE_QUOTED`
    // or `DOUBLE_QUOTED`, depending on which quote was captured.
    static ref OPEN_QUOTE_PATTERN: &'static str = r#"(["'])"#;
    static ref OPEN_QUOTE: usize = 7;

    // Any one of the four object/array delimiters.
    static ref BRACE_PATTERN: &'static str = r#"([{}\[\]])"#;
    static ref BRACE: usize = 8;

    // A comma.
    static ref COMMA_PATTERN: &'static str = r#"(,)"#;
    static ref COMMA: usize = 9;

    // The tokenizer's main regex: a non-capturing group containing every pattern above joined
    // with `|`, anchored to the start of the remaining input.
    static ref NEXT_TOKEN: Regex = Regex::new(
        &from_start(&(r#"(?:"#.to_owned()
            + &vec![
                *WHITESPACE_PATTERN,
                *NEWLINE_PATTERN,
                *LINE_COMMENT_SLASHES_PATTERN,
                *OPEN_BLOCK_COMMENT_PATTERN,
                *NON_STRING_PRIMITIVE_PATTERN,
                *UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN,
                *OPEN_QUOTE_PATTERN,
                *BRACE_PATTERN,
                *COMMA_PATTERN,
            ].join("|")
            + r#")"#))
    ).unwrap();

    // Everything up to (not including) the next newline; may be empty.
    static ref LINE_COMMENT: Regex = Regex::new(
        &from_start(r#"([^\n]*)"#)
    ).unwrap();

    // The shortest run of characters (newlines included) that is followed by `*/`. Only the
    // text before the closer is captured.
    static ref BLOCK_COMMENT: Regex = Regex::new(
        &from_start(r#"((?:.|\n)*?)\*/"#)
    ).unwrap();

    // The body of a single-quoted string followed by its closing quote (not captured). The
    // body may contain an escaped backslash, an escaped single quote, a backslash-newline
    // pair, or any character other than a single quote or newline.
    static ref SINGLE_QUOTED: Regex = Regex::new(
        &from_start(r#"((?:(?:\\\\)|(?:\\')|(?:\\\n)|(?:[^'\n]))*)(?:')"#)
    ).unwrap();

    // Same as `SINGLE_QUOTED`, for double-quoted strings.
    static ref DOUBLE_QUOTED: Regex = Regex::new(
        &from_start(r#"((?:(?:\\\\)|(?:\\")|(?:\\\n)|(?:[^"\n]))*)(?:")"#)
    ).unwrap();

    // Optional non-newline whitespace followed by a colon. `Parser::add_quoted_string()` uses
    // it to decide whether a quoted string is a property name.
    static ref COLON: Regex = Regex::new(
        &from_start(r#"([\s&&[^\n]]*:)"#)
    ).unwrap();
}
167
168fn matches_unquoted_property_name(strval: &str) -> bool {
169 const KEYWORDS: &'static [&'static str] = &["true", "false", "null"];
170 UNQUOTED_PROPERTY_NAME_REGEX.is_match(strval) && !KEYWORDS.contains(&strval)
171}
172
173struct Capturer {
174 regex: &'static Regex,
175 overall_match: Option<String>,
176 locations: CaptureLocations,
177}
178
179impl Capturer {
180 fn new(regex: &'static Regex) -> Self {
181 Self { regex, overall_match: None, locations: regex.capture_locations() }
182 }
183
184 fn capture<'a>(&mut self, text: &'a str) -> Option<Match<'a>> {
185 let captures = self.regex.captures_read(&mut self.locations, text);
186 if let Some(captures) = &captures {
187 self.overall_match = Some(text[0..captures.end()].to_string());
188 } else {
189 self.overall_match = None;
190 }
191 captures
192 }
193
194 fn overall_match<'a>(&'a self) -> Option<&'a str> {
195 self.overall_match.as_deref()
196 }
197
198 fn captured<'a>(&'a self, i: usize) -> Option<&'a str> {
199 if let (Some(overall_match), Some((start, end))) =
200 (&self.overall_match, self.locations.get(i))
201 {
202 Some(&overall_match[start..end])
203 } else {
204 None
205 }
206 }
207}
208
/// The two lines of context appended to a parse error: the (possibly shortened) source line,
/// and the position and width of the marker drawn beneath it.
struct ParserErrorContext {
    /// The source line to display.
    line: String,

    /// Number of spaces that precede the `^` in the indicator line.
    indicator_start: usize,

    /// Total width of the indicator: one `^` followed by `indicator_len - 1` `~` characters.
    indicator_len: usize,
}

impl ParserErrorContext {
    /// Builds a context from its parts.
    ///
    /// # Panics
    ///
    /// Panics if `indicator_len` is zero.
    fn new(line: String, indicator_start: usize, indicator_len: usize) -> Self {
        assert!(indicator_len >= 1);
        Self { line, indicator_start, indicator_len }
    }

    /// The source line to display.
    fn line(&self) -> &str {
        self.line.as_str()
    }

    /// Renders the indicator line: `indicator_start` spaces, a `^`, and then enough `~`
    /// characters to reach `indicator_len` in total.
    fn indicator(&self) -> String {
        let padding = " ".repeat(self.indicator_start);
        let tail = "~".repeat(self.indicator_len.saturating_sub(1));
        format!("{}^{}", padding, tail)
    }
}
244
/// Tokenizes a document with the regexes defined above and assembles the resulting values,
/// comments, and containers into a tree rooted at a document-level `Array`.
pub(crate) struct Parser<'parser> {
    /// The part of the input buffer that has not been consumed yet.
    remaining: &'parser str,

    /// Optional file name, copied into every `Location` this parser reports.
    filename: &'parser Option<String>,

    /// Text shown as the source line in error messages for the current token.
    current_line: &'parser str,

    /// Text that becomes `current_line` once `line_number` catches up with
    /// `next_line_number`.
    next_line: &'parser str,

    /// Line number of the most recently consumed token.
    line_number: usize,

    /// Column number at which the most recently consumed token started.
    column_number: usize,

    /// Line number at which the next token will start.
    next_line_number: usize,

    /// Column number at which the next token will start.
    next_column_number: usize,

    /// The containers currently open, outermost first. `parse_from_location()` seeds it
    /// with the document-level array.
    scope_stack: Vec<Rc<RefCell<Value>>>,

    /// Maximum length `scope_stack` may reach before `add_value()` reports an error.
    nesting_limit: usize,

    /// Capturer for the `COLON` regex, used to recognize a quoted string as a property name.
    colon_capturer: Capturer,
}
282
283impl<'parser> Parser<'parser> {
284 pub const DEFAULT_NESTING_LIMIT: usize = 1000;
286
287 pub fn new(filename: &'parser Option<String>) -> Self {
288 let remaining = "";
289 let current_line = &remaining;
290 Self {
291 remaining,
292 filename,
293 current_line,
294 next_line: current_line,
295 line_number: 1,
296 column_number: 1,
297 next_line_number: 1,
298 next_column_number: 1,
299 scope_stack: Vec::default(),
300 nesting_limit: Self::DEFAULT_NESTING_LIMIT,
301 colon_capturer: Capturer::new(&COLON),
302 }
303 }
304
/// Replaces the nesting limit. `add_value()` reports an error once the number of open
/// scopes (including the document-level array) exceeds this value.
pub fn set_nesting_limit(&mut self, new_limit: usize) {
    self.nesting_limit = new_limit;
}
311
312 fn current_scope(&self) -> Rc<RefCell<Value>> {
313 assert!(self.scope_stack.len() > 0);
314 self.scope_stack.last().unwrap().clone()
315 }
316
317 fn with_container<F, T>(&self, f: F) -> Result<T, Error>
318 where
319 F: FnOnce(&mut dyn Container) -> Result<T, Error>,
320 {
321 match &mut *self.current_scope().borrow_mut() {
322 Value::Array { val, .. } => f(val),
323 Value::Object { val, .. } => f(val),
324 unexpected => Err(Error::internal(
325 self.location(),
326 format!(
327 "Current scope should be an Array or Object, but scope was {:?}",
328 unexpected
329 ),
330 )),
331 }
332 }
333
334 fn with_array<F, T>(&self, f: F) -> Result<T, Error>
335 where
336 F: FnOnce(&mut Array) -> Result<T, Error>,
337 {
338 match &mut *self.current_scope().borrow_mut() {
339 Value::Array { val, .. } => f(val),
340 unexpected => Err(self.error(format!(
341 "Invalid Array token found while parsing an {:?} (mismatched braces?)",
342 unexpected
343 ))),
344 }
345 }
346
347 fn with_object<F, T>(&self, f: F) -> Result<T, Error>
348 where
349 F: FnOnce(&mut Object) -> Result<T, Error>,
350 {
351 match &mut *self.current_scope().borrow_mut() {
352 Value::Object { val, .. } => f(val),
353 unexpected => Err(self.error(format!(
354 "Invalid Object token found while parsing an {:?} (mismatched braces?)",
355 unexpected
356 ))),
357 }
358 }
359
360 fn is_in_array(&self) -> bool {
361 (*self.current_scope().borrow()).is_array()
362 }
363
/// Reports whether the innermost open scope is an object. This is computed as "not an
/// array", so it relies on scopes only ever being arrays or objects.
fn is_in_object(&self) -> bool {
    !self.is_in_array()
}
367
368 fn add_value(&mut self, value: Value) -> Result<(), Error> {
369 let is_container = value.is_object() || value.is_array();
370 let value_ref = Rc::new(RefCell::new(value));
371 self.with_container(|container| container.add_value(value_ref.clone(), self))?;
372 if is_container {
373 self.scope_stack.push(value_ref.clone());
374 if self.scope_stack.len() > self.nesting_limit {
375 return Err(self.error(format!(
376 "The given JSON5 document exceeds the parser's nesting limit of {}",
377 self.nesting_limit
378 )));
379 }
380 }
381 Ok(())
382 }
383
/// Tells the innermost container that a newline token was consumed.
fn on_newline(&mut self) -> Result<(), Error> {
    self.with_container(|container| container.on_newline())
}
387
388 fn add_line_comment(
401 &self,
402 captured: Option<&str>,
403 pending_new_line_comment_block: bool,
404 ) -> Result<bool, Error> {
405 match captured {
406 Some(content) => {
407 let content = content.trim_end();
408 self.with_container(|container| {
409 container.add_line_comment(
410 content,
411 self.column_number,
412 pending_new_line_comment_block,
413 )
414 })
415 }
416 None => Err(Error::internal(
417 self.location(),
418 "Line comment regex should support empty line comment",
419 )),
420 }
421 }
422
423 fn add_block_comment(&self, captured: Option<&str>) -> Result<(), Error> {
424 match captured {
425 Some(content) => {
426 let indent_count = self.column_number - 3;
430 let indent = " ".repeat(indent_count);
431 if content
432 .lines()
433 .enumerate()
434 .find(|(index, line)| {
435 *index > 0 && !line.starts_with(&indent) && line.trim() != ""
436 })
437 .is_some()
438 {
439 self.with_container(|container| {
440 container.add_block_comment(Comment::Block {
441 lines: content.lines().map(|line| line.to_owned()).collect(),
442 align: false,
443 })
444 })
445 } else {
446 let trimmed_lines = content
449 .lines()
450 .enumerate()
451 .map(|(index, line)| {
452 if index == 0 {
453 line
454 } else if line.trim().len() == 0 {
455 ""
456 } else {
457 &line[indent_count..]
458 }
459 })
460 .collect::<Vec<&str>>();
461 self.with_container(|container| {
462 container.add_block_comment(Comment::Block {
463 lines: trimmed_lines.iter().map(|line| line.to_string()).collect(),
464 align: true,
465 })
466 })
467 }
468 }
469 None => return Err(self.error("Block comment started without closing \"*/\"")),
470 }
471 }
472
/// Takes the pending comments from the innermost container, so they can be attached to
/// the value about to be created.
fn take_pending_comments(&mut self) -> Result<Vec<Comment>, Error> {
    self.with_container(|container| Ok(container.take_pending_comments()))
}
476
/// Records `name` as the pending property name of the innermost object. Fails (via
/// `with_object()`) if the innermost scope is not an object.
fn set_pending_property(&self, name: &str) -> Result<(), Error> {
    self.with_object(|object| object.set_pending_property(name.to_string(), self))
}
485
486 fn add_quoted_string(&mut self, quote: &str, captured: Option<&str>) -> Result<(), Error> {
500 match captured {
501 Some(unquoted) => {
502 if self.is_in_object()
503 && !self.with_object(|object| object.has_pending_property())?
504 {
505 let captured = self.colon_capturer.capture(self.remaining);
506 if self.consume_if_matched(captured) {
507 if matches_unquoted_property_name(&unquoted) {
508 self.set_pending_property(unquoted)
509 } else {
510 self.set_pending_property(&format!("{}{}{}", quote, &unquoted, quote))
511 }
512 } else {
513 return Err(self.error("Property name separator (:) missing"));
514 }
515 } else {
516 let comments = self.take_pending_comments()?;
517 self.add_value(Primitive::new(
518 format!("{}{}{}", quote, &unquoted, quote),
519 comments,
520 ))
521 }
522 }
523 None => return Err(self.error("Unclosed string")),
524 }
525 }
526
527 fn add_non_string_primitive(&mut self, non_string_primitive: &str) -> Result<(), Error> {
528 let comments = self.take_pending_comments()?;
529 self.add_value(Primitive::new(non_string_primitive.to_string(), comments))
530 }
531
532 fn on_brace(&mut self, brace: &str) -> Result<(), Error> {
533 match brace {
534 "{" => self.open_object(),
535 "}" => self.close_object(),
536 "[" => self.open_array(),
537 "]" => self.close_array(),
538 unexpected => Err(Error::internal(
539 self.location(),
540 format!("regex returned unexpected brace string: {}", unexpected),
541 )),
542 }
543 }
544
545 fn open_object(&mut self) -> Result<(), Error> {
546 let comments = self.take_pending_comments()?;
547 self.add_value(Object::new(comments))
548 }
549
550 fn exit_scope(&mut self) -> Result<(), Error> {
551 self.scope_stack.pop();
552 if self.scope_stack.is_empty() {
553 Err(self.error("Closing brace without a matching opening brace"))
554 } else {
555 Ok(())
556 }
557 }
558
559 fn close_object(&mut self) -> Result<(), Error> {
560 self.with_object(|object| object.close(self))?;
561 self.exit_scope()
562 }
563
564 fn open_array(&mut self) -> Result<(), Error> {
565 let comments = self.take_pending_comments()?;
566 self.add_value(Array::new(comments))
567 }
568
569 fn close_array(&mut self) -> Result<(), Error> {
570 self.with_array(|array| array.close(self))?;
571 self.exit_scope()
572 }
573
/// Tells the innermost container that a comma was consumed, ending the current value.
fn end_value(&self) -> Result<(), Error> {
    self.with_container(|container| container.end_value(self))
}
577
578 pub fn location(&self) -> Option<Location> {
579 Some(Location::new(self.filename.clone(), self.line_number, self.column_number))
580 }
581
582 pub fn error(&self, err: impl std::fmt::Display) -> Error {
583 const MAX_ERROR_LINE_LEN: usize = 200;
584 const MIN_CONTEXT_LEN: usize = 10;
585 const ELLIPSIS: &str = "\u{2026}";
586 let error_context = self.get_error_context(MAX_ERROR_LINE_LEN, MIN_CONTEXT_LEN, ELLIPSIS);
587 Error::parse(
588 self.location(),
589 format!("{}:\n{}\n{}", err, error_context.line(), error_context.indicator()),
590 )
591 }
592
593 fn consume_if_matched<'a>(&mut self, matched: Option<Match<'a>>) -> bool {
594 self.column_number = self.next_column_number;
595 if self.line_number < self.next_line_number {
596 self.line_number = self.next_line_number;
597 self.current_line = self.next_line;
598 }
599 if let Some(matched) = matched {
600 let matched_and_remaining = &self.remaining[matched.start()..];
601 self.remaining = &self.remaining[matched.end()..];
602
603 let mut some_matched_lines = None;
606 for c in matched.as_str().chars() {
607 if c == '\n' {
608 let matched_lines = some_matched_lines
609 .get_or_insert_with(|| matched_and_remaining.lines().skip(1));
610 self.next_line = matched_lines.next().unwrap_or(self.current_line);
611 self.next_line_number += 1;
612 self.next_column_number = 1;
613 } else {
614 self.next_column_number += 1;
615 }
616 }
617 true
618 } else {
619 false
620 }
621 }
622
623 fn capture(&mut self, capturer: &mut Capturer) -> bool {
624 self.consume_if_matched(capturer.capture(self.remaining))
625 }
626
627 fn consume<'a>(&mut self, capturer: &'a mut Capturer) -> Option<&'a str> {
628 if self.capture(capturer) {
629 capturer.captured(1)
630 } else {
631 None
632 }
633 }
634
/// Parses `buffer` as a complete document that starts at line 1, column 1, and returns
/// the document-level array. See `parse_from_location()`.
pub fn parse(&mut self, buffer: &'parser str) -> Result<Array, Error> {
    self.parse_from_location(buffer, 1, 1)
}
641
642 pub fn parse_from_location(
646 &mut self,
647 buffer: &'parser str,
648 starting_line_number: usize,
649 starting_column_number: usize,
650 ) -> Result<Array, Error> {
651 self.remaining = buffer;
652 self.current_line = &self.remaining;
653
654 assert!(starting_line_number > 0, "document line numbers are 1-based");
655 self.next_line_number = starting_line_number;
656 self.next_column_number = starting_column_number;
657
658 self.next_line = self.current_line;
659 self.line_number = self.next_line_number - 1;
660 self.column_number = self.next_column_number - 1;
661 self.scope_stack = vec![Rc::new(RefCell::new(Array::new(vec![])))];
662
663 let mut next_token = Capturer::new(&NEXT_TOKEN);
664 let mut single_quoted = Capturer::new(&SINGLE_QUOTED);
665 let mut double_quoted = Capturer::new(&DOUBLE_QUOTED);
666 let mut line_comment = Capturer::new(&LINE_COMMENT);
667 let mut block_comment = Capturer::new(&BLOCK_COMMENT);
668
669 let mut just_captured_line_comment = false;
676 let mut pending_blank_line = false;
677 let mut pending_new_line_comment_block = false;
678
679 while self.remaining.len() > 0 {
680 let mut reset_line_comment_break_check = true;
682
683 if self.capture(&mut next_token) {
684 if let Some(_) = next_token.captured(*WHITESPACE) {
687 reset_line_comment_break_check = false;
688 Ok(()) } else if let Some(_) = next_token.captured(*NEWLINE) {
690 reset_line_comment_break_check = false;
691 if just_captured_line_comment {
692 if pending_blank_line {
693 pending_new_line_comment_block = true;
694 pending_blank_line = false;
695 } else if !pending_new_line_comment_block {
696 pending_blank_line = true;
697 }
698 }
699 self.on_newline()
700 } else if let Some(_) = next_token.captured(*COMMA) {
701 self.end_value()
702 } else if let Some(brace) = next_token.captured(*BRACE) {
703 self.on_brace(&brace)
704 } else if let Some(non_string_primitive) =
705 next_token.captured(*NON_STRING_PRIMITIVE)
706 {
707 self.add_non_string_primitive(&non_string_primitive)
708 } else if let Some(quote) = next_token.captured(*OPEN_QUOTE) {
709 let quoted_string = if quote == "'" {
710 self.consume(&mut single_quoted)
711 } else {
712 self.consume(&mut double_quoted)
713 };
714 self.add_quoted_string("e, quoted_string)
715 } else if let Some(unquoted_property_name) =
716 next_token.captured(*UNQUOTED_PROPERTY_NAME_AND_COLON)
717 {
718 self.set_pending_property(unquoted_property_name)
719 } else if let Some(_line_comment_start) = next_token.captured(*LINE_COMMENT_SLASHES)
720 {
721 reset_line_comment_break_check = false;
722 pending_blank_line = false;
723 let line_comment = self.consume(&mut line_comment);
724 if self.add_line_comment(line_comment, pending_new_line_comment_block)? {
725 just_captured_line_comment = true;
727 pending_new_line_comment_block = false;
728 } Ok(())
730 } else if let Some(_block_comment_start) = next_token.captured(*OPEN_BLOCK_COMMENT)
731 {
732 let block_comment = self.consume(&mut block_comment);
733 self.add_block_comment(block_comment)
734 } else {
735 Err(Error::internal(
736 self.location(),
737 format!(
738 "NEXT_TOKEN matched an unexpected capture group: {}",
739 next_token.overall_match().unwrap_or("")
740 ),
741 ))
742 }
743 } else {
744 Err(self.error("Unexpected token"))
745 }?;
746
747 if reset_line_comment_break_check {
748 just_captured_line_comment = false;
749 pending_blank_line = false;
750 pending_new_line_comment_block = false;
751 }
752 }
753 self.remaining = "";
754 self.close_document()?;
755
756 match Rc::try_unwrap(self.scope_stack.pop().unwrap())
757 .map_err(|_| Error::internal(None, "Rc<> for document array could not be unwrapped."))?
758 .into_inner()
759 {
760 Value::Array { val, .. } => Ok(val),
761 unexpected => Err(Error::internal(
762 self.location(),
763 format!("Final scope should be an Array, but scope was {:?}", unexpected),
764 )),
765 }
766 }
767
768 fn close_document(&mut self) -> Result<(), Error> {
769 if self.scope_stack.len() == 1 {
770 Ok(())
771 } else {
772 Err(self.error("Mismatched braces in the document"))
773 }
774 }
775
776 fn get_error_context(
783 &self,
784 max_error_line_len: usize,
785 min_context_len: usize,
786 ellipsis: &str,
787 ) -> ParserErrorContext {
788 let error_line_len = self.current_line.chars().count();
789
790 let indicator_start = std::cmp::min(self.column_number - 1, error_line_len);
792
793 let indicator_len = if self.line_number == self.next_line_number {
794 std::cmp::max(
795 std::cmp::min(
796 self.next_column_number - self.column_number,
797 error_line_len - indicator_start,
798 ),
799 1,
800 )
801 } else {
802 1
803 };
804
805 if error_line_len <= max_error_line_len {
806 ParserErrorContext::new(self.current_line.to_owned(), indicator_start, indicator_len)
807 } else {
808 trim_error_line_and_indicator(
809 self.current_line,
810 indicator_start,
811 indicator_len,
812 error_line_len,
813 max_error_line_len,
814 min_context_len,
815 ellipsis,
816 )
817 }
818 }
819}
820
/// A range expressed in characters rather than bytes.
struct CharRange {
    /// Start (inclusive) and end (exclusive) character positions.
    range: std::ops::Range<usize>,
}

impl CharRange {
    /// Wraps a range of character positions.
    fn new(range: std::ops::Range<usize>) -> Self {
        CharRange { range }
    }

    /// Converts the character range into the equivalent byte range of `from_string`.
    ///
    /// A position equal to the number of characters in the string maps to the string's
    /// byte length. Returns `None` if either position lies beyond that.
    fn to_byte_range(self, from_string: &str) -> Option<std::ops::Range<usize>> {
        // Byte offset at which the character at `char_pos` starts; the one-past-the-end
        // position is supplied by chaining the string's byte length.
        let byte_offset_of = |char_pos: usize| -> Option<usize> {
            from_string
                .char_indices()
                .map(|(byte_pos, _)| byte_pos)
                .chain(std::iter::once(from_string.len()))
                .nth(char_pos)
        };
        let start_byte = byte_offset_of(self.range.start)?;
        let end_byte = byte_offset_of(self.range.end)?;
        Some(start_byte..end_byte)
    }
}
856
857fn trim_error_line_and_indicator(
858 error_line: &str,
859 indicator_start: usize,
860 mut indicator_len: usize,
861 error_line_len: usize,
862 max_error_line_len: usize,
863 min_context_len: usize,
864 ellipsis: &str,
865) -> ParserErrorContext {
866 let ellipsis_len = ellipsis.chars().count();
867
868 assert!(max_error_line_len > ellipsis_len);
869 assert!(max_error_line_len < error_line_len);
870 assert!(
871 indicator_start <= error_line_len,
872 "Error because indicator_start={} > error_line_len={}\n{}",
873 indicator_start,
874 error_line_len,
875 error_line
876 );
877 assert!(
878 indicator_len == 1 || (indicator_start + indicator_len) <= error_line_len,
879 "Error because indicator_start={}, indicator_len={}, error_line_len={}\n{}",
880 indicator_start,
881 indicator_len,
882 error_line_len,
883 error_line
884 );
885
886 indicator_len = std::cmp::min(indicator_len, max_error_line_len);
887
888 let min_right_context_len = std::cmp::max(min_context_len, indicator_len);
889
890 let context_end =
891 std::cmp::min(indicator_start + min_right_context_len, error_line_len - ellipsis_len);
892 if context_end < max_error_line_len - ellipsis_len {
893 let slice_bytes = CharRange::new(0..(max_error_line_len - ellipsis_len))
894 .to_byte_range(error_line)
895 .expect("char indices should map to String bytes");
896 return ParserErrorContext::new(
897 error_line[slice_bytes].to_string() + ellipsis,
898 indicator_start,
899 indicator_len,
900 );
901 }
902
903 let context_start = indicator_start - std::cmp::min(indicator_start, min_context_len);
904 if error_line_len - context_start < max_error_line_len - ellipsis_len {
905 let start_char = error_line_len - (max_error_line_len - ellipsis_len);
906 let slice_bytes = CharRange::new(start_char..error_line_len)
907 .to_byte_range(error_line)
908 .expect("char indices should map to String bytes");
909 return ParserErrorContext::new(
910 ellipsis.to_owned() + &error_line[slice_bytes],
911 (indicator_start + ellipsis_len) - start_char,
912 indicator_len,
913 );
914 }
915
916 let margin_chars =
917 max_error_line_len - std::cmp::min(max_error_line_len, (ellipsis_len * 2) + indicator_len);
918 let right_margin = std::cmp::min(
919 error_line_len - std::cmp::min(error_line_len, indicator_start + indicator_len),
920 margin_chars / 2,
921 );
922 let left_margin = margin_chars - right_margin;
923 let mut start_char = indicator_start - left_margin;
924 let mut end_char =
925 std::cmp::min(indicator_start + indicator_len + right_margin, error_line_len);
926 let mut start_ellipsis = ellipsis;
927 let mut end_ellipsis = ellipsis;
928 if start_char == 0 {
929 start_ellipsis = "";
930 end_char += ellipsis_len;
931 } else if end_char == error_line_len {
932 end_ellipsis = "";
933 start_char -= ellipsis_len;
934 }
935
936 let slice_bytes = CharRange::new(start_char..end_char)
937 .to_byte_range(error_line)
938 .expect("char indices should map to String bytes");
939 ParserErrorContext::new(
940 start_ellipsis.to_owned() + &error_line[slice_bytes] + end_ellipsis,
941 (indicator_start + ellipsis_len) - start_char,
942 indicator_len,
943 )
944}
945
946#[cfg(test)]
947mod tests {
948 use {super::*, crate::test_error, proptest::prelude::*};
949
/// Test helper: locates `pattern` in `error_line`, builds the error context the parser
/// would produce for an error on that pattern, and compares the resulting line and
/// indicator with the expected strings.
///
/// Only the part of `pattern` before its first newline is searched for, but the whole
/// pattern's length is used as the indicator length. Offsets come from byte-based `find()`
/// and `len()`, so callers are assumed to pass ASCII text.
///
/// Mismatches are printed and also returned as `Err`.
fn gen_error_line_test(
    error_line: &str,
    pattern: &str,
    max_error_line_len: usize,
    min_context_len: usize,
    ellipsis: &str,
    expected_errorline: &str,
    expected_indicator: &str,
) -> Result<(), String> {
    let some_newline = pattern.find("\n");
    let pattern_line1 =
        if let Some(newline) = some_newline { &pattern[0..newline] } else { &pattern };
    assert!(pattern_line1.len() > 0);
    let indicator_start = error_line.find(pattern_line1).expect("pattern not found in line");
    let end = indicator_start + pattern.len();
    let indicator_len = end - indicator_start;
    // Same decision as `Parser::get_error_context()`: only over-long lines are trimmed.
    let error_context = if error_line.chars().count() <= max_error_line_len {
        ParserErrorContext::new(error_line.to_owned(), indicator_start, indicator_len)
    } else {
        trim_error_line_and_indicator(
            error_line,
            indicator_start,
            indicator_len,
            error_line.chars().count(),
            max_error_line_len,
            min_context_len,
            ellipsis,
        )
    };
    let actual_errorline = error_context.line();
    let actual_indicator = error_context.indicator();
    let mut errors = String::new();
    // When only one of the two strings differs, the other one is still printed so the
    // output always shows a line together with its indicator.
    if expected_errorline != actual_errorline {
        println!(
            r#"
expected_errorline: >>>{}<<< (charlen={})
 actual_errorline: >>>{}<<< (charlen={} of {}, min context len={})"#,
            expected_errorline,
            expected_errorline.chars().count(),
            actual_errorline,
            actual_errorline.chars().count(),
            max_error_line_len,
            min_context_len,
        );
        errors.push_str("actual errorline does not match expected");
    } else if expected_indicator != actual_indicator {
        println!(
            r#"
 {}"#,
            actual_errorline,
        );
    }
    if expected_indicator != actual_indicator {
        if errors.len() > 0 {
            errors.push_str(" and ");
        }
        println!(
            r#"
expected_indicator: {}
 actual_indicator: {}"#,
            expected_indicator, actual_indicator,
        );
        errors.push_str("actual indicator does not match expected");
    } else if expected_errorline != actual_errorline {
        println!(
            r#"
 {}"#,
            actual_indicator,
        );
    }
    if errors.len() > 0 {
        println!("{}", errors);
        Err(errors)
    } else {
        Ok(())
    }
}
1027
/// A line shorter than the limit is shown unchanged, with the indicator under the pattern.
#[test]
fn test_error_line1() {
    // "bad" starts at column 13 of the line. The padding is built explicitly so it cannot
    // be lost to whitespace reformatting.
    let expected_indicator = " ".repeat(13) + "^~~";
    gen_error_line_test(
        " good token, bad token;",
        "bad",
        30,
        10,
        " ... ",
        " good token, bad token;",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
1041
/// A pattern at the very end of an over-long line: only the start of the line is cut.
#[test]
fn test_error_line2() {
    // "token;" starts at column 14 of the expected (trimmed) line.
    let expected_indicator = " ".repeat(14) + "^~~~~~";
    gen_error_line_test(
        " good token, bad token;",
        "token;",
        20,
        10,
        " ... ",
        " ... ken, bad token;",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
1055
/// Same as `test_error_line2`, with a one-character ellipsis.
#[test]
fn test_error_line2_short_ellipsis() {
    // "token;" starts at column 14 of the expected (trimmed) line.
    let expected_indicator = " ".repeat(14) + "^~~~~~";
    gen_error_line_test(
        " good token, bad token;",
        "token;",
        20,
        10,
        "…",
        "…d token, bad token;",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
1069
/// A pattern in the middle of an over-long line with a wide ellipsis: both ends are cut.
#[test]
fn test_error_line3() {
    // "bad" starts at column 9 of the expected (trimmed) line.
    let expected_indicator = " ".repeat(9) + "^~~";
    gen_error_line_test(
        "A good token, bad token;",
        "bad",
        20,
        10,
        " ... ",
        " ... en, bad to ... ",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
1083
/// Same input as `test_error_line3` with a one-character ellipsis: the window reaches the
/// end of the line, so only the start is cut.
#[test]
fn test_error_line3_short_ellipsis() {
    // "bad" starts at column 10 of the expected (trimmed) line.
    let expected_indicator = " ".repeat(10) + "^~~";
    gen_error_line_test(
        "A good token, bad token;",
        "bad",
        20,
        10,
        "…",
        "…d token, bad token;",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
1097
/// Same as `test_error_line3_short_ellipsis`, with the ellipsis written as a `\u{...}`
/// escape.
#[test]
fn test_error_line3_escaped_unicode_ellipsis() {
    // "bad" starts at column 10 of the expected (trimmed) line.
    let expected_indicator = " ".repeat(10) + "^~~";
    gen_error_line_test(
        "A good token, bad token;",
        "bad",
        20,
        10,
        "\u{2026}",
        "…d token, bad token;",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
1111
/// A limit too small for any context: only the pattern survives between the ellipses.
#[test]
fn test_error_line4() {
    // "bad" follows the five-character leading ellipsis.
    let expected_indicator = " ".repeat(5) + "^~~";
    gen_error_line_test(
        "A good token, bad token;",
        "bad",
        10,
        10,
        " ... ",
        " ... bad ... ",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
1125
/// Same as `test_error_line4`, with a smaller minimum context length.
#[test]
fn test_error_line4_short_context() {
    // "bad" follows the five-character leading ellipsis.
    let expected_indicator = " ".repeat(5) + "^~~";
    gen_error_line_test(
        "A good token, bad token;",
        "bad",
        10,
        5,
        " ... ",
        " ... bad ... ",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
1139
/// A pattern nearly as long as the limit is kept whole between the two ellipses.
#[test]
fn test_error_line4_long_pattern() {
    // "bad token" follows the five-character leading ellipsis.
    let expected_indicator = " ".repeat(5) + "^~~~~~~~~";
    gen_error_line_test(
        "A good token, bad token;",
        "bad token",
        10,
        10,
        " ... ",
        " ... bad token ... ",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
1153
/// Same as `test_error_line4_long_pattern`, with a smaller minimum context length.
#[test]
fn test_error_line4_long_pattern_short_context_big_ellipsis() {
    // "bad token" follows the five-character leading ellipsis.
    let expected_indicator = " ".repeat(5) + "^~~~~~~~~";
    gen_error_line_test(
        "A good token, bad token;",
        "bad token",
        10,
        4,
        " ... ",
        " ... bad token ... ",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
1167
/// A small limit with a one-character ellipsis leaves room for a little context on both
/// sides of the pattern.
#[test]
fn test_error_line4_long_pattern_short_context_short_ellipsis() {
    // "bad" starts at column 4 of the expected (trimmed) line.
    let expected_indicator = " ".repeat(4) + "^~~";
    gen_error_line_test(
        "A good token, bad token;",
        "bad",
        10,
        4,
        "\u{2026}",
        "…n, bad t…",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
1181
/// A long bracketed line that is still within the 200-character limit is expected back
/// unchanged.
// NOTE(review): the expected indicator below has a single leading space, yet `a_prop` sits
// well past column 1 of the expected line. The padding looks like it was collapsed; confirm
// against the upstream test before relying on this expectation.
#[test]
fn test_error_line5() {
    gen_error_line_test(
        r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
        "a_prop",
        200,
        10,
        " ... ",
        r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
        r#" ^~~~~~"#,
    ).expect("actual should match expected");
}
1194
/// The same line as `test_error_line5` with a 100-character limit: the pattern is near the
/// start, so only the end of the line is expected to be cut.
// NOTE(review): the expected indicator below has a single leading space, yet `a_prop` sits
// well past column 1 of the expected line. The padding looks like it was collapsed; confirm
// against the upstream test before relying on this expectation.
#[test]
fn test_error_line6() {
    gen_error_line_test(
        r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
        "a_prop",
        100,
        10,
        " ... ",
        r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#,
        r#" ^~~~~~"#,
    ).expect("actual should match expected");
}
1207
/// A slightly shorter bracketed line with a minimum context of 5: only the end of the line
/// is expected to be cut.
// NOTE(review): the expected indicator below has a single leading space, yet `a_prop` sits
// well past column 1 of the expected line. The padding looks like it was collapsed; confirm
// against the upstream test before relying on this expectation.
#[test]
fn test_error_line7() {
    gen_error_line_test(
        r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
        "a_prop",
        100,
        5,
        " ... ",
        r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#,
        r#" ^~~~~~"#,
    ).expect("actual should match expected");
}
1220
/// A longer bracketed line in which the pattern is still close enough to the start that
/// only the end of the line is expected to be cut.
// NOTE(review): the expected indicator below has a single leading space, yet `a_prop` sits
// well past column 1 of the expected line. The padding looks like it was collapsed; confirm
// against the upstream test before relying on this expectation.
#[test]
fn test_error_line7_more_braces() {
    gen_error_line_test(
        r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
        "a_prop",
        100,
        10,
        " ... ",
        r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#,
        r#" ^~~~~~"#,
    ).expect("actual should match expected");
}
1233
/// A line with many more opening than closing brackets: the pattern is near the end, so
/// only the start of the line is expected to be cut.
// NOTE(review): the expected indicator below has a single leading space, yet `a_prop` sits
// well past column 1 of the expected line. The padding looks like it was collapsed; confirm
// against the upstream test before relying on this expectation.
#[test]
fn test_error_line8() {
    gen_error_line_test(
        r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
        "a_prop",
        100,
        10,
        " ... ",
        r#" ... [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
        r#" ^~~~~~"#,
    ).expect("actual should match expected");
}
1246
1247 #[test]
1248 fn test_error_line9() {
1249 gen_error_line_test(
1250 r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
1251 "a_prop",
1252 100,
1253 10,
1254 " ... ",
1255 r#" ... [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#,
1256 r#" ^~~~~~"#,
1257 ).expect("actual should match expected");
1258 }
1259
1260 lazy_static! {
1261 static ref NO_PERSIST: ProptestConfig = ProptestConfig {
1269 failure_persistence: None,
1270 .. ProptestConfig::default()
1271 };
1272
1273 static ref EXTRA_CASES_NO_PERSIST: ProptestConfig = ProptestConfig {
1275 failure_persistence: None,
1276 cases: 1024,
1277 .. ProptestConfig::default()
1278 };
1279 }
1280
1281 struct RegexTest<'a> {
1282 error: Option<&'a str>,
1283 prefix: &'a str,
1284 matches: &'a str,
1285 suffix: &'a str,
1286 next_regex: Option<&'a Regex>,
1287 next_prefix: &'a str,
1288 next_matches: &'a str,
1289 next_suffix: &'a str,
1290 trailing: &'a str,
1291 }
1292
1293 impl<'a> Default for RegexTest<'a> {
1294 fn default() -> Self {
1295 RegexTest {
1296 error: None,
1297 prefix: "",
1298 matches: "",
1299 suffix: "",
1300 next_regex: None,
1301 next_prefix: "",
1302 next_matches: "",
1303 next_suffix: "",
1304 trailing: "",
1305 }
1306 }
1307 }
1308
1309 fn try_capture(
1317 regex: &Regex,
1318 group_id: Option<usize>,
1319 test: RegexTest<'_>,
1320 ) -> Result<String, Error> {
1321 println!();
1322 println!("pattern: '{}'", regex.as_str());
1323
1324 let trailing = test.next_suffix.to_owned() + test.trailing;
1325 let test_string =
1326 test.prefix.to_owned() + test.matches + test.suffix + test.next_matches + &trailing;
1327 println!("capturing from: '{}'", test_string.escape_debug());
1328 println!(
1329 " {}{}{}{}",
1330 " ".repeat(test.prefix.len()),
1331 "^".repeat(test.matches.len()),
1332 " ".repeat(test.suffix.len()),
1333 "^".repeat(test.next_matches.len())
1334 );
1335
1336 let group_id = group_id.unwrap_or(1);
1337 println!("expected capture id: '{}'", group_id);
1338
1339 let capture = regex.captures(&test_string).ok_or_else(|| test_error!("capture failed"))?;
1340 let overall_match = capture.get(0).ok_or_else(|| test_error!("regex did not match"))?;
1341 println!(
1342 "overall match: '{}', length = {}",
1343 overall_match.as_str().escape_debug(),
1344 overall_match.end()
1345 );
1346
1347 let remaining = &test_string[overall_match.end()..];
1348 println!("remaining: '{}'", remaining.escape_debug());
1349
1350 const OVERALL_MATCH: usize = 0;
1351
1352 let mut capture_ids = vec![];
1353 for (index, subcapture) in capture.iter().enumerate() {
1354 if index != OVERALL_MATCH {
1355 if subcapture.is_some() {
1356 capture_ids.push(index);
1357 }
1358 }
1359 }
1360 println!("capture ids = {:?}", capture_ids);
1361
1362 let captured_text = capture
1363 .get(group_id)
1364 .ok_or_else(|| test_error!(format!("capture group {} did not match", group_id)))?
1365 .as_str();
1366 println!("captured: '{}'", captured_text.escape_debug());
1367 assert_eq!(captured_text, test.matches);
1368 assert_eq!(capture_ids.len(), 1);
1369 assert_eq!(remaining, test.next_matches.to_owned() + &trailing);
1370
1371 match test.next_regex {
1372 Some(next_regex) => test_capture(
1373 &*next_regex,
1374 None,
1375 RegexTest {
1376 prefix: test.next_prefix,
1377 matches: test.next_matches,
1378 suffix: test.next_suffix,
1379 trailing: test.trailing,
1380 ..Default::default()
1381 },
1382 ),
1383 None => Ok(captured_text.to_string()),
1384 }
1385 }
1386
1387 fn test_capture(
1388 regex: &Regex,
1389 group_id: Option<usize>,
1390 test: RegexTest<'_>,
1391 ) -> Result<String, Error> {
1392 let expected_error_str = test.error.clone();
1393 match try_capture(regex, group_id, test) {
1394 Ok(captured) => {
1395 println!("SUCCESSFUL CAPTURE! ... '{}'", captured);
1396 Ok(captured)
1397 }
1398 Err(actual_error) => match expected_error_str {
1399 Some(expected_error_str) => match &actual_error {
1400 Error::TestFailure(_location, actual_error_str) => {
1401 if expected_error_str == actual_error_str {
1402 println!("EXPECTED FAILURE (GOOD NEWS)! ... '{}'", actual_error);
1403 Ok(format!("{}", actual_error))
1404 } else {
1405 println!("{}", actual_error);
1406 println!("expected: {}", expected_error_str);
1407 println!(" actual: {}", actual_error_str);
1408 Err(test_error!(
1409 "Actual error string did not match expected error string."
1410 ))
1411 }
1412 }
1413 _unexpected_error_type => {
1414 println!("expected: Test failure: {}", expected_error_str);
1415 println!(" actual: {}", actual_error);
1416 Err(test_error!(
1417 "Actual error type did not match expected test failure type."
1418 ))
1419 }
1420 },
1421 None => Err(actual_error),
1422 },
1423 }
1424 }
1425
1426 fn test_regex(group_id: usize, test: RegexTest<'_>) -> Result<String, Error> {
1427 test_capture(&NEXT_TOKEN, Some(group_id), test)
1428 }
1429
1430 proptest! {
1431 #![proptest_config(NO_PERSIST)]
1432 #[test]
1433 fn test_whitespace_no_newlines(
1434 spaces in r#"[\s&&[^\n]]+"#,
1435 trailing_non_whitespace in r#"[^\s&&[^\n]]*"#,
1436 ) {
1437 test_regex(
1438 *WHITESPACE,
1439 RegexTest {
1440 matches: &spaces,
1441 trailing: &trailing_non_whitespace,
1442 ..Default::default()
1443 }
1444 )
1445 .unwrap();
1446 }
1447 }
1448
1449 proptest! {
1450 #![proptest_config(NO_PERSIST)]
1451 #[test]
1452 fn test_whitespace_until_newline(
1453 spaces in r#"[\s&&[^\n]]+"#,
1454 trailing_non_whitespace in r#"\n[^\s&&[^\n]]*"#,
1455 ) {
1456 test_regex(
1457 *WHITESPACE,
1458 RegexTest {
1459 matches: &spaces,
1460 trailing: &trailing_non_whitespace,
1461 ..Default::default()
1462 }
1463 )
1464 .unwrap();
1465 }
1466 }
1467
1468 proptest! {
1469 #![proptest_config(NO_PERSIST)]
1470 #[test]
1471 fn test_plain_ascii_whitespace_no_newline(
1472 spaces in r#"[ \t]+"#,
1473 trailing_non_whitespace in r#"[^\s&&[^\n]]*"#,
1474 ) {
1475 test_regex(
1476 *WHITESPACE,
1477 RegexTest {
1478 matches: &spaces,
1479 trailing: &trailing_non_whitespace,
1480 ..Default::default()
1481 }
1482 )
1483 .unwrap();
1484 }
1485 }
1486
1487 proptest! {
1488 #![proptest_config(NO_PERSIST)]
1489 #[test]
1490 fn test_newline(
1491 newline in r#"\n"#,
1492 any_chars in r#"\PC*"#,
1493 ) {
1494 test_regex(
1495 *NEWLINE,
1496 RegexTest { matches: &newline, trailing: &any_chars, ..Default::default() },
1497 )
1498 .unwrap();
1499 }
1500 }
1501
1502 proptest! {
1503 #![proptest_config(NO_PERSIST)]
1504 #[test]
1505 fn test_line_comment(
1506 line_comment_prefix in r#"//"#,
1507 line_comment_content in r#"(|[^\n][^\n]*)"#,
1508 more_lines_or_eof in r#"(\n\PC*)?"#,
1509 ) {
1510 test_regex(
1511 *LINE_COMMENT_SLASHES,
1512 RegexTest {
1513 matches: &line_comment_prefix,
1514 next_regex: Some(&*LINE_COMMENT),
1515 next_matches: &line_comment_content,
1516 trailing: &more_lines_or_eof,
1517 ..Default::default()
1518 },
1519 )
1520 .unwrap();
1521 }
1522 }
1523
1524 proptest! {
1525 #![proptest_config(NO_PERSIST)]
1526 #[test]
1527 fn test_empty_line_comment(
1528 line_comment_prefix in r#"//"#,
1529 more_lines_or_eof in r#"(\n\PC*)?"#,
1530 ) {
1531 test_regex(
1532 *LINE_COMMENT_SLASHES,
1533 RegexTest {
1534 matches: &line_comment_prefix,
1535 next_regex: Some(&*LINE_COMMENT),
1536 next_matches: "",
1537 trailing: &more_lines_or_eof,
1538 ..Default::default()
1539 },
1540 )
1541 .unwrap();
1542 }
1543 }
1544
1545 proptest! {
1546 #![proptest_config(NO_PERSIST)]
1547 #[test]
1548 fn test_block_comment(
1549 block_comment_content in r#"([^*]|([*][^*/]))*"#,
1550 optional_trailing_content in r#"\PC*"#,
1551 ) {
1552 test_regex(
1553 *OPEN_BLOCK_COMMENT,
1554 RegexTest {
1555 matches: "/*",
1556 next_regex: Some(&*BLOCK_COMMENT),
1557 next_matches: &block_comment_content,
1558 next_suffix: "*/",
1559 trailing: &optional_trailing_content,
1560
1561 ..Default::default()
1562 },
1563 )
1564 .unwrap();
1565 }
1566 }
1567
1568 proptest! {
1569 #![proptest_config(NO_PERSIST)]
1570 #[test]
1571 fn test_empty_block_comment(
1572 optional_trailing_content in r#"\PC*"#,
1573 ) {
1574 test_regex(
1575 *OPEN_BLOCK_COMMENT,
1576 RegexTest {
1577 matches: "/*",
1578 next_regex: Some(&*BLOCK_COMMENT),
1579 next_matches: "",
1580 next_suffix: "*/",
1581 trailing: &optional_trailing_content,
1582 ..Default::default()
1583 },
1584 )
1585 .unwrap();
1586 }
1587 }
1588
1589 proptest! {
1590 #![proptest_config(NO_PERSIST)]
1591 #[test]
1592 fn test_property_name(
1593 propname in r#"[\w$&&[^\d]][\w$]*"#,
1594 whitespace_to_colon in r#"[\s&&[^\n]]*:"#,
1595 trailing_content in r#"\PC+"#,
1596 ) {
1597 test_regex(
1598 *UNQUOTED_PROPERTY_NAME_AND_COLON,
1599 RegexTest {
1600 matches: &propname,
1601 suffix: &whitespace_to_colon,
1602 trailing: &trailing_content,
1603 ..Default::default()
1604 },
1605 )
1606 .unwrap();
1607 }
1608 }
1609
1610 proptest! {
1635 #![proptest_config(EXTRA_CASES_NO_PERSIST)]
1636 #[test]
1637 fn bad_property_name(
1638 propname in r#"[0-9][\w&&[^0-9eExX]][\w$]*"#,
1639 whitespace_to_colon in r#"[\s&&[^\n]]*:"#,
1640 trailing_content in r#"\PC+"#,
1641 ) {
1642 test_regex(
1643 *UNQUOTED_PROPERTY_NAME_AND_COLON,
1644 RegexTest {
1645 error: Some("capture failed"),
1646 matches: &propname,
1647 suffix: &whitespace_to_colon,
1648 trailing: &trailing_content,
1649 ..Default::default()
1650 },
1651 )
1652 .unwrap();
1653 }
1654 }
1655
1656 proptest! {
1661 #![proptest_config(EXTRA_CASES_NO_PERSIST)]
1662 #[test]
1663 fn bad_property_name_captures_number_first(
1664 propname in r#"[0-9]\$[\w$]*"#,
1665 whitespace_to_colon in r#"[\s&&[^\n]]*:"#,
1666 trailing_content in r#"\PC+"#,
1667 ) {
1668 test_regex(
1669 *UNQUOTED_PROPERTY_NAME_AND_COLON,
1670 RegexTest {
1671 error: Some(
1672 &format!("capture group {} did not match",
1673 *UNQUOTED_PROPERTY_NAME_AND_COLON)
1674 ),
1675 matches: &propname,
1676 suffix: &whitespace_to_colon,
1677 trailing: &trailing_content,
1678 ..Default::default()
1679 },
1680 )
1681 .unwrap();
1682 }
1683 }
1684
1685 proptest! {
1686 #![proptest_config(NO_PERSIST)]
1687 #[test]
1688 fn test_single_quoted_string(
1689 single_quote in r#"'"#,
1690 single_quoted_string in r#"(([^'\\\n])|(\\')|(\\\n)|(\\\\))*"#,
1691 non_literal_trailing_content in r#"\s*[,:/\]\}]"#,
1693 ) {
1694 test_regex(
1695 *OPEN_QUOTE,
1696 RegexTest {
1697 matches: &single_quote,
1698 next_regex: Some(&*SINGLE_QUOTED),
1699 next_matches: &single_quoted_string,
1700 next_suffix: &single_quote,
1701 trailing: &non_literal_trailing_content,
1702 ..Default::default()
1703 },
1704 )
1705 .unwrap();
1706 }
1707 }
1708
1709 proptest! {
1710 #![proptest_config(NO_PERSIST)]
1711 #[test]
1712 fn test_double_quoted_string(
1713 double_quote in r#"""#,
1714 double_quoted_string in r#"(([^"\\\n])|(\\")|(\\\n)|(\\\\))*"#,
1715 non_literal_trailing_content in r#"\s*[,:/\]\}]?\PC*"#,
1717 ) {
1718 test_regex(
1719 *OPEN_QUOTE,
1720 RegexTest {
1721 matches: &double_quote,
1722 next_regex: Some(&*DOUBLE_QUOTED),
1723 next_matches: &double_quoted_string,
1724 next_suffix: &double_quote,
1725 trailing: &non_literal_trailing_content,
1726 ..Default::default()
1727 },
1728 )
1729 .unwrap();
1730 }
1731 }
1732
1733 proptest! {
1734 #![proptest_config(NO_PERSIST)]
1735 #[test]
1736 fn test_non_string_primitive(
1737 non_string_primitive in
1738 concat!(
1739 r#"(null|true|false)|([-+]?(NaN|Infinity|(0[xX][0-9a-fA-F]+)"#,
1740 r#"|((0|([1-9][0-9]*))?\.[0-9]+[eE][+-]?[0-9]+)|((0|([1-9][0-9]*))?\.[0-9]+)|((0|([1-9][0-9]*))\.?)))"#
1741 ),
1742 ends_non_string_primitive in r#"(|([\s,\]\}]\PC*))"#,
1743 ) {
1744 test_regex(
1745 *NON_STRING_PRIMITIVE,
1746 RegexTest {
1747 matches: &non_string_primitive,
1748 trailing: &ends_non_string_primitive,
1749 ..Default::default()
1750 }
1751 )
1752 .unwrap();
1753 }
1754 }
1755
1756 proptest! {
1757 #![proptest_config(NO_PERSIST)]
1758 #[test]
1759 fn test_brace(
1760 brace in r#"[\[\{\}\]]"#,
1761 any_chars in r#"\PC*"#,
1763 ) {
1764 test_regex(
1765 *BRACE,
1766 RegexTest { matches: &brace, trailing: &any_chars, ..Default::default() },
1767 )
1768 .unwrap();
1769 }
1770 }
1771
1772 proptest! {
1773 #![proptest_config(NO_PERSIST)]
1774 #[test]
1775 fn test_comma(
1776 comma in r#","#,
1777 any_chars in r#"\PC*"#,
1778 ) {
1779 test_regex(
1780 *COMMA,
1781 RegexTest { matches: &comma, trailing: &any_chars, ..Default::default() },
1782 )
1783 .unwrap();
1784 }
1785 }
1786
1787 proptest! {
1788 #![proptest_config(NO_PERSIST)]
1789 #[test]
1790 fn test_colon(
1791 colon in r#":"#,
1792 any_chars in r#"\PC*"#,
1793 ) {
1794 test_capture(
1795 &*COLON,
1796 None,
1797 RegexTest { matches: &colon, trailing: &any_chars, ..Default::default() },
1798 )
1799 .unwrap();
1800 }
1801 }
1802
1803 #[test]
1804 fn test_regex_line_comment() {
1805 test_regex(
1806 *LINE_COMMENT_SLASHES,
1807 RegexTest {
1808 matches: "//",
1809 next_regex: Some(&*LINE_COMMENT),
1810 next_matches: " some line comment",
1811 trailing: "",
1812 ..Default::default()
1813 },
1814 )
1815 .unwrap();
1816
1817 test_regex(
1818 *LINE_COMMENT_SLASHES,
1819 RegexTest {
1820 matches: "//",
1821 next_regex: Some(&*LINE_COMMENT),
1822 next_matches: " some line comment",
1823 trailing: "\n more lines",
1824 ..Default::default()
1825 },
1826 )
1827 .unwrap();
1828
1829 test_regex(
1830 *LINE_COMMENT_SLASHES,
1831 RegexTest {
1832 matches: "//",
1833 next_regex: Some(&*LINE_COMMENT),
1834 trailing: "\nan empty line comment",
1835 ..Default::default()
1836 },
1837 )
1838 .unwrap();
1839
1840 test_regex(
1841 *LINE_COMMENT_SLASHES,
1842 RegexTest {
1843 matches: "//",
1844 next_regex: Some(&*LINE_COMMENT),
1845 next_matches: "/\t some doc comment",
1846 trailing: "\nmultiple lines\nare here\n",
1847 ..Default::default()
1848 },
1849 )
1850 .unwrap();
1851 }
1852
    /// Hand-picked block comment cases: `/*` must be captured by OPEN_BLOCK_COMMENT and the
    /// comment body by BLOCK_COMMENT, up to the closing `*/`.
    #[test]
    fn test_regex_block_comment() {
        // Block comment on a single line, followed by more document text.
        test_regex(
            *OPEN_BLOCK_COMMENT,
            RegexTest {
                matches: "/*",
                next_regex: Some(&*BLOCK_COMMENT),
                next_matches: " this is a single line block comment ",
                next_suffix: "*/",
                trailing: "\n\nproperty: ignored",
                ..Default::default()
            },
        )
        .unwrap();

        // Block comment whose body contains a newline (the literal spans two source lines).
        test_regex(
            *OPEN_BLOCK_COMMENT,
            RegexTest {
                matches: "/*",
                next_regex: Some(&*BLOCK_COMMENT),
                next_matches: " this is a
 multiline block comment",
                next_suffix: "*/",
                trailing: "\n\nproperty: ignored",
                ..Default::default()
            },
        )
        .unwrap();

        // Empty block comment: nothing between `/*` and `*/`.
        test_regex(
            *OPEN_BLOCK_COMMENT,
            RegexTest {
                matches: "/*",
                next_regex: Some(&*BLOCK_COMMENT),
                next_matches: "",
                next_suffix: "*/",
                trailing: " to test an empty block comment",
                ..Default::default()
            },
        )
        .unwrap();
    }
1895
1896 #[test]
1897 fn test_regex_non_string_primitive() {
1898 test_regex(*NON_STRING_PRIMITIVE, RegexTest { matches: "null", ..Default::default() })
1899 .unwrap();
1900
1901 test_regex(
1902 *NON_STRING_PRIMITIVE,
1903 RegexTest { matches: "NULL", error: Some("capture failed"), ..Default::default() },
1904 )
1905 .unwrap();
1906
1907 test_regex(
1908 *NON_STRING_PRIMITIVE,
1909 RegexTest { matches: "nullify", error: Some("capture failed"), ..Default::default() },
1910 )
1911 .unwrap();
1912
1913 test_regex(*NON_STRING_PRIMITIVE, RegexTest { matches: "true", ..Default::default() })
1914 .unwrap();
1915
1916 test_regex(
1917 *NON_STRING_PRIMITIVE,
1918 RegexTest { matches: "True", error: Some("capture failed"), ..Default::default() },
1919 )
1920 .unwrap();
1921
1922 test_regex(
1923 *NON_STRING_PRIMITIVE,
1924 RegexTest { matches: "truest", error: Some("capture failed"), ..Default::default() },
1925 )
1926 .unwrap();
1927
1928 test_regex(*NON_STRING_PRIMITIVE, RegexTest { matches: "false", ..Default::default() })
1929 .unwrap();
1930
1931 for prefix in &["", "-", "+"] {
1932 for exp_prefix in &["", "-", "+"] {
1933 test_regex(
1934 *NON_STRING_PRIMITIVE,
1935 RegexTest {
1936 matches: &(prefix.to_string() + "123e" + exp_prefix + "456"),
1937 ..Default::default()
1938 },
1939 )
1940 .unwrap();
1941
1942 test_regex(
1943 *NON_STRING_PRIMITIVE,
1944 RegexTest {
1945 matches: &(prefix.to_string() + "123E" + exp_prefix + "456"),
1946 ..Default::default()
1947 },
1948 )
1949 .unwrap();
1950 }
1951
1952 test_regex(
1953 *NON_STRING_PRIMITIVE,
1954 RegexTest { matches: &(prefix.to_string() + "0x1a2b3e4f"), ..Default::default() },
1955 )
1956 .unwrap();
1957
1958 test_regex(
1959 *NON_STRING_PRIMITIVE,
1960 RegexTest { matches: &(prefix.to_string() + "0X1a2b3e4f"), ..Default::default() },
1961 )
1962 .unwrap();
1963
1964 test_regex(
1965 *NON_STRING_PRIMITIVE,
1966 RegexTest { matches: &(prefix.to_string() + "0x1A2B3E4F"), ..Default::default() },
1967 )
1968 .unwrap();
1969
1970 test_regex(
1971 *NON_STRING_PRIMITIVE,
1972 RegexTest { matches: &(prefix.to_string() + "0X1a2B3e4F"), ..Default::default() },
1973 )
1974 .unwrap();
1975
1976 test_regex(
1977 *NON_STRING_PRIMITIVE,
1978 RegexTest {
1979 matches: &(prefix.to_string() + "0x1a2b3e4fg"),
1980 error: Some("capture failed"),
1981 ..Default::default()
1982 },
1983 )
1984 .unwrap();
1985
1986 test_regex(
1987 *NON_STRING_PRIMITIVE,
1988 RegexTest {
1989 matches: &(prefix.to_string() + "0X"),
1990 error: Some("capture failed"),
1991 ..Default::default()
1992 },
1993 )
1994 .unwrap();
1995
1996 test_regex(*NON_STRING_PRIMITIVE, RegexTest { matches: "NaN", ..Default::default() })
1997 .unwrap();
1998
1999 test_regex(
2000 *NON_STRING_PRIMITIVE,
2001 RegexTest { matches: "NAN", error: Some("capture failed"), ..Default::default() },
2002 )
2003 .unwrap();
2004
2005 test_regex(
2006 *NON_STRING_PRIMITIVE,
2007 RegexTest { matches: "NaN0", error: Some("capture failed"), ..Default::default() },
2008 )
2009 .unwrap();
2010
2011 test_regex(
2012 *NON_STRING_PRIMITIVE,
2013 RegexTest { matches: "Infinity", ..Default::default() },
2014 )
2015 .unwrap();
2016
2017 test_regex(
2018 *NON_STRING_PRIMITIVE,
2019 RegexTest {
2020 matches: "infinity",
2021 error: Some("capture failed"),
2022 ..Default::default()
2023 },
2024 )
2025 .unwrap();
2026
2027 test_regex(
2028 *NON_STRING_PRIMITIVE,
2029 RegexTest {
2030 matches: "Infinity_",
2031 error: Some("capture failed"),
2032 ..Default::default()
2033 },
2034 )
2035 .unwrap();
2036
2037 test_regex(
2038 *NON_STRING_PRIMITIVE,
2039 RegexTest { matches: &(prefix.to_string() + "0"), ..Default::default() },
2040 )
2041 .unwrap();
2042
2043 test_regex(
2044 *NON_STRING_PRIMITIVE,
2045 RegexTest {
2046 matches: &(prefix.to_string() + "1234567890123456789012345678901234567890"),
2047 ..Default::default()
2048 },
2049 )
2050 .unwrap();
2051
2052 test_regex(
2053 *NON_STRING_PRIMITIVE,
2054 RegexTest { matches: &(prefix.to_string() + "12345.67890"), ..Default::default() },
2055 )
2056 .unwrap();
2057
2058 test_regex(
2059 *NON_STRING_PRIMITIVE,
2060 RegexTest { matches: &(prefix.to_string() + ".67890"), ..Default::default() },
2061 )
2062 .unwrap();
2063
2064 test_regex(
2065 *NON_STRING_PRIMITIVE,
2066 RegexTest { matches: &(prefix.to_string() + "12345."), ..Default::default() },
2067 )
2068 .unwrap();
2069 }
2070 }
2071
2072 #[test]
2073 fn test_regex_unquoted_property_name() {
2074 test_regex(
2075 *UNQUOTED_PROPERTY_NAME_AND_COLON,
2076 RegexTest {
2077 matches: "propname",
2078 suffix: ":",
2079 trailing: " 'some property value',",
2080 ..Default::default()
2081 },
2082 )
2083 .unwrap();
2084
2085 test_regex(
2086 *UNQUOTED_PROPERTY_NAME_AND_COLON,
2087 RegexTest {
2088 matches: "propname",
2089 suffix: " :",
2090 trailing: " 'some property value',",
2091 ..Default::default()
2092 },
2093 )
2094 .unwrap();
2095
2096 test_regex(
2097 *UNQUOTED_PROPERTY_NAME_AND_COLON,
2098 RegexTest {
2099 error: Some("capture failed"),
2100 matches: "99propname",
2105 suffix: ":",
2106 trailing: " 'property names do not start with digits,",
2107 ..Default::default()
2108 },
2109 )
2110 .unwrap();
2111 }
2112
    /// Hand-picked quoted string cases: the opening quote must be captured by OPEN_QUOTE and
    /// the string body by SINGLE_QUOTED or DOUBLE_QUOTED, up to the matching closing quote.
    #[test]
    fn test_regex_string() {
        // Simple single-quoted string with nothing after the closing quote.
        test_regex(
            *OPEN_QUOTE,
            RegexTest {
                matches: "'",
                next_regex: Some(&*SINGLE_QUOTED),
                next_matches: "this is a simple single-quoted string",
                next_suffix: "'",
                trailing: "",
                ..Default::default()
            },
        )
        .unwrap();

        // Single-quoted string containing a backslash-escaped newline and double quotes.
        test_regex(
            *OPEN_QUOTE,
            RegexTest {
                matches: "'",
                next_regex: Some(&*SINGLE_QUOTED),
                next_matches: " this is a \\
 multiline \"text\" string",
                next_suffix: "'",
                trailing: ", end of value",
                ..Default::default()
            },
        )
        .unwrap();

        // Simple double-quoted string with nothing after the closing quote.
        test_regex(
            *OPEN_QUOTE,
            RegexTest {
                matches: "\"",
                next_regex: Some(&*DOUBLE_QUOTED),
                next_matches: "this is a simple double-quoted string",
                next_suffix: "\"",
                trailing: "",
                ..Default::default()
            },
        )
        .unwrap();

        // Double-quoted string containing a backslash-escaped newline, single quotes, and an
        // escaped double quote.
        test_regex(
            *OPEN_QUOTE,
            RegexTest {
                matches: "\"",
                next_regex: Some(&*DOUBLE_QUOTED),
                next_matches: " this is a \\
 multiline 'text' string with escaped \\\" double-quote",
                next_suffix: "\"",
                trailing: ", end of value",
                ..Default::default()
            },
        )
        .unwrap();

        // Empty double-quoted string.
        test_regex(
            *OPEN_QUOTE,
            RegexTest {
                matches: "\"",
                next_regex: Some(&*DOUBLE_QUOTED),
                next_matches: "",
                next_suffix: "\"",
                trailing: ", to test empty string",
                ..Default::default()
            },
        )
        .unwrap();
    }
2182
2183 #[test]
2184 fn test_regex_braces() {
2185 test_regex(*BRACE, RegexTest { matches: "[", trailing: " 1234 ]", ..Default::default() })
2186 .unwrap();
2187
2188 test_regex(*BRACE, RegexTest { matches: "[", trailing: "true]", ..Default::default() })
2189 .unwrap();
2190
2191 test_regex(
2192 *BRACE,
2193 RegexTest { matches: "[", trailing: "\n 'item',\n 'item2'\n]", ..Default::default() },
2194 )
2195 .unwrap();
2196
2197 test_regex(*BRACE, RegexTest { matches: "]", trailing: ",[1234],", ..Default::default() })
2198 .unwrap();
2199
2200 test_regex(*BRACE, RegexTest { matches: "{", trailing: " 1234 }", ..Default::default() })
2201 .unwrap();
2202
2203 test_regex(*BRACE, RegexTest { matches: "{", trailing: "true}", ..Default::default() })
2204 .unwrap();
2205
2206 test_regex(
2207 *BRACE,
2208 RegexTest { matches: "{", trailing: "\n 'item',\n 'item2'\n}", ..Default::default() },
2209 )
2210 .unwrap();
2211
2212 test_regex(*BRACE, RegexTest { matches: "}", trailing: ",{1234},", ..Default::default() })
2213 .unwrap();
2214 }
2215
2216 #[test]
2217 fn test_regex_command_colon() {
2218 test_regex(
2219 *COMMA,
2220 RegexTest { matches: ",", trailing: "\n 'item',\n 'item2'\n}", ..Default::default() },
2221 )
2222 .unwrap();
2223
2224 test_regex(*COMMA, RegexTest { matches: ",", trailing: "{1234},", ..Default::default() })
2225 .unwrap();
2226
2227 test_capture(&*COLON, None, RegexTest { matches: ":", ..Default::default() }).unwrap();
2228
2229 test_capture(&*COLON, None, RegexTest { matches: " \t :", ..Default::default() }).unwrap();
2230
2231 test_capture(
2232 &*COLON,
2233 None,
2234 RegexTest { error: Some("capture failed"), matches: " \n :", ..Default::default() },
2235 )
2236 .unwrap();
2237 }
2238
2239 #[test]
2240 fn test_enums() {
2241 let line_comment = Comment::Line("a line comment".to_owned());
2242 assert!(line_comment.is_line());
2243
2244 let block_comment =
2245 Comment::Block { lines: vec!["a block".into(), "comment".into()], align: true };
2246 assert!(block_comment.is_block());
2247
2248 let primitive_value = Primitive::new("l33t".to_owned(), vec![]);
2249 assert!(primitive_value.is_primitive());
2250
2251 let array_value = Array::new(vec![]);
2252 assert!(array_value.is_array());
2253
2254 let object_value = Object::new(vec![]);
2255 assert!(object_value.is_object());
2256 }
2257
    /// With the parser's nesting limit set to 5, a document with three nested arrays inside an
    /// object parses, while one more level of array nesting is rejected with a parse error
    /// whose message is checked verbatim.
    #[test]
    fn test_document_exceeds_nesting_limit() {
        let mut parser = Parser::new(&None);
        parser.set_nesting_limit(5);
        let good_buffer = r##"{
 list_of_lists_of_lists: [[[]]]
}"##;
        // NOTE(review): 8 and 15 are presumably the starting line and column of the buffer
        // within a larger document; confirm against `parse_from_location`.
        parser.parse_from_location(&good_buffer, 8, 15).expect("should NOT exceed nesting limit");

        let bad_buffer = r##"{
 list_of_lists_of_lists: [[[[]]]]
}"##;
        let err = parser
            .parse_from_location(&bad_buffer, 8, 15)
            .expect_err("should exceed nesting limit");
        match err {
            Error::Parse(_, message) => {
                assert_eq!(
                    message,
                    r##"The given JSON5 document exceeds the parser's nesting limit of 5:
 list_of_lists_of_lists: [[[[]]]]
 ^"##
                )
            }
            _ => panic!("expected a parser error"),
        }
    }
2285
    /// Parses a JSON5 fragment cut out of a larger Markdown document and checks that the
    /// resulting parse error carries the file name, line, column, and message expected for the
    /// fragment's position.
    #[test]
    fn test_parse_from_location_error_location() {
        let filename = Some("mixed_content.md".to_string());
        let mixed_document = r##"
Mixed Content Doc
=================

This is a document with embedded JSON5 content.

```json5
json5_value = {
 // The next line should generate a parser error
 999,
}
```

End of mixed content document.
"##;
        // The slice runs from the first `{` up to, but not including, the first `}`.
        let json5_slice =
            &mixed_document[mixed_document.find("{").unwrap()..mixed_document.find("}").unwrap()];
        let mut parser = Parser::new(&filename);
        // NOTE(review): 8 and 15 are presumably the line and column at which the slice starts
        // within `mixed_document`; confirm against `parse_from_location`.
        let err = parser
            .parse_from_location(json5_slice, 8, 15)
            .expect_err("check error message for location");
        match err {
            Error::Parse(Some(loc), message) => {
                assert_eq!(loc.file, Some("mixed_content.md".to_owned()));
                assert_eq!(loc.line, 10);
                assert_eq!(loc.col, 5);
                assert_eq!(
                    message,
                    r##"Object values require property names:
 999,
 ^~~"##
                )
            }
            _ => panic!("expected a parser error"),
        }
    }
2325
2326 #[test]
2327 fn test_doc_with_nulls() {
2328 let mut parser = Parser::new(&None);
2329 let buffer = "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[////[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]";
2330 let err = parser.parse(&buffer).expect_err("should fail");
2331 match err {
2332 Error::Parse(_, message) => {
2333 assert!(message.starts_with("Mismatched braces in the document:"));
2334 }
2335 _ => panic!("expected a parser error"),
2336 }
2337 }
2338}