#![deny(missing_docs)]
use {
crate::{content::*, error::*},
lazy_static::lazy_static,
regex::{CaptureLocations, Match, Regex},
std::cell::RefCell,
std::rc::Rc,
};
/// Returns `regex` prefixed with `^`, so the pattern only matches at the start of the input.
fn from_start(regex: &str) -> String {
    format!("^{}", regex)
}
/// Returns `regex` wrapped in `^`...`$`, so the pattern must match the entire input.
fn exact_match(regex: &str) -> String {
    format!("^{}$", regex)
}
// Token patterns used by the parser.
//
// Each `*_PATTERN` below contributes exactly one capturing group to `NEXT_TOKEN`, and the
// `usize` static that follows it is the index of that group. The indexes therefore must stay
// in the same order as the list of patterns joined into `NEXT_TOKEN`.
lazy_static! {
// Group 1: one or more whitespace characters, excluding newline.
static ref WHITESPACE_PATTERN: &'static str = r#"([\s&&[^\n]]+)"#;
static ref WHITESPACE: usize = 1;
// Group 2: a single newline.
static ref NEWLINE_PATTERN: &'static str = r#"(\n)"#;
static ref NEWLINE: usize = 2;
// Group 3: the `//` that opens a line comment (the comment text is read by `LINE_COMMENT`).
static ref LINE_COMMENT_SLASHES_PATTERN: &'static str = r#"(//)"#;
static ref LINE_COMMENT_SLASHES: usize = 3;
// Group 4: the `/*` that opens a block comment (the body is read by `BLOCK_COMMENT`).
static ref OPEN_BLOCK_COMMENT_PATTERN: &'static str = r#"(/\*)"#;
static ref OPEN_BLOCK_COMMENT: usize = 4;
// Group 5: `null`, `true`, `false`, or any number format. All inner groups are
// non-capturing so that the group numbering of `NEXT_TOKEN` is not disturbed.
static ref NON_STRING_PRIMITIVE_PATTERN: &'static str =
r#"((?x) # ignore whitespace and allow '#' comments
# Capture null, true, or false (lowercase only, as in the ECMAScript keywords).
# End with a word boundary ('\b' marker) to ensure the pattern does not match if
# it is followed by a word ('\w') character; for example, 'nullify' is a valid
# identifier (depending on the context) and must not match the 'null' value.
(?:(?:null|true|false)\b)|
# Capture all number formats. Every variant is allowed an optional '-' or '+' prefix.
(?:[-+]?(?:
# All of the following variants end in a word character. Use '\b' to prevent
# matching numbers immediately followed by another word character, for example,
# 'NaNo', 'Infinity_', or '0xadef1234ghi'.
(?:(?:
NaN|
Infinity|
# hexadecimal notation
(?:0[xX][0-9a-fA-F]+)|
# decimal exponent notation
(?:(?:0|(?:[1-9][0-9]*))?\.[0-9]+[eE][+-]?[0-9]+)|
# integer exponent notation with optional trailing decimal point
(?:(?:0|(?:[1-9][0-9]*))\.?[eE][+-]?[0-9]+)|
# decimal notation
(?:(?:0|(?:[1-9][0-9]*))?\.[0-9]+)
)\b)|
# Capture integers, with an optional trailing decimal point.
# If the value ends in a digit (no trailing decimal point), apply `\b` to prevent
# matching integers immediatly followed by a word character (for example, 1200PDT).
# But if the integer has a trailing decimal, the '\b' does not apply. (Since '.' is
# not itself a '\w' word character, the '\b' would have the opposite affect,
# matching only if the next character is a word character, unless there is no next
# character.)
(?:
(?:0|(?:[1-9][0-9]*))(?:\.|\b)
)
))
)"#;
static ref NON_STRING_PRIMITIVE: usize = 5;
// An identifier-like property name: `$` or a non-digit word character, followed by any
// number of `$` or word characters. This pattern has no capturing group of its own.
static ref UNQUOTED_PROPERTY_NAME_PATTERN: &'static str = r#"[\$\w&&[^\d]][\$\w]*"#;
// Whole-string version of the pattern above, used by `matches_unquoted_property_name()`.
static ref UNQUOTED_PROPERTY_NAME_REGEX: Regex =
Regex::new(&exact_match(&*UNQUOTED_PROPERTY_NAME_PATTERN)).unwrap();
// Group 6: an unquoted property name (captured) followed by optional non-newline
// whitespace and a colon (both consumed, but not captured).
static ref UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN_STRING: String =
r#"(?:("#.to_owned() + *UNQUOTED_PROPERTY_NAME_PATTERN + r#")[\s&&[^\n]]*:)"#;
// `&str` view of the `String` above, so it can be listed with the other `&'static str`
// patterns when `NEXT_TOKEN` is assembled.
static ref UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN: &'static str =
&UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN_STRING;
static ref UNQUOTED_PROPERTY_NAME_AND_COLON: usize = 6;
// Group 7: the opening quote of a string (the body is read by `SINGLE_QUOTED` or
// `DOUBLE_QUOTED`).
static ref OPEN_QUOTE_PATTERN: &'static str = r#"(["'])"#;
static ref OPEN_QUOTE: usize = 7;
// Group 8: any one of `{`, `}`, `[`, `]`.
static ref BRACE_PATTERN: &'static str = r#"([{}\[\]])"#;
static ref BRACE: usize = 8;
// Group 9: a comma.
static ref COMMA_PATTERN: &'static str = r#"(,)"#;
static ref COMMA: usize = 9;
// The tokenizer regex: an alternation of all of the token patterns above, anchored to the
// start of the remaining input. The order of the alternatives defines the group indexes.
static ref NEXT_TOKEN: Regex = Regex::new(
&from_start(&(r#"(?:"#.to_owned()
+ &vec![
*WHITESPACE_PATTERN,
*NEWLINE_PATTERN,
*LINE_COMMENT_SLASHES_PATTERN,
*OPEN_BLOCK_COMMENT_PATTERN,
*NON_STRING_PRIMITIVE_PATTERN,
*UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN,
*OPEN_QUOTE_PATTERN,
*BRACE_PATTERN,
*COMMA_PATTERN,
].join("|")
+ r#")"#))
).unwrap();
// Captures everything up to (not including) the next newline; may capture an empty string.
static ref LINE_COMMENT: Regex = Regex::new(
&from_start(r#"([^\n]*)"#)
).unwrap();
// Lazily captures everything, including newlines, up to the first `*/`, which is consumed
// but not captured.
static ref BLOCK_COMMENT: Regex = Regex::new(
&from_start(r#"((?:.|\n)*?)\*/"#)
).unwrap();
// Captures the body of a single-quoted string: escaped backslashes, escaped single quotes,
// backslash-newline continuations, or any character other than `'` and newline. The closing
// quote is consumed but not captured.
static ref SINGLE_QUOTED: Regex = Regex::new(
&from_start(r#"((?:(?:\\\\)|(?:\\')|(?:\\\n)|(?:[^'\n]))*)(?:')"#)
).unwrap();
// Same as `SINGLE_QUOTED`, for double-quoted strings.
static ref DOUBLE_QUOTED: Regex = Regex::new(
&from_start(r#"((?:(?:\\\\)|(?:\\")|(?:\\\n)|(?:[^"\n]))*)(?:")"#)
).unwrap();
// Optional non-newline whitespace followed by the `:` that ends a quoted property name.
static ref COLON: Regex = Regex::new(
&from_start(r#"([\s&&[^\n]]*:)"#)
).unwrap();
}
/// Returns true if `strval` has the shape of an unquoted property name (it matches
/// `UNQUOTED_PROPERTY_NAME_REGEX` in full) and is not one of the literal keywords `true`,
/// `false`, or `null`.
fn matches_unquoted_property_name(strval: &str) -> bool {
    const KEYWORDS: &'static [&'static str] = &["true", "false", "null"];
    if KEYWORDS.iter().any(|keyword| *keyword == strval) {
        return false;
    }
    UNQUOTED_PROPERTY_NAME_REGEX.is_match(strval)
}
/// Pairs a regex with reusable capture storage, and remembers the text of the most recent
/// successful match so that individual capture groups can be read back afterwards.
struct Capturer {
    /// The pattern applied by `capture()`.
    regex: &'static Regex,
    /// Owned copy of the input from its start through the end of the last match, or `None`
    /// if the last `capture()` call did not match.
    overall_match: Option<String>,
    /// Capture group offsets written by the last `capture()` call.
    locations: CaptureLocations,
}
impl Capturer {
    /// Creates a `Capturer` for `regex`, with capture storage sized for that regex.
    fn new(regex: &'static Regex) -> Self {
        let locations = regex.capture_locations();
        Self { regex, overall_match: None, locations }
    }

    /// Applies the regex to `text`, recording the capture locations and a copy of the matched
    /// prefix of `text`. Returns the overall match, or `None` (clearing the stored copy) if
    /// the regex did not match.
    fn capture<'a>(&mut self, text: &'a str) -> Option<Match<'a>> {
        let found = self.regex.captures_read(&mut self.locations, text);
        self.overall_match = found.as_ref().map(|matched| text[0..matched.end()].to_string());
        found
    }

    /// Returns the text stored by the most recent successful `capture()`, if any.
    fn overall_match<'a>(&'a self) -> Option<&'a str> {
        self.overall_match.as_deref()
    }

    /// Returns the text of capture group `i` from the most recent successful `capture()`, or
    /// `None` if there was no match or the group did not participate in it.
    fn captured<'a>(&'a self, i: usize) -> Option<&'a str> {
        let whole = self.overall_match.as_ref()?;
        let (start, end) = self.locations.get(i)?;
        Some(&whole[start..end])
    }
}
/// The source line shown with a parser error, plus the position and width of the `^~~~`
/// marker drawn beneath it.
struct ParserErrorContext {
    /// The (possibly trimmed) line of source text to display.
    line: String,
    /// Number of spaces to emit before the `^`.
    indicator_start: usize,
    /// Total width of the marker: the `^` plus any trailing `~` characters.
    indicator_len: usize,
}
impl ParserErrorContext {
    /// Builds a context. Panics if `indicator_len` is zero, since the marker always contains
    /// at least the `^`.
    fn new(line: String, indicator_start: usize, indicator_len: usize) -> Self {
        assert!(indicator_len >= 1);
        Self { line, indicator_start, indicator_len }
    }

    /// The line of text to display.
    fn line(&self) -> &str {
        self.line.as_str()
    }

    /// Renders the marker line: `indicator_start` spaces, a `^`, then enough `~` characters
    /// to bring the marker to `indicator_len` characters in total.
    fn indicator(&self) -> String {
        let padding = " ".repeat(self.indicator_start);
        let tail = "~".repeat(self.indicator_len.saturating_sub(1));
        format!("{}^{}", padding, tail)
    }
}
/// Tokenizes a JSON5 document with the regexes defined in this file and builds the parsed
/// document as a tree of `Value`s, tracking line and column positions for error messages.
pub(crate) struct Parser<'parser> {
/// The portion of the input buffer that has not been consumed yet.
remaining: &'parser str,
/// Optional name of the document, included in the `Location` of reported errors.
filename: &'parser Option<String>,
/// Text beginning at the line that contains the most recently consumed token; used to build
/// error context.
current_line: &'parser str,
/// Text that becomes `current_line` once `line_number` advances to `next_line_number`.
next_line: &'parser str,
/// Line number of the most recently consumed token.
line_number: usize,
/// Column number of the most recently consumed token.
column_number: usize,
/// Line number at which the next token will start.
next_line_number: usize,
/// Column number at which the next token will start.
next_column_number: usize,
/// Stack of the containers currently being parsed; the last entry is the current scope.
scope_stack: Vec<Rc<RefCell<Value>>>,
/// Maximum permitted length of `scope_stack`; exceeding it is a parse error.
nesting_limit: usize,
/// Matches the `:` that must follow a quoted property name.
colon_capturer: Capturer,
}
impl<'parser> Parser<'parser> {
/// The nesting limit installed by `new()`. The parser reports an error when the depth of its
/// scope stack exceeds the limit in effect.
pub const DEFAULT_NESTING_LIMIT: usize = 1000;
/// Creates a parser with no input attached yet, positioned at line 1, column 1, with an
/// empty scope stack and the default nesting limit. `filename`, if given, is reported in
/// error locations.
pub fn new(filename: &'parser Option<String>) -> Self {
    // No buffer is attached until `parse()` / `parse_from_location()` is called.
    const NO_INPUT: &str = "";
    Self {
        remaining: NO_INPUT,
        filename,
        current_line: NO_INPUT,
        next_line: NO_INPUT,
        line_number: 1,
        column_number: 1,
        next_line_number: 1,
        next_column_number: 1,
        scope_stack: Vec::default(),
        nesting_limit: Self::DEFAULT_NESTING_LIMIT,
        colon_capturer: Capturer::new(&COLON),
    }
}
/// Replaces the maximum scope-stack depth the parser will accept before reporting an error.
pub fn set_nesting_limit(&mut self, new_limit: usize) {
self.nesting_limit = new_limit;
}
/// Returns a new shared handle to the container on top of the scope stack. Panics if the
/// scope stack is empty.
fn current_scope(&self) -> Rc<RefCell<Value>> {
    assert!(self.scope_stack.len() > 0);
    let innermost = self.scope_stack.last().unwrap();
    Rc::clone(innermost)
}
fn with_container<F, T>(&self, f: F) -> Result<T, Error>
where
F: FnOnce(&mut dyn Container) -> Result<T, Error>,
{
match &mut *self.current_scope().borrow_mut() {
Value::Array { val, .. } => f(val),
Value::Object { val, .. } => f(val),
unexpected => Err(Error::internal(
self.location(),
format!(
"Current scope should be an Array or Object, but scope was {:?}",
unexpected
),
)),
}
}
/// Mutably borrows the current scope and passes it to `f` as an `Array`. Returns a parse
/// error if the current scope is not an array.
fn with_array<F, T>(&self, f: F) -> Result<T, Error>
where
    F: FnOnce(&mut Array) -> Result<T, Error>,
{
    let scope = self.current_scope();
    let mut scope_value = scope.borrow_mut();
    match &mut *scope_value {
        Value::Array { val, .. } => f(val),
        unexpected => {
            let message = format!(
                "Invalid Array token found while parsing an {:?} (mismatched braces?)",
                unexpected
            );
            Err(self.error(message))
        }
    }
}
/// Mutably borrows the current scope and passes it to `f` as an `Object`. Returns a parse
/// error if the current scope is not an object.
fn with_object<F, T>(&self, f: F) -> Result<T, Error>
where
    F: FnOnce(&mut Object) -> Result<T, Error>,
{
    let scope = self.current_scope();
    let mut scope_value = scope.borrow_mut();
    match &mut *scope_value {
        Value::Object { val, .. } => f(val),
        unexpected => {
            let message = format!(
                "Invalid Object token found while parsing an {:?} (mismatched braces?)",
                unexpected
            );
            Err(self.error(message))
        }
    }
}
/// Returns true if the current scope is an array.
fn is_in_array(&self) -> bool {
    let scope = self.current_scope();
    let is_array = scope.borrow().is_array();
    is_array
}
/// Returns true if the current scope is not an array. (The check is the negation of
/// `is_in_array()`; it does not inspect the scope for being an object specifically.)
fn is_in_object(&self) -> bool {
!self.is_in_array()
}
/// Adds `value` to the current container. If `value` is itself an object or array, it is
/// then pushed onto the scope stack as the new current scope, and an error is returned if
/// the stack has grown beyond the nesting limit.
fn add_value(&mut self, value: Value) -> Result<(), Error> {
    let opens_scope = value.is_object() || value.is_array();
    let shared_value = Rc::new(RefCell::new(value));
    self.with_container(|container| container.add_value(Rc::clone(&shared_value), self))?;
    if !opens_scope {
        return Ok(());
    }
    self.scope_stack.push(shared_value);
    // The depth check runs after the push, so a stack of exactly `nesting_limit` is allowed.
    if self.scope_stack.len() > self.nesting_limit {
        return Err(self.error(format!(
            "The given JSON5 document exceeds the parser's nesting limit of {}",
            self.nesting_limit
        )));
    }
    Ok(())
}
/// Notifies the current container that a newline token was read.
fn on_newline(&mut self) -> Result<(), Error> {
self.with_container(|container| container.on_newline())
}
fn add_line_comment(
&self,
captured: Option<&str>,
pending_new_line_comment_block: bool,
) -> Result<bool, Error> {
match captured {
Some(content) => {
let content = content.trim_end();
self.with_container(|container| {
container.add_line_comment(
content,
self.column_number,
pending_new_line_comment_block,
)
})
}
None => Err(Error::internal(
self.location(),
"Line comment regex should support empty line comment",
)),
}
}
/// Adds a block comment to the current container.
///
/// `captured` is the text between `/*` and `*/`, or `None` if no closing `*/` was found
/// (reported as a parse error). If every non-blank line after the first starts with at least
/// as many spaces as the expected indent, that indent is stripped from those lines (blank
/// lines become empty) and the comment is marked `align: true`. Otherwise the lines are
/// stored unchanged with `align: false`.
fn add_block_comment(&self, captured: Option<&str>) -> Result<(), Error> {
    let content = match captured {
        Some(content) => content,
        None => return Err(self.error("Block comment started without closing \"*/\"")),
    };
    // The expected indent is derived from the parser's current column.
    let indent_count = self.column_number - 3;
    let indent = " ".repeat(indent_count);
    let breaks_alignment =
        |line: &str| !line.starts_with(&indent) && !line.trim().is_empty();
    let comment = if content.lines().skip(1).any(breaks_alignment) {
        Comment::Block { lines: content.lines().map(str::to_owned).collect(), align: false }
    } else {
        let realigned = content.lines().enumerate().map(|(index, line)| {
            if index == 0 {
                line
            } else if line.trim().is_empty() {
                ""
            } else {
                &line[indent_count..]
            }
        });
        Comment::Block { lines: realigned.map(str::to_owned).collect(), align: true }
    };
    self.with_container(|container| container.add_block_comment(comment))
}
/// Takes the pending comments from the current container and returns them.
fn take_pending_comments(&mut self) -> Result<Vec<Comment>, Error> {
self.with_container(|container| Ok(container.take_pending_comments()))
}
/// Records `name` as the pending property name of the current object. Fails with a parse
/// error if the current scope is not an object.
fn set_pending_property(&self, name: &str) -> Result<(), Error> {
self.with_object(|object| object.set_pending_property(name.to_string(), self))
}
/// Handles a quoted string whose body was captured as `captured` (`None` means the closing
/// quote was never found, which is a parse error).
///
/// Inside an object that has no pending property name, the string is a property name: it
/// must be followed by a colon, and it is stored without its quotes when it would also be
/// valid as an unquoted name, or with its original quotes otherwise. In every other
/// position the string is added as a primitive value, with its quotes restored.
fn add_quoted_string(&mut self, quote: &str, captured: Option<&str>) -> Result<(), Error> {
    let unquoted = match captured {
        Some(unquoted) => unquoted,
        None => return Err(self.error("Unclosed string")),
    };
    let is_property_name =
        self.is_in_object() && !self.with_object(|object| object.has_pending_property())?;
    if !is_property_name {
        let comments = self.take_pending_comments()?;
        let quoted = format!("{}{}{}", quote, unquoted, quote);
        return self.add_value(Primitive::new(quoted, comments));
    }
    let colon = self.colon_capturer.capture(self.remaining);
    if !self.consume_if_matched(colon) {
        return Err(self.error("Property name separator (:) missing"));
    }
    if matches_unquoted_property_name(unquoted) {
        self.set_pending_property(unquoted)
    } else {
        self.set_pending_property(&format!("{}{}{}", quote, unquoted, quote))
    }
}
/// Adds a non-string primitive token (as matched by the tokenizer) to the current container,
/// attaching any comments that were pending.
fn add_non_string_primitive(&mut self, non_string_primitive: &str) -> Result<(), Error> {
// Pending comments are taken first so they can be attached to the new value.
let comments = self.take_pending_comments()?;
self.add_value(Primitive::new(non_string_primitive.to_string(), comments))
}
/// Dispatches a brace token to the matching open/close handler. Any string other than `{`,
/// `}`, `[`, or `]` is reported as an internal error.
fn on_brace(&mut self, brace: &str) -> Result<(), Error> {
match brace {
"{" => self.open_object(),
"}" => self.close_object(),
"[" => self.open_array(),
"]" => self.close_array(),
unexpected => Err(Error::internal(
self.location(),
format!("regex returned unexpected brace string: {}", unexpected),
)),
}
}
/// Starts a new object: the pending comments are attached to it, and it is added to the
/// current container (which also makes it the current scope; see `add_value()`).
fn open_object(&mut self) -> Result<(), Error> {
let comments = self.take_pending_comments()?;
self.add_value(Object::new(comments))
}
/// Pops the current scope off the scope stack. Returns a parse error if that leaves the
/// stack empty, which means a closing brace had no matching opening brace.
fn exit_scope(&mut self) -> Result<(), Error> {
    self.scope_stack.pop();
    if !self.scope_stack.is_empty() {
        return Ok(());
    }
    Err(self.error("Closing brace without a matching opening brace"))
}
/// Closes the current object (an error if the current scope is not an object), then leaves
/// its scope.
fn close_object(&mut self) -> Result<(), Error> {
self.with_object(|object| object.close(self))?;
self.exit_scope()
}
/// Starts a new array: the pending comments are attached to it, and it is added to the
/// current container (which also makes it the current scope; see `add_value()`).
fn open_array(&mut self) -> Result<(), Error> {
let comments = self.take_pending_comments()?;
self.add_value(Array::new(comments))
}
/// Closes the current array (an error if the current scope is not an array), then leaves
/// its scope.
fn close_array(&mut self) -> Result<(), Error> {
self.with_array(|array| array.close(self))?;
self.exit_scope()
}
/// Notifies the current container that the current value has ended. The parse loop calls
/// this when it reads a comma.
fn end_value(&self) -> Result<(), Error> {
self.with_container(|container| container.end_value(self))
}
/// Returns the parser's current position (a copy of the filename, plus the current line and
/// column numbers) as a `Location`. The result is always `Some`.
pub fn location(&self) -> Option<Location> {
Some(Location::new(self.filename.clone(), self.line_number, self.column_number))
}
pub fn error(&self, err: impl std::fmt::Display) -> Error {
const MAX_ERROR_LINE_LEN: usize = 200;
const MIN_CONTEXT_LEN: usize = 10;
const ELLIPSIS: &str = "\u{2026}";
let error_context = self.get_error_context(MAX_ERROR_LINE_LEN, MIN_CONTEXT_LEN, ELLIPSIS);
Error::parse(
self.location(),
format!("{}:\n{}\n{}", err, error_context.line(), error_context.indicator()),
)
}
/// Advances the parser past `matched`, if there is a match, and returns whether there was
/// one.
///
/// The "current" position is moved up to the previously computed "next" position even when
/// `matched` is `None`, so that a subsequent error is reported at the text that failed to
/// match. On a match, `remaining` is advanced past the matched text and the "next" position
/// is moved forward by one column per character, or to column 1 of the following line for
/// each newline in the match.
fn consume_if_matched<'a>(&mut self, matched: Option<Match<'a>>) -> bool {
// Promote the previously computed "next" position to be the current position.
self.column_number = self.next_column_number;
if self.line_number < self.next_line_number {
self.line_number = self.next_line_number;
self.current_line = self.next_line;
}
if let Some(matched) = matched {
// Keep a view that still includes the matched text, so that the lines following each
// newline inside the match can be located.
let matched_and_remaining = &self.remaining[matched.start()..];
self.remaining = &self.remaining[matched.end()..];
// The line iterator is created only if the match actually contains a newline.
let mut some_matched_lines = None;
for c in matched.as_str().chars() {
if c == '\n' {
let matched_lines = some_matched_lines
.get_or_insert_with(|| matched_and_remaining.lines().skip(1));
// If no further line exists, fall back to the current line.
self.next_line = matched_lines.next().unwrap_or(self.current_line);
self.next_line_number += 1;
self.next_column_number = 1;
} else {
self.next_column_number += 1;
}
}
true
} else {
false
}
}
/// Applies `capturer` to the remaining input and, if it matches, consumes the matched text.
/// Returns true on a match. The parser's current position is updated either way (see
/// `consume_if_matched()`).
fn capture(&mut self, capturer: &mut Capturer) -> bool {
self.consume_if_matched(capturer.capture(self.remaining))
}
/// Applies `capturer` to the remaining input, consumes the matched text, and returns the
/// text of the first capture group. Returns `None` if the regex did not match.
fn consume<'a>(&mut self, capturer: &'a mut Capturer) -> Option<&'a str> {
    if !self.capture(capturer) {
        return None;
    }
    capturer.captured(1)
}
/// Parses `buffer` as a complete document starting at line 1, column 1, and returns the
/// top-level `Array` that holds the document's contents.
pub fn parse(&mut self, buffer: &'parser str) -> Result<Array, Error> {
self.parse_from_location(buffer, 1, 1)
}
/// Parses `buffer`, reporting positions as if its first character were located at
/// `starting_line_number` and `starting_column_number`, and returns the top-level `Array`
/// that holds the parsed contents.
///
/// Panics if `starting_line_number` is zero. Returns a parse error for unrecognized tokens,
/// unterminated strings or block comments, and mismatched braces.
pub fn parse_from_location(
&mut self,
buffer: &'parser str,
starting_line_number: usize,
starting_column_number: usize,
) -> Result<Array, Error> {
self.remaining = buffer;
self.current_line = &self.remaining;
assert!(starting_line_number > 0, "document line numbers are 1-based");
self.next_line_number = starting_line_number;
self.next_column_number = starting_column_number;
self.next_line = self.current_line;
// The "current" position starts one step behind the "next" position; the first call to
// consume_if_matched() moves it up to the starting position.
// NOTE(review): there is no matching assert for starting_column_number; a value of 0
// would underflow in the column subtraction below -- confirm callers always pass >= 1.
self.line_number = self.next_line_number - 1;
self.column_number = self.next_column_number - 1;
// The document itself is modeled as an implicit top-level array.
self.scope_stack = vec![Rc::new(RefCell::new(Array::new(vec![])))];
let mut next_token = Capturer::new(&NEXT_TOKEN);
let mut single_quoted = Capturer::new(&SINGLE_QUOTED);
let mut double_quoted = Capturer::new(&DOUBLE_QUOTED);
let mut line_comment = Capturer::new(&LINE_COMMENT);
let mut block_comment = Capturer::new(&BLOCK_COMMENT);
// These flags track newlines seen after a line comment. Two newlines after a line comment
// (a blank line) set `pending_new_line_comment_block`, which is passed along with the next
// line comment. Any token other than whitespace, a newline, or a line comment resets all
// three flags.
let mut just_captured_line_comment = false;
let mut pending_blank_line = false;
let mut pending_new_line_comment_block = false;
while self.remaining.len() > 0 {
let mut reset_line_comment_break_check = true;
// Exactly one capture group of NEXT_TOKEN is set on a match; test the groups in turn.
if self.capture(&mut next_token) {
if let Some(_) = next_token.captured(*WHITESPACE) {
reset_line_comment_break_check = false;
Ok(()) } else if let Some(_) = next_token.captured(*NEWLINE) {
reset_line_comment_break_check = false;
if just_captured_line_comment {
if pending_blank_line {
pending_new_line_comment_block = true;
pending_blank_line = false;
} else if !pending_new_line_comment_block {
pending_blank_line = true;
}
}
self.on_newline()
} else if let Some(_) = next_token.captured(*COMMA) {
self.end_value()
} else if let Some(brace) = next_token.captured(*BRACE) {
self.on_brace(&brace)
} else if let Some(non_string_primitive) =
next_token.captured(*NON_STRING_PRIMITIVE)
{
self.add_non_string_primitive(&non_string_primitive)
} else if let Some(quote) = next_token.captured(*OPEN_QUOTE) {
// Only the opening quote has been consumed; read the body with the regex that
// corresponds to the kind of quote.
let quoted_string = if quote == "'" {
self.consume(&mut single_quoted)
} else {
self.consume(&mut double_quoted)
};
self.add_quoted_string(&quote, quoted_string)
} else if let Some(unquoted_property_name) =
next_token.captured(*UNQUOTED_PROPERTY_NAME_AND_COLON)
{
self.set_pending_property(unquoted_property_name)
} else if let Some(_line_comment_start) = next_token.captured(*LINE_COMMENT_SLASHES)
{
reset_line_comment_break_check = false;
pending_blank_line = false;
// Only the `//` has been consumed; read the rest of the line as the comment text.
let line_comment = self.consume(&mut line_comment);
if self.add_line_comment(line_comment, pending_new_line_comment_block)? {
just_captured_line_comment = true;
pending_new_line_comment_block = false;
} Ok(())
} else if let Some(_block_comment_start) = next_token.captured(*OPEN_BLOCK_COMMENT)
{
// Only the `/*` has been consumed; read through the closing `*/`.
let block_comment = self.consume(&mut block_comment);
self.add_block_comment(block_comment)
} else {
Err(Error::internal(
self.location(),
format!(
"NEXT_TOKEN matched an unexpected capture group: {}",
next_token.overall_match().unwrap_or("")
),
))
}
} else {
Err(self.error("Unexpected token"))
}?;
if reset_line_comment_break_check {
just_captured_line_comment = false;
pending_blank_line = false;
pending_new_line_comment_block = false;
}
}
self.remaining = "";
self.close_document()?;
// Only the implicit document array should remain on the stack, and this parser should
// hold the only reference to it.
match Rc::try_unwrap(self.scope_stack.pop().unwrap())
.map_err(|_| Error::internal(None, "Rc<> for document array could not be unwrapped."))?
.into_inner()
{
Value::Array { val, .. } => Ok(val),
unexpected => Err(Error::internal(
self.location(),
format!("Final scope should be an Array, but scope was {:?}", unexpected),
)),
}
}
/// Verifies that exactly one scope remains on the scope stack, which means every opened
/// container was closed. Otherwise returns a parse error.
fn close_document(&mut self) -> Result<(), Error> {
    if self.scope_stack.len() != 1 {
        return Err(self.error("Mismatched braces in the document"));
    }
    Ok(())
}
/// Builds the source line and indicator to show for an error at the current position.
///
/// The indicator starts at the current column (clamped to the line length) and spans the
/// columns consumed by the current token when the token is contained in a single line, or
/// one column otherwise. Lines longer than `max_error_line_len` characters are trimmed
/// around the indicator by `trim_error_line_and_indicator()`.
fn get_error_context(
    &self,
    max_error_line_len: usize,
    min_context_len: usize,
    ellipsis: &str,
) -> ParserErrorContext {
    let error_line_len = self.current_line.chars().count();
    let indicator_start = (self.column_number - 1).min(error_line_len);
    let token_is_on_one_line = self.line_number == self.next_line_number;
    let indicator_len = if token_is_on_one_line {
        let token_len = self.next_column_number - self.column_number;
        let room_left = error_line_len - indicator_start;
        token_len.min(room_left).max(1)
    } else {
        1
    };
    if error_line_len > max_error_line_len {
        return trim_error_line_and_indicator(
            self.current_line,
            indicator_start,
            indicator_len,
            error_line_len,
            max_error_line_len,
            min_context_len,
            ellipsis,
        );
    }
    ParserErrorContext::new(self.current_line.to_owned(), indicator_start, indicator_len)
}
}
/// A range expressed in character positions (as opposed to byte offsets) within a string.
struct CharRange {
    /// Start (inclusive) and end (exclusive) character positions.
    range: std::ops::Range<usize>,
}
impl CharRange {
    /// Wraps a range of character positions.
    fn new(range: std::ops::Range<usize>) -> Self {
        Self { range }
    }

    /// Converts the character range to the equivalent byte range within `from_string`, so it
    /// can be used to slice the string. A position equal to the number of characters maps to
    /// the string's byte length. Returns `None` if either end of the range lies beyond that.
    fn to_byte_range(self, from_string: &str) -> Option<std::ops::Range<usize>> {
        // Byte offset of every character boundary, including the one at the end of the string.
        let byte_offset_of = |char_pos: usize| {
            from_string
                .char_indices()
                .map(|(byte_pos, _char)| byte_pos)
                .chain(std::iter::once(from_string.len()))
                .nth(char_pos)
        };
        let start_byte = byte_offset_of(self.range.start)?;
        let end_byte = byte_offset_of(self.range.end)?;
        Some(start_byte..end_byte)
    }
}
/// Shortens `error_line` so that it can be displayed together with its error indicator,
/// replacing the removed text with `ellipsis`, and returns the shortened line with the
/// indicator position adjusted to match.
///
/// All positions and lengths are counted in characters, not bytes. `error_line_len` must be
/// the character count of `error_line` and must exceed `max_error_line_len`, which in turn
/// must exceed the character count of `ellipsis`.
///
/// Three outcomes are possible:
///   * the start of the line is kept and the end is replaced by an ellipsis;
///   * the end of the line is kept and the start is replaced by an ellipsis; or
///   * a window around the indicator is kept, with an ellipsis at one or both ends.
fn trim_error_line_and_indicator(
error_line: &str,
indicator_start: usize,
mut indicator_len: usize,
error_line_len: usize,
max_error_line_len: usize,
min_context_len: usize,
ellipsis: &str,
) -> ParserErrorContext {
let ellipsis_len = ellipsis.chars().count();
assert!(max_error_line_len > ellipsis_len);
assert!(max_error_line_len < error_line_len);
assert!(
indicator_start <= error_line_len,
"Error because indicator_start={} > error_line_len={}\n{}",
indicator_start,
error_line_len,
error_line
);
assert!(
indicator_len == 1 || (indicator_start + indicator_len) <= error_line_len,
"Error because indicator_start={}, indicator_len={}, error_line_len={}\n{}",
indicator_start,
indicator_len,
error_line_len,
error_line
);
// The indicator itself is never wider than the maximum line length.
indicator_len = std::cmp::min(indicator_len, max_error_line_len);
let min_right_context_len = std::cmp::max(min_context_len, indicator_len);
let context_end =
std::cmp::min(indicator_start + min_right_context_len, error_line_len - ellipsis_len);
// Case 1: the indicator and its right-hand context fit when the line is kept from its
// start; only the end is replaced, so the indicator position is unchanged.
if context_end < max_error_line_len - ellipsis_len {
let slice_bytes = CharRange::new(0..(max_error_line_len - ellipsis_len))
.to_byte_range(error_line)
.expect("char indices should map to String bytes");
return ParserErrorContext::new(
error_line[slice_bytes].to_string() + ellipsis,
indicator_start,
indicator_len,
);
}
// Case 2: the indicator and its left-hand context fit when the line is kept through its
// end; only the start is replaced, and the indicator shifts left accordingly.
let context_start = indicator_start - std::cmp::min(indicator_start, min_context_len);
if error_line_len - context_start < max_error_line_len - ellipsis_len {
let start_char = error_line_len - (max_error_line_len - ellipsis_len);
let slice_bytes = CharRange::new(start_char..error_line_len)
.to_byte_range(error_line)
.expect("char indices should map to String bytes");
return ParserErrorContext::new(
ellipsis.to_owned() + &error_line[slice_bytes],
(indicator_start + ellipsis_len) - start_char,
indicator_len,
);
}
// Case 3: keep a window around the indicator. The characters left over after reserving room
// for two ellipses and the indicator are split between the left and right margins; the
// right margin gets at most half, and whatever it does not use goes to the left.
let margin_chars =
max_error_line_len - std::cmp::min(max_error_line_len, (ellipsis_len * 2) + indicator_len);
let right_margin = std::cmp::min(
error_line_len - std::cmp::min(error_line_len, indicator_start + indicator_len),
margin_chars / 2,
);
let left_margin = margin_chars - right_margin;
let mut start_char = indicator_start - left_margin;
let mut end_char =
std::cmp::min(indicator_start + indicator_len + right_margin, error_line_len);
let mut start_ellipsis = ellipsis;
let mut end_ellipsis = ellipsis;
// If the window reaches one end of the line, no ellipsis is needed on that side, and the
// room reserved for it is used to widen the window on the other side.
if start_char == 0 {
start_ellipsis = "";
end_char += ellipsis_len;
} else if end_char == error_line_len {
end_ellipsis = "";
start_char -= ellipsis_len;
}
let slice_bytes = CharRange::new(start_char..end_char)
.to_byte_range(error_line)
.expect("char indices should map to String bytes");
ParserErrorContext::new(
start_ellipsis.to_owned() + &error_line[slice_bytes] + end_ellipsis,
(indicator_start + ellipsis_len) - start_char,
indicator_len,
)
}
#[cfg(test)]
mod tests {
use {super::*, crate::test_error, proptest::prelude::*};
/// Test helper: builds the error context for `pattern` as found in `error_line`, and compares
/// the resulting line and indicator with the expected strings.
///
/// The indicator starts where the first line of `pattern` is found in `error_line` and is as
/// long as `pattern`. Positions and lengths are measured in characters, which is the unit
/// `ParserErrorContext` and `trim_error_line_and_indicator()` work in. Lines longer than
/// `max_error_line_len` characters are trimmed first.
///
/// Returns `Err` with a description of what differed (after printing the details) if either
/// string does not match. Panics if the first line of `pattern` is empty or is not present
/// in `error_line`.
fn gen_error_line_test(
    error_line: &str,
    pattern: &str,
    max_error_line_len: usize,
    min_context_len: usize,
    ellipsis: &str,
    expected_errorline: &str,
    expected_indicator: &str,
) -> Result<(), String> {
    let pattern_line1 = pattern.split('\n').next().unwrap_or("");
    assert!(pattern_line1.len() > 0);
    // `find()` returns a byte offset; convert it (and the pattern length) to character counts
    // so that non-ASCII text ahead of, or inside, the pattern does not skew the indicator.
    let pattern_byte_pos = error_line.find(pattern_line1).expect("pattern not found in line");
    let indicator_start = error_line[..pattern_byte_pos].chars().count();
    let indicator_len = pattern.chars().count();
    let error_line_len = error_line.chars().count();
    let error_context = if error_line_len <= max_error_line_len {
        ParserErrorContext::new(error_line.to_owned(), indicator_start, indicator_len)
    } else {
        trim_error_line_and_indicator(
            error_line,
            indicator_start,
            indicator_len,
            error_line_len,
            max_error_line_len,
            min_context_len,
            ellipsis,
        )
    };
    let actual_errorline = error_context.line();
    let actual_indicator = error_context.indicator();
    let line_differs = expected_errorline != actual_errorline;
    let indicator_differs = expected_indicator != actual_indicator;
    let mut errors = String::new();
    if line_differs {
        println!(
            r#"
expected_errorline: >>>{}<<< (charlen={})
actual_errorline: >>>{}<<< (charlen={} of {}, min context len={})"#,
            expected_errorline,
            expected_errorline.chars().count(),
            actual_errorline,
            actual_errorline.chars().count(),
            max_error_line_len,
            min_context_len,
        );
        errors.push_str("actual errorline does not match expected");
    } else if indicator_differs {
        // Show the (matching) line above the indicator comparison, for reference.
        println!(
            r#"
{}"#,
            actual_errorline,
        );
    }
    if indicator_differs {
        if errors.len() > 0 {
            errors.push_str(" and ");
        }
        println!(
            r#"
expected_indicator: {}
actual_indicator: {}"#,
            expected_indicator, actual_indicator,
        );
        errors.push_str("actual indicator does not match expected");
    } else if line_differs {
        // Show the (matching) indicator below the line comparison, for reference.
        println!(
            r#"
{}"#,
            actual_indicator,
        );
    }
    if errors.len() > 0 {
        println!("{}", errors);
        Err(errors)
    } else {
        Ok(())
    }
}
/// A line that fits within the maximum length is shown untrimmed, with the indicator under
/// the pattern.
#[test]
fn test_error_line1() {
    // "bad" starts at character offset 13 of the untrimmed line. The padding is spelled with
    // `repeat` so the column is explicit.
    let expected_indicator = format!("{}^{}", " ".repeat(13), "~".repeat(2));
    gen_error_line_test(
        " good token, bad token;",
        "bad",
        30,
        10,
        " ... ",
        " good token, bad token;",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
/// A pattern at the end of an over-long line: the start of the line is replaced by the
/// ellipsis and the end is kept.
#[test]
fn test_error_line2() {
    // In the trimmed line, "token;" follows the 5-character ellipsis and the 9 characters
    // of "ken, bad ", so the caret belongs at offset 14.
    let expected_indicator = format!("{}^{}", " ".repeat(14), "~".repeat(5));
    gen_error_line_test(
        " good token, bad token;",
        "token;",
        20,
        10,
        " ... ",
        " ... ken, bad token;",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
/// Same as `test_error_line2`, but with a one-character ellipsis, which leaves room for more
/// of the original line.
#[test]
fn test_error_line2_short_ellipsis() {
    // In the trimmed line, "token;" follows the 1-character ellipsis and the 13 characters
    // of "d token, bad ", so the caret belongs at offset 14.
    let expected_indicator = format!("{}^{}", " ".repeat(14), "~".repeat(5));
    gen_error_line_test(
        " good token, bad token;",
        "token;",
        20,
        10,
        "…",
        "…d token, bad token;",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
/// A pattern in the middle of an over-long line: both ends of the line are replaced by the
/// ellipsis.
#[test]
fn test_error_line3() {
    // In the trimmed line, "bad" follows the 5-character ellipsis and the 4 characters of
    // "en, ", so the caret belongs at offset 9.
    let expected_indicator = format!("{}^{}", " ".repeat(9), "~".repeat(2));
    gen_error_line_test(
        "A good token, bad token;",
        "bad",
        20,
        10,
        " ... ",
        " ... en, bad to ... ",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
/// Same input as `test_error_line3`, but with a one-character ellipsis: the window then
/// reaches the end of the line, so only the start is replaced.
#[test]
fn test_error_line3_short_ellipsis() {
    // In the trimmed line, "bad" follows the 1-character ellipsis and the 9 characters of
    // "d token, ", so the caret belongs at offset 10.
    let expected_indicator = format!("{}^{}", " ".repeat(10), "~".repeat(2));
    gen_error_line_test(
        "A good token, bad token;",
        "bad",
        20,
        10,
        "…",
        "…d token, bad token;",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
/// Same as `test_error_line3_short_ellipsis`, with the ellipsis written as a Unicode escape
/// rather than as a literal character.
#[test]
fn test_error_line3_escaped_unicode_ellipsis() {
    // In the trimmed line, "bad" follows the 1-character ellipsis and the 9 characters of
    // "d token, ", so the caret belongs at offset 10.
    let expected_indicator = format!("{}^{}", " ".repeat(10), "~".repeat(2));
    gen_error_line_test(
        "A good token, bad token;",
        "bad",
        20,
        10,
        "\u{2026}",
        "…d token, bad token;",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
/// With a maximum length that only just exceeds two ellipses, no margin is left: only the
/// pattern itself is kept between the two ellipses.
#[test]
fn test_error_line4() {
    // "bad" immediately follows the 5-character leading ellipsis.
    let expected_indicator = format!("{}^{}", " ".repeat(5), "~".repeat(2));
    gen_error_line_test(
        "A good token, bad token;",
        "bad",
        10,
        10,
        " ... ",
        " ... bad ... ",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
/// Same as `test_error_line4`, with a smaller minimum context length; the result is the
/// same, because there is still no room for any margin.
#[test]
fn test_error_line4_short_context() {
    // "bad" immediately follows the 5-character leading ellipsis.
    let expected_indicator = format!("{}^{}", " ".repeat(5), "~".repeat(2));
    gen_error_line_test(
        "A good token, bad token;",
        "bad",
        10,
        5,
        " ... ",
        " ... bad ... ",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
/// A pattern nearly as long as the maximum line length: the whole pattern is kept between
/// the two ellipses, even though the result is longer than the maximum.
#[test]
fn test_error_line4_long_pattern() {
    // "bad token" (9 characters) immediately follows the 5-character leading ellipsis.
    let expected_indicator = format!("{}^{}", " ".repeat(5), "~".repeat(8));
    gen_error_line_test(
        "A good token, bad token;",
        "bad token",
        10,
        10,
        " ... ",
        " ... bad token ... ",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
/// Same as `test_error_line4_long_pattern`, with a smaller minimum context length; the
/// result is the same, because there is still no room for any margin.
#[test]
fn test_error_line4_long_pattern_short_context_big_ellipsis() {
    // "bad token" (9 characters) immediately follows the 5-character leading ellipsis.
    let expected_indicator = format!("{}^{}", " ".repeat(5), "~".repeat(8));
    gen_error_line_test(
        "A good token, bad token;",
        "bad token",
        10,
        4,
        " ... ",
        " ... bad token ... ",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
/// A short pattern with a one-character ellipsis and a small maximum length: a few
/// characters of context are kept on each side of the pattern.
#[test]
fn test_error_line4_long_pattern_short_context_short_ellipsis() {
    // In the trimmed line, "bad" follows the 1-character ellipsis and the 3 characters of
    // "n, ", so the caret belongs at offset 4.
    let expected_indicator = format!("{}^{}", " ".repeat(4), "~".repeat(2));
    gen_error_line_test(
        "A good token, bad token;",
        "bad",
        10,
        4,
        "\u{2026}",
        "…n, bad t…",
        &expected_indicator,
    )
    .expect("actual should match expected");
}
/// A long, deeply nested line that still fits within the maximum length is shown untrimmed.
#[test]
fn test_error_line5() {
    // The line is not trimmed, so the expected line is the input line itself.
    let error_line = r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#;
    // The caret belongs under the first character of the pattern in the displayed line. The
    // column is derived from that line rather than written as a long run of spaces.
    let caret_column =
        error_line.find("a_prop").expect("expected line should contain the pattern");
    let expected_indicator = format!("{}^{}", " ".repeat(caret_column), "~".repeat(5));
    gen_error_line_test(
        error_line,
        "a_prop",
        200,
        10,
        " ... ",
        error_line,
        &expected_indicator,
    ).expect("actual should match expected");
}
/// A pattern near the start of an over-long line: the start of the line is kept and the end
/// is replaced by the ellipsis.
#[test]
fn test_error_line6() {
    let expected_errorline = r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#;
    // The caret belongs under the first character of the pattern in the displayed line. The
    // column is derived from that line rather than written as a long run of spaces.
    let caret_column =
        expected_errorline.find("a_prop").expect("expected line should contain the pattern");
    let expected_indicator = format!("{}^{}", " ".repeat(caret_column), "~".repeat(5));
    gen_error_line_test(
        r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
        "a_prop",
        100,
        10,
        " ... ",
        expected_errorline,
        &expected_indicator,
    ).expect("actual should match expected");
}
/// Like `test_error_line6`, with a slightly shorter input line and a smaller minimum context
/// length: the start of the line is kept and the end is replaced by the ellipsis.
#[test]
fn test_error_line7() {
    let expected_errorline = r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#;
    // The caret belongs under the first character of the pattern in the displayed line. The
    // column is derived from that line rather than written as a long run of spaces.
    let caret_column =
        expected_errorline.find("a_prop").expect("expected line should contain the pattern");
    let expected_indicator = format!("{}^{}", " ".repeat(caret_column), "~".repeat(5));
    gen_error_line_test(
        r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
        "a_prop",
        100,
        5,
        " ... ",
        expected_errorline,
        &expected_indicator,
    ).expect("actual should match expected");
}
/// Like `test_error_line6`, with more nesting ahead of the pattern: the start of the line is
/// still kept and the end is replaced by the ellipsis.
#[test]
fn test_error_line7_more_braces() {
    let expected_errorline = r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#;
    // The caret belongs under the first character of the pattern in the displayed line. The
    // column is derived from that line rather than written as a long run of spaces.
    let caret_column =
        expected_errorline.find("a_prop").expect("expected line should contain the pattern");
    let expected_indicator = format!("{}^{}", " ".repeat(caret_column), "~".repeat(5));
    gen_error_line_test(
        r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
        "a_prop",
        100,
        10,
        " ... ",
        expected_errorline,
        &expected_indicator,
    ).expect("actual should match expected");
}
/// A pattern near the end of an over-long line: the start of the line is replaced by the
/// ellipsis and the end is kept.
#[test]
fn test_error_line8() {
    let expected_errorline = r#" ... [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#;
    // The caret belongs under the first character of the pattern in the displayed line. The
    // column is derived from that line rather than written as a long run of spaces.
    let caret_column =
        expected_errorline.find("a_prop").expect("expected line should contain the pattern");
    let expected_indicator = format!("{}^{}", " ".repeat(caret_column), "~".repeat(5));
    gen_error_line_test(
        r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
        "a_prop",
        100,
        10,
        " ... ",
        expected_errorline,
        &expected_indicator,
    ).expect("actual should match expected");
}
/// A pattern in the middle of a very long line: both ends of the line are replaced by the
/// ellipsis.
#[test]
fn test_error_line9() {
    let expected_errorline = r#" ... [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#;
    // The caret belongs under the first character of the pattern in the displayed line. The
    // column is derived from that line rather than written as a long run of spaces.
    let caret_column =
        expected_errorline.find("a_prop").expect("expected line should contain the pattern");
    let expected_indicator = format!("{}^{}", " ".repeat(caret_column), "~".repeat(5));
    gen_error_line_test(
        r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#,
        "a_prop",
        100,
        10,
        " ... ",
        expected_errorline,
        &expected_indicator,
    ).expect("actual should match expected");
}
// Shared proptest configurations for the property tests in this module.
lazy_static! {
// The default configuration, except that failure persistence is turned off.
static ref NO_PERSIST: ProptestConfig = ProptestConfig {
failure_persistence: None,
.. ProptestConfig::default()
};
// Like `NO_PERSIST`, but with the number of cases set to 1024.
static ref EXTRA_CASES_NO_PERSIST: ProptestConfig = ProptestConfig {
failure_persistence: None,
cases: 1024,
.. ProptestConfig::default()
};
}
/// Describes one regex capture test: the pieces of the input string, which piece the regex is
/// expected to capture, and optionally a second regex to apply to the rest of the input.
///
/// The default value has no expected error, no follow-up regex, and an empty string for
/// every text field.
#[derive(Default)]
struct RegexTest<'a> {
    /// If set, the test is expected to fail with exactly this test-failure message.
    error: Option<&'a str>,
    /// Text placed ahead of `matches` in the input.
    prefix: &'a str,
    /// The text the capture group is expected to capture.
    matches: &'a str,
    /// Text placed directly after `matches` in the input.
    suffix: &'a str,
    /// Regex to apply after the first capture succeeds, if any.
    next_regex: Option<&'a Regex>,
    /// `prefix` for the follow-up capture test.
    next_prefix: &'a str,
    /// `matches` for the follow-up capture test.
    next_matches: &'a str,
    /// `suffix` for the follow-up capture test.
    next_suffix: &'a str,
    /// Text placed at the end of the input, after everything else.
    trailing: &'a str,
}
/// Builds the input described by `test`, applies `regex` to it, and checks the result: the
/// capture group `group_id` (group 1 if `None`) must capture exactly `test.matches`, it must
/// be the only capture group that is set, and the input left after the overall match must be
/// `test.next_matches` followed by `test.next_suffix` and `test.trailing`. A mismatch in any
/// of those panics.
///
/// Returns a test-failure error if the regex, or the requested group, does not match. On
/// success, returns the captured text, or, if `test.next_regex` is set, the result of running
/// the follow-up capture test with that regex.
fn try_capture(
    regex: &Regex,
    group_id: Option<usize>,
    test: RegexTest<'_>,
) -> Result<String, Error> {
    println!();
    println!("pattern: '{}'", regex.as_str());
    let trailing = format!("{}{}", test.next_suffix, test.trailing);
    let test_string =
        [test.prefix, test.matches, test.suffix, test.next_matches, trailing.as_str()].concat();
    println!("capturing from: '{}'", test_string.escape_debug());
    println!(
        " {}{}{}{}",
        " ".repeat(test.prefix.len()),
        "^".repeat(test.matches.len()),
        " ".repeat(test.suffix.len()),
        "^".repeat(test.next_matches.len())
    );
    let group_id = group_id.unwrap_or(1);
    println!("expected capture id: '{}'", group_id);
    let capture = regex.captures(&test_string).ok_or_else(|| test_error!("capture failed"))?;
    let overall_match = capture.get(0).ok_or_else(|| test_error!("regex did not match"))?;
    println!(
        "overall match: '{}', length = {}",
        overall_match.as_str().escape_debug(),
        overall_match.end()
    );
    let remaining = &test_string[overall_match.end()..];
    println!("remaining: '{}'", remaining.escape_debug());
    // Collect the index of every capture group that is set, other than group 0 (the overall
    // match).
    const OVERALL_MATCH: usize = 0;
    let capture_ids: Vec<usize> = capture
        .iter()
        .enumerate()
        .filter(|(index, subcapture)| *index != OVERALL_MATCH && subcapture.is_some())
        .map(|(index, _subcapture)| index)
        .collect();
    println!("capture ids = {:?}", capture_ids);
    let captured_text = capture
        .get(group_id)
        .ok_or_else(|| test_error!(format!("capture group {} did not match", group_id)))?
        .as_str();
    println!("captured: '{}'", captured_text.escape_debug());
    assert_eq!(captured_text, test.matches);
    assert_eq!(capture_ids.len(), 1);
    assert_eq!(remaining, test.next_matches.to_owned() + &trailing);
    if let Some(next_regex) = test.next_regex {
        let next_test = RegexTest {
            prefix: test.next_prefix,
            matches: test.next_matches,
            suffix: test.next_suffix,
            trailing: test.trailing,
            ..Default::default()
        };
        test_capture(next_regex, None, next_test)
    } else {
        Ok(captured_text.to_string())
    }
}
fn test_capture(
regex: &Regex,
group_id: Option<usize>,
test: RegexTest<'_>,
) -> Result<String, Error> {
let expected_error_str = test.error.clone();
match try_capture(regex, group_id, test) {
Ok(captured) => {
println!("SUCCESSFUL CAPTURE! ... '{}'", captured);
Ok(captured)
}
Err(actual_error) => match expected_error_str {
Some(expected_error_str) => match &actual_error {
Error::TestFailure(_location, actual_error_str) => {
if expected_error_str == actual_error_str {
println!("EXPECTED FAILURE (GOOD NEWS)! ... '{}'", actual_error);
Ok(format!("{}", actual_error))
} else {
println!("{}", actual_error);
println!("expected: {}", expected_error_str);
println!(" actual: {}", actual_error_str);
Err(test_error!(
"Actual error string did not match expected error string."
))
}
}
_unexpected_error_type => {
println!("expected: Test failure: {}", expected_error_str);
println!(" actual: {}", actual_error);
Err(test_error!(
"Actual error type did not match expected test failure type."
))
}
},
None => Err(actual_error),
},
}
}
/// Runs a capture test against the `NEXT_TOKEN` regex, expecting capture group `group_id` to
/// be the group that matches.
fn test_regex(group_id: usize, test: RegexTest<'_>) -> Result<String, Error> {
test_capture(&NEXT_TOKEN, Some(group_id), test)
}
// Property test: for generated runs of non-newline whitespace followed by generated trailing
// text, NEXT_TOKEN must capture the whole run as the WHITESPACE group and nothing more.
proptest! {
#![proptest_config(NO_PERSIST)]
#[test]
fn test_whitespace_no_newlines(
spaces in r#"[\s&&[^\n]]+"#,
trailing_non_whitespace in r#"[^\s&&[^\n]]*"#,
) {
test_regex(
*WHITESPACE,
RegexTest {
matches: &spaces,
trailing: &trailing_non_whitespace,
..Default::default()
}
)
.unwrap();
}
}
// Property test: same as the whitespace test above it in spirit, but the generated `trailing`
// text always begins with a newline (`\n[^\s&&[^\n]]*`); the `WHITESPACE` group must still
// capture exactly the generated `matches` text.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_whitespace_until_newline(
        blanks in r#"[\s&&[^\n]]+"#,
        newline_then_rest in r#"\n[^\s&&[^\n]]*"#,
    ) {
        let case = RegexTest {
            matches: &blanks,
            trailing: &newline_then_rest,
            ..Default::default()
        };
        test_regex(*WHITESPACE, case).unwrap();
    }
}
// Property test: runs of plain spaces and tabs (`[ \t]+`) are supplied as `matches`, and
// `test_regex()` must succeed for the `WHITESPACE` group.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_plain_ascii_whitespace_no_newline(
        blanks in r#"[ \t]+"#,
        rest in r#"[^\s&&[^\n]]*"#,
    ) {
        let case = RegexTest { matches: &blanks, trailing: &rest, ..Default::default() };
        test_regex(*WHITESPACE, case).unwrap();
    }
}
// Property test: a single `\n` followed by arbitrary generated text (`\PC*`) must be accepted
// by `test_regex()` for the `NEWLINE` group.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_newline(
        line_break in r#"\n"#,
        rest in r#"\PC*"#,
    ) {
        let case = RegexTest { matches: &line_break, trailing: &rest, ..Default::default() };
        test_regex(*NEWLINE, case).unwrap();
    }
}
// Property test for line comments: `//` is checked against the `LINE_COMMENT_SLASHES` group,
// then the generated comment text is checked against the `LINE_COMMENT` regex, with the
// generated remainder (empty, or a newline plus more text) supplied as `trailing`.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_line_comment(
        slashes in r#"//"#,
        comment_text in r#"(|[^\n][^\n]*)"#,
        rest in r#"(\n\PC*)?"#,
    ) {
        let case = RegexTest {
            matches: &slashes,
            next_regex: Some(&*LINE_COMMENT),
            next_matches: &comment_text,
            trailing: &rest,
            ..Default::default()
        };
        test_regex(*LINE_COMMENT_SLASHES, case).unwrap();
    }
}
// Property test for a line comment with no content: after `//`, the `LINE_COMMENT` regex is
// expected to match the empty string, with the generated remainder supplied as `trailing`.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_empty_line_comment(
        slashes in r#"//"#,
        rest in r#"(\n\PC*)?"#,
    ) {
        let case = RegexTest {
            matches: &slashes,
            next_regex: Some(&*LINE_COMMENT),
            next_matches: "",
            trailing: &rest,
            ..Default::default()
        };
        test_regex(*LINE_COMMENT_SLASHES, case).unwrap();
    }
}
// Property test for block comments: `/*` is checked against the `OPEN_BLOCK_COMMENT` group,
// then the generated body is checked against the `BLOCK_COMMENT` regex with `*/` as the
// suffix of that second match and arbitrary generated text as `trailing`.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_block_comment(
        comment_body in r#"([^*]|([*][^*/]))*"#,
        rest in r#"\PC*"#,
    ) {
        let case = RegexTest {
            matches: "/*",
            next_regex: Some(&*BLOCK_COMMENT),
            next_matches: &comment_body,
            next_suffix: "*/",
            trailing: &rest,
            ..Default::default()
        };
        test_regex(*OPEN_BLOCK_COMMENT, case).unwrap();
    }
}
// Property test for an empty block comment (`/**/`) followed by arbitrary generated text.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_empty_block_comment(
        rest in r#"\PC*"#,
    ) {
        let case = RegexTest {
            matches: "/*",
            next_regex: Some(&*BLOCK_COMMENT),
            next_matches: "",
            next_suffix: "*/",
            trailing: &rest,
            ..Default::default()
        };
        test_regex(*OPEN_BLOCK_COMMENT, case).unwrap();
    }
}
// Property test: a generated name (`[\w$&&[^\d]][\w$]*`) is supplied as `matches`, optional
// non-newline whitespace plus `:` as `suffix`, and non-empty generated text as `trailing`;
// `test_regex()` must succeed for the `UNQUOTED_PROPERTY_NAME_AND_COLON` group.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_property_name(
        name in r#"[\w$&&[^\d]][\w$]*"#,
        colon_part in r#"[\s&&[^\n]]*:"#,
        rest in r#"\PC+"#,
    ) {
        let case = RegexTest {
            matches: &name,
            suffix: &colon_part,
            trailing: &rest,
            ..Default::default()
        };
        test_regex(*UNQUOTED_PROPERTY_NAME_AND_COLON, case).unwrap();
    }
}
// Negative property test: names generated from `[0-9][\w&&[^0-9eExX]][\w$]*` (a leading digit
// followed by a word character other than a digit, `e`, `E`, `x`, or `X`) are expected to
// produce the "capture failed" test error.
proptest! {
    #![proptest_config(EXTRA_CASES_NO_PERSIST)]
    #[test]
    fn bad_property_name(
        name in r#"[0-9][\w&&[^0-9eExX]][\w$]*"#,
        colon_part in r#"[\s&&[^\n]]*:"#,
        rest in r#"\PC+"#,
    ) {
        let case = RegexTest {
            error: Some("capture failed"),
            matches: &name,
            suffix: &colon_part,
            trailing: &rest,
            ..Default::default()
        };
        test_regex(*UNQUOTED_PROPERTY_NAME_AND_COLON, case).unwrap();
    }
}
// Negative property test: names generated from `[0-9]\$[\w$]*` (a digit immediately followed
// by `$`) are expected to fail with "capture group N did not match", where N is the
// `UNQUOTED_PROPERTY_NAME_AND_COLON` group index.
proptest! {
    #![proptest_config(EXTRA_CASES_NO_PERSIST)]
    #[test]
    fn bad_property_name_captures_number_first(
        name in r#"[0-9]\$[\w$]*"#,
        colon_part in r#"[\s&&[^\n]]*:"#,
        rest in r#"\PC+"#,
    ) {
        // Bind the message to a local so the borrowed text outlives the test case.
        let expected_error =
            format!("capture group {} did not match", *UNQUOTED_PROPERTY_NAME_AND_COLON);
        let case = RegexTest {
            error: Some(&expected_error),
            matches: &name,
            suffix: &colon_part,
            trailing: &rest,
            ..Default::default()
        };
        test_regex(*UNQUOTED_PROPERTY_NAME_AND_COLON, case).unwrap();
    }
}
// Property test for single-quoted strings: the opening `'` is checked against the
// `OPEN_QUOTE` group, then the generated string body is checked against the `SINGLE_QUOTED`
// regex with the same quote as the suffix of that second match.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_single_quoted_string(
        quote in r#"'"#,
        body in r#"(([^'\\\n])|(\\')|(\\\n)|(\\\\))*"#,
        rest in r#"\s*[,:/\]\}]"#,
    ) {
        let case = RegexTest {
            matches: &quote,
            next_regex: Some(&*SINGLE_QUOTED),
            next_matches: &body,
            next_suffix: &quote,
            trailing: &rest,
            ..Default::default()
        };
        test_regex(*OPEN_QUOTE, case).unwrap();
    }
}
// Property test for double-quoted strings: the opening `"` is checked against the
// `OPEN_QUOTE` group, then the generated string body is checked against the `DOUBLE_QUOTED`
// regex with the same quote as the suffix of that second match.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_double_quoted_string(
        quote in r#"""#,
        body in r#"(([^"\\\n])|(\\")|(\\\n)|(\\\\))*"#,
        rest in r#"\s*[,:/\]\}]?\PC*"#,
    ) {
        let case = RegexTest {
            matches: &quote,
            next_regex: Some(&*DOUBLE_QUOTED),
            next_matches: &body,
            next_suffix: &quote,
            trailing: &rest,
            ..Default::default()
        };
        test_regex(*OPEN_QUOTE, case).unwrap();
    }
}
// Property test for non-string primitives: generated keywords (`null`, `true`, `false`) and
// optionally signed numbers are supplied as `matches`; the generated `trailing` text is either
// empty or starts with whitespace, `,`, `]`, or `}`. `test_regex()` must succeed for the
// `NON_STRING_PRIMITIVE` group.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_non_string_primitive(
        primitive in
            concat!(
                r#"(null|true|false)|([-+]?(NaN|Infinity|(0[xX][0-9a-fA-F]+)"#,
                r#"|((0|([1-9][0-9]*))?\.[0-9]+[eE][+-]?[0-9]+)|((0|([1-9][0-9]*))?\.[0-9]+)|((0|([1-9][0-9]*))\.?)))"#
            ),
        rest in r#"(|([\s,\]\}]\PC*))"#,
    ) {
        let case = RegexTest { matches: &primitive, trailing: &rest, ..Default::default() };
        test_regex(*NON_STRING_PRIMITIVE, case).unwrap();
    }
}
// Property test: any one of `[`, `{`, `}`, `]` followed by arbitrary generated text must be
// accepted by `test_regex()` for the `BRACE` group.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_brace(
        bracket in r#"[\[\{\}\]]"#,
        rest in r#"\PC*"#,
    ) {
        let case = RegexTest { matches: &bracket, trailing: &rest, ..Default::default() };
        test_regex(*BRACE, case).unwrap();
    }
}
// Property test: a `,` followed by arbitrary generated text must be accepted by
// `test_regex()` for the `COMMA` group.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_comma(
        separator in r#","#,
        rest in r#"\PC*"#,
    ) {
        let case = RegexTest { matches: &separator, trailing: &rest, ..Default::default() };
        test_regex(*COMMA, case).unwrap();
    }
}
// Property test: a `:` followed by arbitrary generated text is checked directly against the
// standalone `COLON` regex (no capture group id is passed), rather than through `test_regex()`.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_colon(
        separator in r#":"#,
        rest in r#"\PC*"#,
    ) {
        let case = RegexTest { matches: &separator, trailing: &rest, ..Default::default() };
        test_capture(&*COLON, None, case).unwrap();
    }
}
/// Checks hand-written line comment samples: each case matches `//` against the
/// `LINE_COMMENT_SLASHES` group and then applies the `LINE_COMMENT` regex to what follows.
#[test]
fn test_regex_line_comment() {
    let cases = vec![
        // Comment that runs to the end of the input.
        RegexTest {
            matches: "//",
            next_regex: Some(&*LINE_COMMENT),
            next_matches: " some line comment",
            trailing: "",
            ..Default::default()
        },
        // Comment followed by another line.
        RegexTest {
            matches: "//",
            next_regex: Some(&*LINE_COMMENT),
            next_matches: " some line comment",
            trailing: "\n more lines",
            ..Default::default()
        },
        // No `next_matches` given: the comment body is left at its default.
        RegexTest {
            matches: "//",
            next_regex: Some(&*LINE_COMMENT),
            trailing: "\nan empty line comment",
            ..Default::default()
        },
        // A third slash is part of the comment body.
        RegexTest {
            matches: "//",
            next_regex: Some(&*LINE_COMMENT),
            next_matches: "/\t some doc comment",
            trailing: "\nmultiple lines\nare here\n",
            ..Default::default()
        },
    ];
    for case in cases {
        test_regex(*LINE_COMMENT_SLASHES, case).unwrap();
    }
}
#[test]
fn test_regex_block_comment() {
test_regex(
*OPEN_BLOCK_COMMENT,
RegexTest {
matches: "/*",
next_regex: Some(&*BLOCK_COMMENT),
next_matches: " this is a single line block comment ",
next_suffix: "*/",
trailing: "\n\nproperty: ignored",
..Default::default()
},
)
.unwrap();
test_regex(
*OPEN_BLOCK_COMMENT,
RegexTest {
matches: "/*",
next_regex: Some(&*BLOCK_COMMENT),
next_matches: " this is a
multiline block comment",
next_suffix: "*/",
trailing: "\n\nproperty: ignored",
..Default::default()
},
)
.unwrap();
test_regex(
*OPEN_BLOCK_COMMENT,
RegexTest {
matches: "/*",
next_regex: Some(&*BLOCK_COMMENT),
next_matches: "",
next_suffix: "*/",
trailing: " to test an empty block comment",
..Default::default()
},
)
.unwrap();
}
/// Checks hand-written samples against the `NON_STRING_PRIMITIVE` group: keywords and numbers
/// that must match, and look-alikes (wrong case, trailing word characters, incomplete hex)
/// that must produce the "capture failed" test error.
///
/// The keyword, `NaN`, and `Infinity` cases do not depend on the sign prefix, so they run once
/// instead of once per prefix. Signed `NaN`/`Infinity` values are generated by the
/// `test_non_string_primitive` property test.
#[test]
fn test_regex_non_string_primitive() {
    /// Requires `text` to be captured in full by the `NON_STRING_PRIMITIVE` group.
    fn expect_match(text: &str) {
        test_regex(*NON_STRING_PRIMITIVE, RegexTest { matches: text, ..Default::default() })
            .unwrap_or_else(|err| panic!("'{}' should have matched: {}", text, err));
    }

    /// Requires the attempt to capture `text` to end with the "capture failed" test error.
    fn expect_no_match(text: &str) {
        test_regex(
            *NON_STRING_PRIMITIVE,
            RegexTest { matches: text, error: Some("capture failed"), ..Default::default() },
        )
        .unwrap_or_else(|err| panic!("'{}' should have failed to capture: {}", text, err));
    }

    // Prefix-independent cases.
    for &accepted in &["null", "true", "false", "NaN", "Infinity"] {
        expect_match(accepted);
    }
    for &rejected in
        &["NULL", "nullify", "True", "truest", "NAN", "NaN0", "infinity", "Infinity_"]
    {
        expect_no_match(rejected);
    }

    // Numeric cases, each tried unsigned and with an explicit sign.
    for &prefix in &["", "-", "+"] {
        // Integer exponent notation, with both exponent markers and every exponent sign.
        for &exp_prefix in &["", "-", "+"] {
            for &exp_marker in &["e", "E"] {
                expect_match(&format!("{}123{}{}456", prefix, exp_marker, exp_prefix));
            }
        }
        for &accepted in &[
            "0x1a2b3e4f",
            "0X1a2b3e4f",
            "0x1A2B3E4F",
            "0X1a2B3e4F",
            "0",
            "1234567890123456789012345678901234567890",
            "12345.67890",
            ".67890",
            "12345.",
        ] {
            expect_match(&format!("{}{}", prefix, accepted));
        }
        // A non-hex digit after the hex value, and a hex marker with no digits.
        for &rejected in &["0x1a2b3e4fg", "0X"] {
            expect_no_match(&format!("{}{}", prefix, rejected));
        }
    }
}
/// Checks hand-written property name samples against the `UNQUOTED_PROPERTY_NAME_AND_COLON`
/// group, including one name that starts with digits and is expected to fail.
#[test]
fn test_regex_unquoted_property_name() {
    let cases = vec![
        // Colon immediately after the name.
        RegexTest {
            matches: "propname",
            suffix: ":",
            trailing: " 'some property value',",
            ..Default::default()
        },
        // Whitespace between the name and the colon.
        RegexTest {
            matches: "propname",
            suffix: " :",
            trailing: " 'some property value',",
            ..Default::default()
        },
        // Leading digits: expected to fail with "capture failed".
        RegexTest {
            error: Some("capture failed"),
            matches: "99propname",
            suffix: ":",
            trailing: " 'property names do not start with digits,",
            ..Default::default()
        },
    ];
    for case in cases {
        test_regex(*UNQUOTED_PROPERTY_NAME_AND_COLON, case).unwrap();
    }
}
/// Checks hand-written quoted string samples: each case matches the opening quote against the
/// `OPEN_QUOTE` group, then applies `SINGLE_QUOTED` or `DOUBLE_QUOTED` to the string body, with
/// the closing quote given as the suffix of that second match.
#[test]
fn test_regex_string() {
// Simple single-quoted string at the end of the input.
test_regex(
*OPEN_QUOTE,
RegexTest {
matches: "'",
next_regex: Some(&*SINGLE_QUOTED),
next_matches: "this is a simple single-quoted string",
next_suffix: "'",
trailing: "",
..Default::default()
},
)
.unwrap();
// Single-quoted string containing a backslash followed by a literal newline, plus unescaped
// double quotes. The line break inside the literal is part of the test data.
test_regex(
*OPEN_QUOTE,
RegexTest {
matches: "'",
next_regex: Some(&*SINGLE_QUOTED),
next_matches: " this is a \\
multiline \"text\" string",
next_suffix: "'",
trailing: ", end of value",
..Default::default()
},
)
.unwrap();
// Simple double-quoted string at the end of the input.
test_regex(
*OPEN_QUOTE,
RegexTest {
matches: "\"",
next_regex: Some(&*DOUBLE_QUOTED),
next_matches: "this is a simple double-quoted string",
next_suffix: "\"",
trailing: "",
..Default::default()
},
)
.unwrap();
// Double-quoted string containing a backslash followed by a literal newline, unescaped single
// quotes, and an escaped double quote.
test_regex(
*OPEN_QUOTE,
RegexTest {
matches: "\"",
next_regex: Some(&*DOUBLE_QUOTED),
next_matches: " this is a \\
multiline 'text' string with escaped \\\" double-quote",
next_suffix: "\"",
trailing: ", end of value",
..Default::default()
},
)
.unwrap();
// Empty double-quoted string.
test_regex(
*OPEN_QUOTE,
RegexTest {
matches: "\"",
next_regex: Some(&*DOUBLE_QUOTED),
next_matches: "",
next_suffix: "\"",
trailing: ", to test empty string",
..Default::default()
},
)
.unwrap();
}
/// Checks that each opening and closing bracket is accepted for the `BRACE` group when
/// followed by a variety of hand-written trailing content.
#[test]
fn test_regex_braces() {
    // (bracket to match, text that follows it)
    let samples = [
        ("[", " 1234 ]"),
        ("[", "true]"),
        ("[", "\n 'item',\n 'item2'\n]"),
        ("]", ",[1234],"),
        ("{", " 1234 }"),
        ("{", "true}"),
        ("{", "\n 'item',\n 'item2'\n}"),
        ("}", ",{1234},"),
    ];
    for &(bracket, rest) in samples.iter() {
        let case = RegexTest { matches: bracket, trailing: rest, ..Default::default() };
        test_regex(*BRACE, case).unwrap();
    }
}
/// Checks hand-written comma samples against the `COMMA` group, and colon samples directly
/// against the standalone `COLON` regex.
#[test]
fn test_regex_command_colon() {
    // A comma followed by different kinds of content.
    for &rest in &["\n 'item',\n 'item2'\n}", "{1234},"] {
        let case = RegexTest { matches: ",", trailing: rest, ..Default::default() };
        test_regex(*COMMA, case).unwrap();
    }

    // A colon, alone or preceded by spaces and tabs, is accepted.
    for &colon in &[":", " \t :"] {
        let case = RegexTest { matches: colon, ..Default::default() };
        test_capture(&*COLON, None, case).unwrap();
    }

    // A newline before the colon is expected to produce "capture failed".
    let newline_before_colon =
        RegexTest { error: Some("capture failed"), matches: " \n :", ..Default::default() };
    test_capture(&*COLON, None, newline_before_colon).unwrap();
}
/// Checks that the `is_*()` predicates report true for values built with the corresponding
/// comment variant or value constructor.
#[test]
fn test_enums() {
    // Comment variants.
    assert!(Comment::Line("a line comment".to_owned()).is_line());
    assert!(Comment::Block { lines: vec!["a block".into(), "comment".into()], align: true }
        .is_block());

    // Value constructors.
    assert!(Primitive::new("l33t".to_owned(), vec![]).is_primitive());
    assert!(Array::new(vec![]).is_array());
    assert!(Object::new(vec![]).is_object());
}
/// Checks that the parser enforces its configured nesting limit: with the limit set to 5, an
/// object holding three nested arrays parses, while four nested arrays yields a parse error
/// with the expected message.
#[test]
fn test_document_exceeds_nesting_limit() {
let mut parser = Parser::new(&None);
parser.set_nesting_limit(5);
// Accepted: three levels of array nesting inside the object.
let good_buffer = r##"{
list_of_lists_of_lists: [[[]]]
}"##;
parser.parse_from_location(&good_buffer, 8, 15).expect("should NOT exceed nesting limit");
// Rejected: one more level of array nesting than the buffer above.
let bad_buffer = r##"{
list_of_lists_of_lists: [[[[]]]]
}"##;
let err = parser
.parse_from_location(&bad_buffer, 8, 15)
.expect_err("should exceed nesting limit");
match err {
Error::Parse(_, message) => {
// NOTE(review): the expected message is compared byte-for-byte, including the whitespace
// before the `^` marker; confirm that it lines up with the text of `bad_buffer`.
assert_eq!(
message,
r##"The given JSON5 document exceeds the parser's nesting limit of 5:
list_of_lists_of_lists: [[[[]]]]
^"##
)
}
_ => panic!("expected a parser error"),
}
}
/// Checks the file, line, and column reported in a parse error when a JSON5 slice taken from a
/// larger document is parsed with `parse_from_location()` and an explicit starting position.
#[test]
fn test_parse_from_location_error_location() {
let filename = Some("mixed_content.md".to_string());
// A markdown-style document with an embedded JSON5 object containing an invalid entry.
let mixed_document = r##"
Mixed Content Doc
=================
This is a document with embedded JSON5 content.
```json5
json5_value = {
// The next line should generate a parser error
999,
}
```
End of mixed content document.
"##;
// The slice runs from the first `{` up to, but not including, the first `}`.
let json5_slice =
&mixed_document[mixed_document.find("{").unwrap()..mixed_document.find("}").unwrap()];
let mut parser = Parser::new(&filename);
// 8 and 15 are passed as the starting location of the slice.
// NOTE(review): presumably the line and column of the `{` in the full document; confirm
// against the exact whitespace of `mixed_document`.
let err = parser
.parse_from_location(json5_slice, 8, 15)
.expect_err("check error message for location");
match err {
Error::Parse(Some(loc), message) => {
// The reported location must carry the file name given to `Parser::new()`.
assert_eq!(loc.file, Some("mixed_content.md".to_owned()));
assert_eq!(loc.line, 10);
assert_eq!(loc.col, 5);
assert_eq!(
message,
r##"Object values require property names:
999,
^~~"##
)
}
_ => panic!("expected a parser error"),
}
}
/// Checks that a buffer of nested brackets containing a `////` sequence and a run of NUL
/// characters (`\u{000}`) is rejected with a parse error whose message starts with
/// "Mismatched braces in the document:".
#[test]
fn test_doc_with_nulls() {
let mut parser = Parser::new(&None);
let buffer = "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[////[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]";
let err = parser.parse(&buffer).expect_err("should fail");
match err {
Error::Parse(_, message) => {
// Only the start of the message is checked.
assert!(message.starts_with("Mismatched braces in the document:"));
}
_ => panic!("expected a parser error"),
}
}
}