1use alloc::{
2 format,
3 string::{String, ToString},
4 vec,
5 vec::Vec,
6};
78use crate::{ast, hir};
910/// This error type encompasses any error that can be returned by this crate.
11///
12/// This error type is marked as `non_exhaustive`. This means that adding a
13/// new variant is not considered a breaking change.
14#[non_exhaustive]
15#[derive(Clone, Debug, Eq, PartialEq)]
16pub enum Error {
17/// An error that occurred while translating concrete syntax into abstract
18 /// syntax (AST).
19Parse(ast::Error),
20/// An error that occurred while translating abstract syntax into a high
21 /// level intermediate representation (HIR).
22Translate(hir::Error),
23}
2425impl From<ast::Error> for Error {
26fn from(err: ast::Error) -> Error {
27 Error::Parse(err)
28 }
29}
3031impl From<hir::Error> for Error {
32fn from(err: hir::Error) -> Error {
33 Error::Translate(err)
34 }
35}
// This impl names `std::error::Error`, so it is only compiled when the
// `std` feature is enabled.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
3940impl core::fmt::Display for Error {
41fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
42match *self {
43 Error::Parse(ref x) => x.fmt(f),
44 Error::Translate(ref x) => x.fmt(f),
45 }
46 }
47}
4849/// A helper type for formatting nice error messages.
50///
51/// This type is responsible for reporting regex parse errors in a nice human
52/// readable format. Most of its complexity is from interspersing notational
53/// markers pointing out the position where an error occurred.
54#[derive(Debug)]
55pub struct Formatter<'e, E> {
56/// The original regex pattern in which the error occurred.
57pattern: &'e str,
58/// The error kind. It must impl fmt::Display.
59err: &'e E,
60/// The primary span of the error.
61span: &'e ast::Span,
62/// An auxiliary and optional span, in case the error needs to point to
63 /// two locations (e.g., when reporting a duplicate capture group name).
64aux_span: Option<&'e ast::Span>,
65}
6667impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
68fn from(err: &'e ast::Error) -> Self {
69 Formatter {
70 pattern: err.pattern(),
71 err: err.kind(),
72 span: err.span(),
73 aux_span: err.auxiliary_span(),
74 }
75 }
76}
7778impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
79fn from(err: &'e hir::Error) -> Self {
80 Formatter {
81 pattern: err.pattern(),
82 err: err.kind(),
83 span: err.span(),
84 aux_span: None,
85 }
86 }
87}
8889impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> {
90fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
91let spans = Spans::from_formatter(self);
92if self.pattern.contains('\n') {
93let divider = repeat_char('~', 79);
9495writeln!(f, "regex parse error:")?;
96writeln!(f, "{}", divider)?;
97let notated = spans.notate();
98write!(f, "{}", notated)?;
99writeln!(f, "{}", divider)?;
100// If we have error spans that cover multiple lines, then we just
101 // note the line numbers.
102if !spans.multi_line.is_empty() {
103let mut notes = vec![];
104for span in &spans.multi_line {
105 notes.push(format!(
106"on line {} (column {}) through line {} (column {})",
107 span.start.line,
108 span.start.column,
109 span.end.line,
110 span.end.column - 1
111));
112 }
113writeln!(f, "{}", notes.join("\n"))?;
114 }
115write!(f, "error: {}", self.err)?;
116 } else {
117writeln!(f, "regex parse error:")?;
118let notated = Spans::from_formatter(self).notate();
119write!(f, "{}", notated)?;
120write!(f, "error: {}", self.err)?;
121 }
122Ok(())
123 }
124}
125126/// This type represents an arbitrary number of error spans in a way that makes
127/// it convenient to notate the regex pattern. ("Notate" means "point out
128/// exactly where the error occurred in the regex pattern.")
129///
130/// Technically, we can only ever have two spans given our current error
131/// structure. However, after toiling with a specific algorithm for handling
132/// two spans, it became obvious that an algorithm to handle an arbitrary
133/// number of spans was actually much simpler.
134struct Spans<'p> {
135/// The original regex pattern string.
136pattern: &'p str,
137/// The total width that should be used for line numbers. The width is
138 /// used for left padding the line numbers for alignment.
139 ///
140 /// A value of `0` means line numbers should not be displayed. That is,
141 /// the pattern is itself only one line.
142line_number_width: usize,
143/// All error spans that occur on a single line. This sequence always has
144 /// length equivalent to the number of lines in `pattern`, where the index
145 /// of the sequence represents a line number, starting at `0`. The spans
146 /// in each line are sorted in ascending order.
147by_line: Vec<Vec<ast::Span>>,
148/// All error spans that occur over one or more lines. That is, the start
149 /// and end position of the span have different line numbers. The spans are
150 /// sorted in ascending order.
151multi_line: Vec<ast::Span>,
152}
153154impl<'p> Spans<'p> {
155/// Build a sequence of spans from a formatter.
156fn from_formatter<'e, E: core::fmt::Display>(
157 fmter: &'p Formatter<'e, E>,
158 ) -> Spans<'p> {
159let mut line_count = fmter.pattern.lines().count();
160// If the pattern ends with a `\n` literal, then our line count is
161 // off by one, since a span can occur immediately after the last `\n`,
162 // which is consider to be an additional line.
163if fmter.pattern.ends_with('\n') {
164 line_count += 1;
165 }
166let line_number_width =
167if line_count <= 1 { 0 } else { line_count.to_string().len() };
168let mut spans = Spans {
169 pattern: &fmter.pattern,
170 line_number_width,
171 by_line: vec![vec![]; line_count],
172 multi_line: vec![],
173 };
174 spans.add(fmter.span.clone());
175if let Some(span) = fmter.aux_span {
176 spans.add(span.clone());
177 }
178 spans
179 }
180181/// Add the given span to this sequence, putting it in the right place.
182fn add(&mut self, span: ast::Span) {
183// This is grossly inefficient since we sort after each add, but right
184 // now, we only ever add two spans at most.
185if span.is_one_line() {
186let i = span.start.line - 1; // because lines are 1-indexed
187self.by_line[i].push(span);
188self.by_line[i].sort();
189 } else {
190self.multi_line.push(span);
191self.multi_line.sort();
192 }
193 }
194195/// Notate the pattern string with carents (`^`) pointing at each span
196 /// location. This only applies to spans that occur within a single line.
197fn notate(&self) -> String {
198let mut notated = String::new();
199for (i, line) in self.pattern.lines().enumerate() {
200if self.line_number_width > 0 {
201 notated.push_str(&self.left_pad_line_number(i + 1));
202 notated.push_str(": ");
203 } else {
204 notated.push_str(" ");
205 }
206 notated.push_str(line);
207 notated.push('\n');
208if let Some(notes) = self.notate_line(i) {
209 notated.push_str(¬es);
210 notated.push('\n');
211 }
212 }
213 notated
214 }
215216/// Return notes for the line indexed at `i` (zero-based). If there are no
217 /// spans for the given line, then `None` is returned. Otherwise, an
218 /// appropriately space padded string with correctly positioned `^` is
219 /// returned, accounting for line numbers.
220fn notate_line(&self, i: usize) -> Option<String> {
221let spans = &self.by_line[i];
222if spans.is_empty() {
223return None;
224 }
225let mut notes = String::new();
226for _ in 0..self.line_number_padding() {
227 notes.push(' ');
228 }
229let mut pos = 0;
230for span in spans {
231for _ in pos..(span.start.column - 1) {
232 notes.push(' ');
233 pos += 1;
234 }
235let note_len = span.end.column.saturating_sub(span.start.column);
236for _ in 0..core::cmp::max(1, note_len) {
237 notes.push('^');
238 pos += 1;
239 }
240 }
241Some(notes)
242 }
243244/// Left pad the given line number with spaces such that it is aligned with
245 /// other line numbers.
246fn left_pad_line_number(&self, n: usize) -> String {
247let n = n.to_string();
248let pad = self.line_number_width.checked_sub(n.len()).unwrap();
249let mut result = repeat_char(' ', pad);
250 result.push_str(&n);
251 result
252 }
253254/// Return the line number padding beginning at the start of each line of
255 /// the pattern.
256 ///
257 /// If the pattern is only one line, then this returns a fixed padding
258 /// for visual indentation.
259fn line_number_padding(&self) -> usize {
260if self.line_number_width == 0 {
2614
262} else {
2632 + self.line_number_width
264 }
265 }
266}
/// Returns a string consisting of `c` repeated `count` times. A `count` of
/// zero yields an empty string.
fn repeat_char(c: char, count: usize) -> String {
    core::iter::repeat(c).take(count).collect()
}
#[cfg(test)]
mod tests {
    use alloc::string::ToString;

    use crate::ast::parse::Parser;

    /// Asserts that parsing `pattern` fails and that the rendered error
    /// message equals `expected_msg` with surrounding whitespace trimmed.
    fn assert_panic_message(pattern: &str, expected_msg: &str) {
        let result = Parser::new().parse(pattern);
        match result {
            Ok(_) => {
                panic!("regex should not have parsed");
            }
            Err(err) => {
                assert_eq!(err.to_string(), expected_msg.trim());
            }
        }
    }

    // See: https://github.com/rust-lang/regex/issues/464
    #[test]
    fn regression_464() {
        let err = Parser::new().parse("a{\n").unwrap_err();
        // This test checks that the error formatter doesn't panic.
        assert!(!err.to_string().is_empty());
    }

    // See: https://github.com/rust-lang/regex/issues/545
    #[test]
    fn repetition_quantifier_expects_a_valid_decimal() {
        assert_panic_message(
            r"\\u{[^}]*}",
            r#"
regex parse error:
    \\u{[^}]*}
        ^
error: repetition quantifier expects a valid decimal
"#,
        );
    }
}