1use pest::iterators::Pair;
2use pest::Parser as P;
3use pest_derive::Parser;
4use serde::de;
5use serde::forward_to_deserialize_any;
6use std::char;
7use std::collections::VecDeque;
8use std::f64;
9use std::io::Read;
10
11use crate::error::{self, Error, Result};
12
13#[derive(Parser)]
14#[grammar_inline = r#"
15// see https://spec.json5.org/#syntactic-grammar and
16// https://spec.json5.org/#lexical-grammar
17
18COMMENT = _{ "/*" ~ (!"*/" ~ ANY)* ~ "*/" | "//" ~ (!line_terminator ~ ANY)* }
19
20WHITESPACE = _{
21 "\u{0009}" |
22 "\u{000B}" |
23 "\u{000C}" |
24 "\u{0020}" |
25 "\u{00A0}" |
26 "\u{FEFF}" |
27 SPACE_SEPARATOR |
28 line_terminator
29}
30
31array = { "[" ~ "]" | "[" ~ value ~ ("," ~ value)* ~ ","? ~ "]" }
32
33boolean = @{ "true" | "false" }
34
35char_escape_sequence = @{ single_escape_char | non_escape_char }
36
37char_literal = @{ !("\\" | line_terminator) ~ ANY }
38
39decimal_integer_literal = _{ "0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT* }
40
41decimal_literal = _{
42 decimal_integer_literal ~ "." ~ ASCII_DIGIT* ~ exponent_part? |
43 "." ~ ASCII_DIGIT+~ exponent_part? |
44 decimal_integer_literal ~ exponent_part?
45}
46
47double_quote_char = _{
48 "\\" ~ escape_sequence |
49 line_continuation |
50 !"\"" ~ char_literal
51}
52
53escape_char = _{ single_escape_char | ASCII_DIGIT | "x" | "u" }
54
55escape_sequence = _{
56 char_escape_sequence |
57 nul_escape_sequence |
58 "x" ~ hex_escape_sequence |
59 "u" ~ unicode_escape_sequence
60}
61
62exponent_part = _{ ^"e" ~ ("+" | "-")? ~ ASCII_DIGIT+ }
63
64hex_escape_sequence = @{ ASCII_HEX_DIGIT{2} }
65
66hex_integer_literal = _{ ^"0x" ~ ASCII_HEX_DIGIT+ }
67
68identifier = ${ identifier_start ~ identifier_part* }
69
70identifier_part = _{
71 identifier_start |
72 &(
73 NONSPACING_MARK |
74 DIACRITIC | // not sure about this, spec says "Combining spacing mark (Mc)"
75 DECIMAL_NUMBER |
76 CONNECTOR_PUNCTUATION |
77 "\u{200C}" |
78 "\u{200D}"
79 ) ~ char_literal
80}
81
82identifier_start = _{
83 &(unicode_letter | "$" | "_") ~ char_literal |
84 "\\u" ~ unicode_escape_sequence
85}
86
87key = _{ identifier | string }
88
89line_continuation = _{ "\\" ~ line_terminator_sequence }
90
91line_terminator = _{ "\u{000A}" | "\u{000D}" | "\u{2028}" | "\u{2029}" }
92
93line_terminator_sequence = _{ "\u{000D}" ~ "\u{000A}" | line_terminator }
94
95non_escape_char = _{ !(escape_char | line_terminator) ~ ANY }
96
97nul_escape_sequence = @{ "0" }
98
99null = @{ "null" }
100
101number = @{ ("+" | "-")? ~ numeric_literal }
102
103numeric_literal = _{
104 hex_integer_literal |
105 decimal_literal |
106 "Infinity" |
107 "NaN"
108}
109
110object = { "{" ~ "}" | "{" ~ pair ~ ("," ~ pair)* ~ ","? ~ "}" }
111
112pair = _{ key ~ ":" ~ value }
113
114single_escape_char = _{ "'" | "\"" | "\\" | "b" | "f" | "n" | "r" | "t" | "v" }
115
116single_quote_char = _{
117 "\\" ~ escape_sequence |
118 line_continuation |
119 !"'" ~ char_literal
120}
121
122string = ${ "\"" ~ double_quote_char* ~ "\"" | "'" ~ single_quote_char* ~ "'" }
123
124text = _{ SOI ~ value ~ EOI }
125
126unicode_escape_sequence = @{ ASCII_HEX_DIGIT{4} }
127
128unicode_letter = _{
129 UPPERCASE_LETTER |
130 LOWERCASE_LETTER |
131 TITLECASE_LETTER |
132 MODIFIER_LETTER |
133 OTHER_LETTER |
134 LETTER_NUMBER
135}
136
137value = _{ null | boolean | string | number | object | array }
138"#]
139struct Parser;
140
141pub fn from_str<'a, T>(s: &'a str) -> Result<T>
144where
145 T: de::Deserialize<'a>,
146{
147 let mut deserializer = Deserializer::from_str(s)?;
148 T::deserialize(&mut deserializer)
149}
150
151pub fn from_slice<'a, T>(s: &'a [u8]) -> Result<T>
154where
155 T: de::Deserialize<'a>,
156{
157 let valid_utf8 = std::str::from_utf8(s)?;
158 let mut deserializer = Deserializer::from_str(valid_utf8)?;
159 T::deserialize(&mut deserializer)
160}
161
162pub fn from_reader<R, T>(mut reader: R) -> Result<T>
165where
166 T: serde::de::DeserializeOwned,
167 R: Read,
168{
169 let mut data = String::default();
170 reader.read_to_string(&mut data)?;
171 from_str(&data)
172}
173
174pub struct Deserializer<'de> {
176 pair: Option<Pair<'de, Rule>>,
177}
178
179impl<'de> Deserializer<'de> {
180 #[allow(clippy::should_implement_trait)]
183 pub fn from_str(input: &'de str) -> Result<Self> {
184 let pair = Parser::parse(Rule::text, input)?.next().unwrap();
185 Ok(Deserializer::from_pair(pair))
186 }
187
188 fn from_pair(pair: Pair<'de, Rule>) -> Self {
189 Deserializer { pair: Some(pair) }
190 }
191}
192
193impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> {
194 type Error = Error;
195
196 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
197 where
198 V: de::Visitor<'de>,
199 {
200 let pair = self.pair.take().unwrap();
201 let span = pair.as_span();
202 let mut res = (move || match pair.as_rule() {
203 Rule::null => visitor.visit_unit(),
204 Rule::boolean => visitor.visit_bool(parse_bool(&pair)),
205 Rule::string | Rule::identifier => visitor.visit_string(parse_string(pair)?),
206 Rule::number => {
207 if is_int(pair.as_str()) {
208 visitor.visit_i64(parse_integer(&pair)?)
209 } else {
210 visitor.visit_f64(parse_number(&pair)?)
211 }
212 }
213 Rule::array => visitor.visit_seq(Seq::new(pair)),
214 Rule::object => visitor.visit_map(Map::new(pair)),
215 _ => unreachable!(),
216 })();
217 error::set_location(&mut res, &span);
218 res
219 }
220
221 fn deserialize_enum<V>(
222 self,
223 _name: &'static str,
224 _variants: &'static [&'static str],
225 visitor: V,
226 ) -> Result<V::Value>
227 where
228 V: de::Visitor<'de>,
229 {
230 let pair = self.pair.take().unwrap();
231 let span = pair.as_span();
232 let mut res = visitor.visit_enum(Enum { pair });
233 error::set_location(&mut res, &span);
234 res
235 }
236
237 fn deserialize_i8<V>(self, visitor: V) -> Result<V::Value>
240 where
241 V: de::Visitor<'de>,
242 {
243 let pair = self.pair.take().unwrap();
244 let span = pair.as_span();
245 let mut res = (move || visitor.visit_i8(parse_number(&pair)? as i8))();
246 error::set_location(&mut res, &span);
247 res
248 }
249
250 fn deserialize_i16<V>(self, visitor: V) -> Result<V::Value>
251 where
252 V: de::Visitor<'de>,
253 {
254 let pair = self.pair.take().unwrap();
255 let span = pair.as_span();
256 let mut res = (move || visitor.visit_i16(parse_number(&pair)? as i16))();
257 error::set_location(&mut res, &span);
258 res
259 }
260
261 fn deserialize_i32<V>(self, visitor: V) -> Result<V::Value>
262 where
263 V: de::Visitor<'de>,
264 {
265 let pair = self.pair.take().unwrap();
266 let span = pair.as_span();
267 let mut res = (move || visitor.visit_i32(parse_number(&pair)? as i32))();
268 error::set_location(&mut res, &span);
269 res
270 }
271
272 fn deserialize_i64<V>(self, visitor: V) -> Result<V::Value>
273 where
274 V: de::Visitor<'de>,
275 {
276 let pair = self.pair.take().unwrap();
277 let span = pair.as_span();
278 let mut res = (move || visitor.visit_i64(parse_number(&pair)? as i64))();
279 error::set_location(&mut res, &span);
280 res
281 }
282
283 fn deserialize_i128<V>(self, visitor: V) -> Result<V::Value>
284 where
285 V: de::Visitor<'de>,
286 {
287 let pair = self.pair.take().unwrap();
288 let span = pair.as_span();
289 let mut res = (move || visitor.visit_i128(parse_number(&pair)? as i128))();
290 error::set_location(&mut res, &span);
291 res
292 }
293
294 fn deserialize_u8<V>(self, visitor: V) -> Result<V::Value>
295 where
296 V: de::Visitor<'de>,
297 {
298 let pair = self.pair.take().unwrap();
299 let span = pair.as_span();
300 let mut res = (move || visitor.visit_u8(parse_number(&pair)? as u8))();
301 error::set_location(&mut res, &span);
302 res
303 }
304
305 fn deserialize_u16<V>(self, visitor: V) -> Result<V::Value>
306 where
307 V: de::Visitor<'de>,
308 {
309 let pair = self.pair.take().unwrap();
310 let span = pair.as_span();
311 let mut res = (move || visitor.visit_u16(parse_number(&pair)? as u16))();
312 error::set_location(&mut res, &span);
313 res
314 }
315
316 fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value>
317 where
318 V: de::Visitor<'de>,
319 {
320 let pair = self.pair.take().unwrap();
321 let span = pair.as_span();
322 let mut res = (move || visitor.visit_u32(parse_number(&pair)? as u32))();
323 error::set_location(&mut res, &span);
324 res
325 }
326
327 fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value>
328 where
329 V: de::Visitor<'de>,
330 {
331 let pair = self.pair.take().unwrap();
332 let span = pair.as_span();
333 let mut res = (move || visitor.visit_u64(parse_number(&pair)? as u64))();
334 error::set_location(&mut res, &span);
335 res
336 }
337
338 fn deserialize_u128<V>(self, visitor: V) -> Result<V::Value>
339 where
340 V: de::Visitor<'de>,
341 {
342 let pair = self.pair.take().unwrap();
343 let span = pair.as_span();
344 let mut res = (move || visitor.visit_u128(parse_number(&pair)? as u128))();
345 error::set_location(&mut res, &span);
346 res
347 }
348
349 fn deserialize_f32<V>(self, visitor: V) -> Result<V::Value>
350 where
351 V: de::Visitor<'de>,
352 {
353 let pair = self.pair.take().unwrap();
354 let span = pair.as_span();
355 let mut res = (move || visitor.visit_f32(parse_number(&pair)? as f32))();
356 error::set_location(&mut res, &span);
357 res
358 }
359
360 fn deserialize_f64<V>(self, visitor: V) -> Result<V::Value>
361 where
362 V: de::Visitor<'de>,
363 {
364 let pair = self.pair.take().unwrap();
365 let span = pair.as_span();
366 let mut res = (move || visitor.visit_f64(parse_number(&pair)?))();
367 error::set_location(&mut res, &span);
368 res
369 }
370
371 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
372 where
373 V: de::Visitor<'de>,
374 {
375 let pair = self.pair.take().unwrap();
376 let span = pair.as_span();
377 let mut res = match pair.as_rule() {
378 Rule::null => visitor.visit_none(),
379 _ => visitor.visit_some(&mut Deserializer::from_pair(pair)),
380 };
381 error::set_location(&mut res, &span);
382 res
383 }
384
385 fn deserialize_newtype_struct<V>(self, _name: &str, visitor: V) -> Result<V::Value>
386 where
387 V: de::Visitor<'de>,
388 {
389 let span = self.pair.as_ref().unwrap().as_span();
390 let mut res = visitor.visit_newtype_struct(self);
391 error::set_location(&mut res, &span);
392 res
393 }
394
395 forward_to_deserialize_any! {
396 bool char str string bytes byte_buf unit unit_struct seq
397 tuple tuple_struct map struct identifier ignored_any
398 }
399}
400
401fn parse_bool(pair: &Pair<'_, Rule>) -> bool {
402 match pair.as_str() {
403 "true" => true,
404 "false" => false,
405 _ => unreachable!(),
406 }
407}
408
409fn parse_string(pair: Pair<'_, Rule>) -> Result<String> {
410 let span = pair.as_span();
411 let mut res = pair
412 .into_inner()
413 .map(|component| match component.as_rule() {
414 Rule::char_literal => Ok(String::from(component.as_str())),
415 Rule::char_escape_sequence => Ok(parse_char_escape_sequence(&component)),
416 Rule::nul_escape_sequence => Ok(String::from("\u{0000}")),
417 Rule::hex_escape_sequence | Rule::unicode_escape_sequence => {
418 let hex_escape = parse_hex(component.as_str())?;
419 match char::from_u32(hex_escape) {
420 Some(s) => Ok(s.to_string()),
421 None => Err(de::Error::custom("error parsing hex prefix")),
422 }
423 }
424 _ => unreachable!(),
425 })
426 .collect();
427 error::set_location(&mut res, &span);
428 res
429}
430
431fn parse_char_escape_sequence(pair: &Pair<'_, Rule>) -> String {
432 String::from(match pair.as_str() {
433 "b" => "\u{0008}",
434 "f" => "\u{000C}",
435 "n" => "\n",
436 "r" => "\r",
437 "t" => "\t",
438 "v" => "\u{000B}",
439 c => c,
440 })
441}
442
443fn parse_number(pair: &Pair<'_, Rule>) -> Result<f64> {
444 match pair.as_str() {
445 "Infinity" => Ok(f64::INFINITY),
446 "-Infinity" => Ok(f64::NEG_INFINITY),
447 "NaN" | "-NaN" => Ok(f64::NAN),
448 s if is_hex_literal(s) => parse_hex(&s[2..]).map(f64::from),
449 s => {
450 if let Ok(r) = s.parse::<f64>() {
451 if r.is_finite() {
452 Ok(r)
453 } else {
454 Err(de::Error::custom("error parsing number: too large"))
455 }
456 } else {
457 Err(de::Error::custom("error parsing number"))
458 }
459 }
460 }
461}
462
463fn parse_integer(pair: &Pair<'_, Rule>) -> Result<i64> {
464 match pair.as_str() {
465 s if is_hex_literal(s) => Ok(parse_hex(&s[2..])? as i64),
466 s => s
467 .parse()
468 .map_err(|_| de::Error::custom("error parsing integer")),
469 }
470}
471
/// Reports whether the number literal `s` denotes an integer.
///
/// A literal is an integer when it has no decimal point and is either a
/// hexadecimal literal or contains no exponent marker, `Infinity` or `NaN`.
/// A leading sign is ignored for the hexadecimal check, so `-0xE` is an
/// integer even though it contains the letter `E`.
fn is_int(s: &str) -> bool {
    if s.contains('.') {
        return false;
    }
    // The sign, if any, is a single ASCII byte, so slicing at 1 is safe.
    let unsigned = match s.as_bytes().first() {
        Some(b'+') | Some(b'-') => &s[1..],
        _ => s,
    };
    let is_hex = unsigned.len() > 2 && (unsigned.starts_with("0x") || unsigned.starts_with("0X"));
    is_hex
        || !(s.contains('e') || s.contains('E') || s.contains("Infinity") || s.contains("NaN"))
}
480
481fn parse_hex(s: &str) -> Result<u32> {
482 u32::from_str_radix(s, 16).map_err(|_| de::Error::custom("error parsing hex"))
483}
484
/// Reports whether `s` starts with `0x` or `0X` followed by at least one more
/// byte.
///
/// Uses `starts_with` rather than slicing, so it never panics on input whose
/// second byte falls inside a multi-byte character.
fn is_hex_literal(s: &str) -> bool {
    s.len() > 2 && (s.starts_with("0x") || s.starts_with("0X"))
}
488
489struct Seq<'de> {
490 pairs: VecDeque<Pair<'de, Rule>>,
491}
492
493impl<'de> Seq<'de> {
494 pub fn new(pair: Pair<'de, Rule>) -> Self {
495 Self {
496 pairs: pair.into_inner().collect(),
497 }
498 }
499}
500
501impl<'de> de::SeqAccess<'de> for Seq<'de> {
502 type Error = Error;
503
504 fn size_hint(&self) -> Option<usize> {
505 Some(self.pairs.len())
506 }
507
508 fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
509 where
510 T: de::DeserializeSeed<'de>,
511 {
512 if let Some(pair) = self.pairs.pop_front() {
513 seed.deserialize(&mut Deserializer::from_pair(pair))
514 .map(Some)
515 } else {
516 Ok(None)
517 }
518 }
519}
520
521struct Map<'de> {
522 pairs: VecDeque<Pair<'de, Rule>>,
523}
524
525impl<'de> Map<'de> {
526 pub fn new(pair: Pair<'de, Rule>) -> Self {
527 Self {
528 pairs: pair.into_inner().collect(),
529 }
530 }
531}
532
533impl<'de> de::MapAccess<'de> for Map<'de> {
534 type Error = Error;
535
536 fn size_hint(&self) -> Option<usize> {
537 Some(self.pairs.len() / 2)
538 }
539
540 fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
541 where
542 K: de::DeserializeSeed<'de>,
543 {
544 if let Some(pair) = self.pairs.pop_front() {
545 seed.deserialize(&mut Deserializer::from_pair(pair))
546 .map(Some)
547 } else {
548 Ok(None)
549 }
550 }
551
552 fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
553 where
554 V: de::DeserializeSeed<'de>,
555 {
556 seed.deserialize(&mut Deserializer::from_pair(
557 self.pairs.pop_front().unwrap(),
558 ))
559 }
560}
561
562struct Enum<'de> {
563 pair: Pair<'de, Rule>,
564}
565
566impl<'de> de::EnumAccess<'de> for Enum<'de> {
567 type Error = Error;
568 type Variant = Variant<'de>;
569
570 fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant)>
571 where
572 V: de::DeserializeSeed<'de>,
573 {
574 let span = self.pair.as_span();
575 let mut res = (move || match self.pair.as_rule() {
576 Rule::string => {
577 let tag = seed.deserialize(&mut Deserializer::from_pair(self.pair))?;
578 Ok((tag, Variant { pair: None }))
579 }
580 Rule::object => {
581 let mut pairs = self.pair.into_inner();
582
583 if let Some(tag_pair) = pairs.next() {
584 let tag = seed.deserialize(&mut Deserializer::from_pair(tag_pair))?;
585 Ok((tag, Variant { pair: pairs.next() }))
586 } else {
587 Err(de::Error::custom("expected a nonempty object"))
588 }
589 }
590 _ => Err(de::Error::custom("expected a string or an object")),
591 })();
592 error::set_location(&mut res, &span);
593 res
594 }
595}
596
597struct Variant<'de> {
598 pair: Option<Pair<'de, Rule>>,
599}
600
601impl<'de> de::VariantAccess<'de> for Variant<'de> {
602 type Error = Error;
603
604 fn unit_variant(self) -> Result<()> {
605 if let Some(pair) = self.pair {
606 serde::Deserialize::deserialize(&mut Deserializer::from_pair(pair))
607 } else {
608 Ok(())
609 }
610 }
611
612 fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value>
613 where
614 T: de::DeserializeSeed<'de>,
615 {
616 seed.deserialize(&mut Deserializer::from_pair(self.pair.unwrap()))
617 }
618
619 fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value>
620 where
621 V: de::Visitor<'de>,
622 {
623 match self.pair {
624 Some(pair) => match pair.as_rule() {
625 Rule::array => visitor.visit_seq(Seq::new(pair)),
626 _ => Err(de::Error::custom("expected an array")),
627 },
628 None => Err(de::Error::custom("expected an array")),
629 }
630 }
631
632 fn struct_variant<V>(self, _fields: &'static [&'static str], visitor: V) -> Result<V::Value>
633 where
634 V: de::Visitor<'de>,
635 {
636 match self.pair {
637 Some(pair) => match pair.as_rule() {
638 Rule::object => visitor.visit_map(Map::new(pair)),
639 _ => Err(de::Error::custom("expected an object")),
640 },
641 None => Err(de::Error::custom("expected an object")),
642 }
643 }
644}