serde_json5/
de.rs

1use pest::iterators::Pair;
2use pest::Parser as P;
3use pest_derive::Parser;
4use serde::de;
5use serde::forward_to_deserialize_any;
6use std::char;
7use std::collections::VecDeque;
8use std::f64;
9use std::io::Read;
10
11use crate::error::{self, Error, Result};
12
/// Pest parser for JSON5 text, generated by `pest_derive` from the inline grammar below.
///
/// `text` is the entry rule: a single `value` spanning the whole input (`SOI ~ value ~ EOI`).
/// Rules declared with `_{ ... }` are silent and produce no pairs of their own, so the pairs
/// seen by the deserializer are `null`, `boolean`, `string`, `identifier`, `number`, `object`,
/// `array`, plus the string components (`char_literal` and the `*_escape_sequence` rules).
///
/// NOTE(review): `identifier_part` uses `DIACRITIC` where the grammar's own comment says the
/// spec calls for "Combining spacing mark (Mc)" — confirm against the JSON5 spec before relying
/// on exact identifier acceptance.
#[derive(Parser)]
#[grammar_inline = r#"
// see https://spec.json5.org/#syntactic-grammar and
// https://spec.json5.org/#lexical-grammar

COMMENT = _{ "/*" ~ (!"*/" ~ ANY)* ~ "*/" | "//" ~ (!line_terminator ~ ANY)* }

WHITESPACE = _{
  "\u{0009}" |
  "\u{000B}" |
  "\u{000C}" |
  "\u{0020}" |
  "\u{00A0}" |
  "\u{FEFF}" |
  SPACE_SEPARATOR |
  line_terminator
}

array = { "[" ~ "]" | "[" ~ value ~ ("," ~ value)* ~ ","? ~ "]" }

boolean = @{ "true" | "false" }

char_escape_sequence = @{ single_escape_char | non_escape_char }

char_literal = @{ !("\\" | line_terminator) ~ ANY }

decimal_integer_literal = _{ "0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT* }

decimal_literal = _{
  decimal_integer_literal ~ "." ~ ASCII_DIGIT* ~ exponent_part? |
  "." ~ ASCII_DIGIT+~ exponent_part? |
  decimal_integer_literal ~ exponent_part?
}

double_quote_char = _{
  "\\" ~ escape_sequence |
  line_continuation |
  !"\"" ~ char_literal
}

escape_char = _{ single_escape_char | ASCII_DIGIT | "x" | "u" }

escape_sequence = _{
  char_escape_sequence |
  nul_escape_sequence |
  "x" ~ hex_escape_sequence |
  "u" ~ unicode_escape_sequence
}

exponent_part = _{ ^"e" ~ ("+" | "-")? ~ ASCII_DIGIT+ }

hex_escape_sequence = @{ ASCII_HEX_DIGIT{2} }

hex_integer_literal = _{ ^"0x" ~ ASCII_HEX_DIGIT+ }

identifier = ${ identifier_start ~ identifier_part* }

identifier_part = _{
  identifier_start |
  &(
    NONSPACING_MARK |
    DIACRITIC | // not sure about this, spec says "Combining spacing mark (Mc)"
    DECIMAL_NUMBER |
    CONNECTOR_PUNCTUATION |
    "\u{200C}" |
    "\u{200D}"
  ) ~ char_literal
}

identifier_start = _{
  &(unicode_letter | "$" | "_") ~ char_literal |
  "\\u" ~ unicode_escape_sequence
}

key = _{ identifier | string }

line_continuation = _{ "\\" ~ line_terminator_sequence }

line_terminator = _{ "\u{000A}" | "\u{000D}" | "\u{2028}" | "\u{2029}" }

line_terminator_sequence = _{ "\u{000D}" ~ "\u{000A}" | line_terminator }

non_escape_char = _{ !(escape_char | line_terminator) ~ ANY }

nul_escape_sequence = @{ "0" }

null = @{ "null" }

number = @{ ("+" | "-")? ~ numeric_literal }

numeric_literal = _{
  hex_integer_literal |
  decimal_literal |
  "Infinity" |
  "NaN"
}

object = { "{" ~ "}" | "{" ~ pair ~ ("," ~ pair)* ~ ","? ~ "}" }

pair = _{ key ~ ":" ~ value }

single_escape_char = _{ "'" | "\"" | "\\" | "b" | "f" | "n" | "r" | "t" | "v" }

single_quote_char = _{
  "\\" ~ escape_sequence |
  line_continuation |
  !"'" ~ char_literal
}

string = ${ "\"" ~ double_quote_char* ~ "\"" | "'" ~ single_quote_char* ~ "'" }

text = _{ SOI ~ value ~ EOI }

unicode_escape_sequence = @{ ASCII_HEX_DIGIT{4} }

unicode_letter = _{
  UPPERCASE_LETTER |
  LOWERCASE_LETTER |
  TITLECASE_LETTER |
  MODIFIER_LETTER |
  OTHER_LETTER |
  LETTER_NUMBER
}

value = _{ null | boolean | string | number | object | array }
"#]
struct Parser;
140
141/// Deserialize an instance of type `T` from a string of JSON5 text. Can fail if the input is
142/// invalid JSON5, or doesn’t match the structure of the target type.
143pub fn from_str<'a, T>(s: &'a str) -> Result<T>
144where
145    T: de::Deserialize<'a>,
146{
147    let mut deserializer = Deserializer::from_str(s)?;
148    T::deserialize(&mut deserializer)
149}
150
151/// Deserialize an instance of type `T` from a slice of JSON5 text. Can fail if the input is
152/// invalid JSON5, or doesn&rsquo;t match the structure of the target type.
153pub fn from_slice<'a, T>(s: &'a [u8]) -> Result<T>
154where
155    T: de::Deserialize<'a>,
156{
157    let valid_utf8 = std::str::from_utf8(s)?;
158    let mut deserializer = Deserializer::from_str(valid_utf8)?;
159    T::deserialize(&mut deserializer)
160}
161
162/// Deserialize an instance of type `T` from any implementation of Read.  Can fail if the input is
163/// invalid JSON5, or doesn&rsquo;t match the structure of the target type.
164pub fn from_reader<R, T>(mut reader: R) -> Result<T>
165where
166    T: serde::de::DeserializeOwned,
167    R: Read,
168{
169    let mut data = String::default();
170    reader.read_to_string(&mut data)?;
171    from_str(&data)
172}
173
/// A structure that deserializes JSON5 text into Rust values.
///
/// Each instance wraps a single parsed pest pair; nested values are handled by building a
/// fresh `Deserializer` around the corresponding child pair.
pub struct Deserializer<'de> {
    /// The parsed pair still waiting to be deserialized. Most `deserialize_*` methods `take()`
    /// it, leaving `None` behind once the value has been consumed.
    pair: Option<Pair<'de, Rule>>,
}
178
179impl<'de> Deserializer<'de> {
180    /// Creates a JSON5 deserializer from a `&str`. This parses the input at construction time, so
181    /// can fail if the input is not valid JSON5.
182    #[allow(clippy::should_implement_trait)]
183    pub fn from_str(input: &'de str) -> Result<Self> {
184        let pair = Parser::parse(Rule::text, input)?.next().unwrap();
185        Ok(Deserializer::from_pair(pair))
186    }
187
188    fn from_pair(pair: Pair<'de, Rule>) -> Self {
189        Deserializer { pair: Some(pair) }
190    }
191}
192
193impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> {
194    type Error = Error;
195
196    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
197    where
198        V: de::Visitor<'de>,
199    {
200        let pair = self.pair.take().unwrap();
201        let span = pair.as_span();
202        let mut res = (move || match pair.as_rule() {
203            Rule::null => visitor.visit_unit(),
204            Rule::boolean => visitor.visit_bool(parse_bool(&pair)),
205            Rule::string | Rule::identifier => visitor.visit_string(parse_string(pair)?),
206            Rule::number => {
207                if is_int(pair.as_str()) {
208                    visitor.visit_i64(parse_integer(&pair)?)
209                } else {
210                    visitor.visit_f64(parse_number(&pair)?)
211                }
212            }
213            Rule::array => visitor.visit_seq(Seq::new(pair)),
214            Rule::object => visitor.visit_map(Map::new(pair)),
215            _ => unreachable!(),
216        })();
217        error::set_location(&mut res, &span);
218        res
219    }
220
221    fn deserialize_enum<V>(
222        self,
223        _name: &'static str,
224        _variants: &'static [&'static str],
225        visitor: V,
226    ) -> Result<V::Value>
227    where
228        V: de::Visitor<'de>,
229    {
230        let pair = self.pair.take().unwrap();
231        let span = pair.as_span();
232        let mut res = visitor.visit_enum(Enum { pair });
233        error::set_location(&mut res, &span);
234        res
235    }
236
237    // The below will get us the right types, but won't necessarily give
238    // meaningful results if the source is out of the range of the target type.
239    fn deserialize_i8<V>(self, visitor: V) -> Result<V::Value>
240    where
241        V: de::Visitor<'de>,
242    {
243        let pair = self.pair.take().unwrap();
244        let span = pair.as_span();
245        let mut res = (move || visitor.visit_i8(parse_number(&pair)? as i8))();
246        error::set_location(&mut res, &span);
247        res
248    }
249
250    fn deserialize_i16<V>(self, visitor: V) -> Result<V::Value>
251    where
252        V: de::Visitor<'de>,
253    {
254        let pair = self.pair.take().unwrap();
255        let span = pair.as_span();
256        let mut res = (move || visitor.visit_i16(parse_number(&pair)? as i16))();
257        error::set_location(&mut res, &span);
258        res
259    }
260
261    fn deserialize_i32<V>(self, visitor: V) -> Result<V::Value>
262    where
263        V: de::Visitor<'de>,
264    {
265        let pair = self.pair.take().unwrap();
266        let span = pair.as_span();
267        let mut res = (move || visitor.visit_i32(parse_number(&pair)? as i32))();
268        error::set_location(&mut res, &span);
269        res
270    }
271
272    fn deserialize_i64<V>(self, visitor: V) -> Result<V::Value>
273    where
274        V: de::Visitor<'de>,
275    {
276        let pair = self.pair.take().unwrap();
277        let span = pair.as_span();
278        let mut res = (move || visitor.visit_i64(parse_number(&pair)? as i64))();
279        error::set_location(&mut res, &span);
280        res
281    }
282
283    fn deserialize_i128<V>(self, visitor: V) -> Result<V::Value>
284    where
285        V: de::Visitor<'de>,
286    {
287        let pair = self.pair.take().unwrap();
288        let span = pair.as_span();
289        let mut res = (move || visitor.visit_i128(parse_number(&pair)? as i128))();
290        error::set_location(&mut res, &span);
291        res
292    }
293
294    fn deserialize_u8<V>(self, visitor: V) -> Result<V::Value>
295    where
296        V: de::Visitor<'de>,
297    {
298        let pair = self.pair.take().unwrap();
299        let span = pair.as_span();
300        let mut res = (move || visitor.visit_u8(parse_number(&pair)? as u8))();
301        error::set_location(&mut res, &span);
302        res
303    }
304
305    fn deserialize_u16<V>(self, visitor: V) -> Result<V::Value>
306    where
307        V: de::Visitor<'de>,
308    {
309        let pair = self.pair.take().unwrap();
310        let span = pair.as_span();
311        let mut res = (move || visitor.visit_u16(parse_number(&pair)? as u16))();
312        error::set_location(&mut res, &span);
313        res
314    }
315
316    fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value>
317    where
318        V: de::Visitor<'de>,
319    {
320        let pair = self.pair.take().unwrap();
321        let span = pair.as_span();
322        let mut res = (move || visitor.visit_u32(parse_number(&pair)? as u32))();
323        error::set_location(&mut res, &span);
324        res
325    }
326
327    fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value>
328    where
329        V: de::Visitor<'de>,
330    {
331        let pair = self.pair.take().unwrap();
332        let span = pair.as_span();
333        let mut res = (move || visitor.visit_u64(parse_number(&pair)? as u64))();
334        error::set_location(&mut res, &span);
335        res
336    }
337
338    fn deserialize_u128<V>(self, visitor: V) -> Result<V::Value>
339    where
340        V: de::Visitor<'de>,
341    {
342        let pair = self.pair.take().unwrap();
343        let span = pair.as_span();
344        let mut res = (move || visitor.visit_u128(parse_number(&pair)? as u128))();
345        error::set_location(&mut res, &span);
346        res
347    }
348
349    fn deserialize_f32<V>(self, visitor: V) -> Result<V::Value>
350    where
351        V: de::Visitor<'de>,
352    {
353        let pair = self.pair.take().unwrap();
354        let span = pair.as_span();
355        let mut res = (move || visitor.visit_f32(parse_number(&pair)? as f32))();
356        error::set_location(&mut res, &span);
357        res
358    }
359
360    fn deserialize_f64<V>(self, visitor: V) -> Result<V::Value>
361    where
362        V: de::Visitor<'de>,
363    {
364        let pair = self.pair.take().unwrap();
365        let span = pair.as_span();
366        let mut res = (move || visitor.visit_f64(parse_number(&pair)?))();
367        error::set_location(&mut res, &span);
368        res
369    }
370
371    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
372    where
373        V: de::Visitor<'de>,
374    {
375        let pair = self.pair.take().unwrap();
376        let span = pair.as_span();
377        let mut res = match pair.as_rule() {
378            Rule::null => visitor.visit_none(),
379            _ => visitor.visit_some(&mut Deserializer::from_pair(pair)),
380        };
381        error::set_location(&mut res, &span);
382        res
383    }
384
385    fn deserialize_newtype_struct<V>(self, _name: &str, visitor: V) -> Result<V::Value>
386    where
387        V: de::Visitor<'de>,
388    {
389        let span = self.pair.as_ref().unwrap().as_span();
390        let mut res = visitor.visit_newtype_struct(self);
391        error::set_location(&mut res, &span);
392        res
393    }
394
395    forward_to_deserialize_any! {
396        bool char str string bytes byte_buf unit unit_struct seq
397        tuple tuple_struct map struct identifier ignored_any
398    }
399}
400
401fn parse_bool(pair: &Pair<'_, Rule>) -> bool {
402    match pair.as_str() {
403        "true" => true,
404        "false" => false,
405        _ => unreachable!(),
406    }
407}
408
409fn parse_string(pair: Pair<'_, Rule>) -> Result<String> {
410    let span = pair.as_span();
411    let mut res = pair
412        .into_inner()
413        .map(|component| match component.as_rule() {
414            Rule::char_literal => Ok(String::from(component.as_str())),
415            Rule::char_escape_sequence => Ok(parse_char_escape_sequence(&component)),
416            Rule::nul_escape_sequence => Ok(String::from("\u{0000}")),
417            Rule::hex_escape_sequence | Rule::unicode_escape_sequence => {
418                let hex_escape = parse_hex(component.as_str())?;
419                match char::from_u32(hex_escape) {
420                    Some(s) => Ok(s.to_string()),
421                    None => Err(de::Error::custom("error parsing hex prefix")),
422                }
423            }
424            _ => unreachable!(),
425        })
426        .collect();
427    error::set_location(&mut res, &span);
428    res
429}
430
431fn parse_char_escape_sequence(pair: &Pair<'_, Rule>) -> String {
432    String::from(match pair.as_str() {
433        "b" => "\u{0008}",
434        "f" => "\u{000C}",
435        "n" => "\n",
436        "r" => "\r",
437        "t" => "\t",
438        "v" => "\u{000B}",
439        c => c,
440    })
441}
442
443fn parse_number(pair: &Pair<'_, Rule>) -> Result<f64> {
444    match pair.as_str() {
445        "Infinity" => Ok(f64::INFINITY),
446        "-Infinity" => Ok(f64::NEG_INFINITY),
447        "NaN" | "-NaN" => Ok(f64::NAN),
448        s if is_hex_literal(s) => parse_hex(&s[2..]).map(f64::from),
449        s => {
450            if let Ok(r) = s.parse::<f64>() {
451                if r.is_finite() {
452                    Ok(r)
453                } else {
454                    Err(de::Error::custom("error parsing number: too large"))
455                }
456            } else {
457                Err(de::Error::custom("error parsing number"))
458            }
459        }
460    }
461}
462
463fn parse_integer(pair: &Pair<'_, Rule>) -> Result<i64> {
464    match pair.as_str() {
465        s if is_hex_literal(s) => Ok(parse_hex(&s[2..])? as i64),
466        s => s
467            .parse()
468            .map_err(|_| de::Error::custom("error parsing integer")),
469    }
470}
471
/// Reports whether the text of a `number` pair denotes an integer literal.
///
/// Hex literals, with or without a leading sign, are always integers even though their digits
/// may include `e`/`E`. Otherwise a literal is an integer when it has no fraction, no exponent,
/// and is neither `Infinity` nor `NaN`.
fn is_int(s: &str) -> bool {
    if s.contains('.') {
        return false;
    }

    // The hex prefix sits after the optional sign, so look past the sign before testing for it;
    // otherwise `-0xe` would be mistaken for a literal with an exponent.
    let unsigned = s
        .strip_prefix('-')
        .or_else(|| s.strip_prefix('+'))
        .unwrap_or(s);
    let is_hex = unsigned.len() > 2 && (unsigned.starts_with("0x") || unsigned.starts_with("0X"));

    is_hex
        || !(s.contains('e') || s.contains('E') || s.contains("Infinity") || s.contains("NaN"))
}
480
481fn parse_hex(s: &str) -> Result<u32> {
482    u32::from_str_radix(s, 16).map_err(|_| de::Error::custom("error parsing hex"))
483}
484
/// Reports whether `s` is a `0x`/`0X` prefix followed by at least one more byte.
///
/// Uses `starts_with` rather than byte slicing so that text whose second byte is not a
/// character boundary yields `false` instead of panicking.
fn is_hex_literal(s: &str) -> bool {
    s.len() > 2 && (s.starts_with("0x") || s.starts_with("0X"))
}
488
/// `SeqAccess` implementation that walks the elements of a parsed array.
struct Seq<'de> {
    /// Element pairs not yet handed out, in source order; consumed from the front.
    pairs: VecDeque<Pair<'de, Rule>>,
}
492
493impl<'de> Seq<'de> {
494    pub fn new(pair: Pair<'de, Rule>) -> Self {
495        Self {
496            pairs: pair.into_inner().collect(),
497        }
498    }
499}
500
501impl<'de> de::SeqAccess<'de> for Seq<'de> {
502    type Error = Error;
503
504    fn size_hint(&self) -> Option<usize> {
505        Some(self.pairs.len())
506    }
507
508    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
509    where
510        T: de::DeserializeSeed<'de>,
511    {
512        if let Some(pair) = self.pairs.pop_front() {
513            seed.deserialize(&mut Deserializer::from_pair(pair))
514                .map(Some)
515        } else {
516            Ok(None)
517        }
518    }
519}
520
/// `MapAccess` implementation that walks the entries of a parsed object.
struct Map<'de> {
    /// Pairs not yet handed out, consumed from the front. `size_hint` halves the length, i.e.
    /// keys and values are stored as alternating entries.
    pairs: VecDeque<Pair<'de, Rule>>,
}
524
525impl<'de> Map<'de> {
526    pub fn new(pair: Pair<'de, Rule>) -> Self {
527        Self {
528            pairs: pair.into_inner().collect(),
529        }
530    }
531}
532
533impl<'de> de::MapAccess<'de> for Map<'de> {
534    type Error = Error;
535
536    fn size_hint(&self) -> Option<usize> {
537        Some(self.pairs.len() / 2)
538    }
539
540    fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
541    where
542        K: de::DeserializeSeed<'de>,
543    {
544        if let Some(pair) = self.pairs.pop_front() {
545            seed.deserialize(&mut Deserializer::from_pair(pair))
546                .map(Some)
547        } else {
548            Ok(None)
549        }
550    }
551
552    fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
553    where
554        V: de::DeserializeSeed<'de>,
555    {
556        seed.deserialize(&mut Deserializer::from_pair(
557            self.pairs.pop_front().unwrap(),
558        ))
559    }
560}
561
/// `EnumAccess` implementation over the pair that encodes an enum value.
struct Enum<'de> {
    /// The pair holding the enum: `variant_seed` accepts a string (the variant tag alone) or an
    /// object (whose first key is the tag and whose first value is the payload).
    pair: Pair<'de, Rule>,
}
565
566impl<'de> de::EnumAccess<'de> for Enum<'de> {
567    type Error = Error;
568    type Variant = Variant<'de>;
569
570    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant)>
571    where
572        V: de::DeserializeSeed<'de>,
573    {
574        let span = self.pair.as_span();
575        let mut res = (move || match self.pair.as_rule() {
576            Rule::string => {
577                let tag = seed.deserialize(&mut Deserializer::from_pair(self.pair))?;
578                Ok((tag, Variant { pair: None }))
579            }
580            Rule::object => {
581                let mut pairs = self.pair.into_inner();
582
583                if let Some(tag_pair) = pairs.next() {
584                    let tag = seed.deserialize(&mut Deserializer::from_pair(tag_pair))?;
585                    Ok((tag, Variant { pair: pairs.next() }))
586                } else {
587                    Err(de::Error::custom("expected a nonempty object"))
588                }
589            }
590            _ => Err(de::Error::custom("expected a string or an object")),
591        })();
592        error::set_location(&mut res, &span);
593        res
594    }
595}
596
/// `VariantAccess` implementation carrying the payload of an enum variant.
struct Variant<'de> {
    /// The payload pair, or `None` when the enum was written as a bare string tag.
    pair: Option<Pair<'de, Rule>>,
}
600
601impl<'de> de::VariantAccess<'de> for Variant<'de> {
602    type Error = Error;
603
604    fn unit_variant(self) -> Result<()> {
605        if let Some(pair) = self.pair {
606            serde::Deserialize::deserialize(&mut Deserializer::from_pair(pair))
607        } else {
608            Ok(())
609        }
610    }
611
612    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value>
613    where
614        T: de::DeserializeSeed<'de>,
615    {
616        seed.deserialize(&mut Deserializer::from_pair(self.pair.unwrap()))
617    }
618
619    fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value>
620    where
621        V: de::Visitor<'de>,
622    {
623        match self.pair {
624            Some(pair) => match pair.as_rule() {
625                Rule::array => visitor.visit_seq(Seq::new(pair)),
626                _ => Err(de::Error::custom("expected an array")),
627            },
628            None => Err(de::Error::custom("expected an array")),
629        }
630    }
631
632    fn struct_variant<V>(self, _fields: &'static [&'static str], visitor: V) -> Result<V::Value>
633    where
634        V: de::Visitor<'de>,
635    {
636        match self.pair {
637            Some(pair) => match pair.as_rule() {
638                Rule::object => visitor.visit_map(Map::new(pair)),
639                _ => Err(de::Error::custom("expected an object")),
640            },
641            None => Err(de::Error::custom("expected an object")),
642        }
643    }
644}