json5format/
lib.rs

1// Copyright (c) 2020 Google LLC All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//! A stylized formatter for [JSON5](https://json5.org) ("JSON for Humans") documents.
6//!
7//! The intent of this formatter is to rewrite a given valid JSON5 document, restructuring the
8//! output (if required) to conform to a consistent style.
9//!
10//! The resulting document should preserve all data precision, data format representations, and
11//! semantic intent. Readability should be maintained, if not improved by the consistency within and
12//! across documents.
13//!
14//! Most importantly, all JSON5 comments should be preserved, maintaining the
15//! positional relationship with the JSON5 data elements they were intended to document.
16//!
17//! # Example
18//!
19//! ```rust
20//!   use json5format::*;
21//!   use maplit::hashmap;
22//!   use maplit::hashset;
23//!
24//!   let json5=r##"{
25//!       "name": {
26//!           "last": "Smith",
27//!           "first": "John",
28//!           "middle": "Jacob"
29//!       },
30//!       "children": [
31//!           "Buffy",
32//!           "Biff",
33//!           "Balto"
34//!       ],
35//!       // Consider adding a note field to the `other` contact option
36//!       "contact_options": [
37//!           {
38//!               "home": {
39//!                   "email": "jj@notreallygmail.com",   // This was the original user id.
40//!                                                       // Now user id's are hash values.
41//!                   "phone": "212-555-4321"
42//!               },
43//!               "other": {
44//!                   "email": "volunteering@serviceprojectsrus.org"
45//!               },
46//!               "work": {
47//!                   "phone": "212-555-1234",
48//!                   "email": "john.j.smith@worksforme.gov"
49//!               }
50//!           }
51//!       ],
52//!       "address": {
53//!           "city": "Anytown",
54//!           "country": "USA",
55//!           "state": "New York",
56//!           "street": "101 Main Street"
57//!           /* Update schema to support multiple addresses:
58//!              "work": {
59//!                  "city": "Anytown",
60//!                  "country": "USA",
61//!                  "state": "New York",
62//!                  "street": "101 Main Street"
63//!              }
64//!           */
65//!       }
66//!   }
67//!   "##;
68//!
69//!   let options = FormatOptions {
70//!       indent_by: 2,
71//!       collapse_containers_of_one: true,
72//!       options_by_path: hashmap! {
73//!           "/*" => hashset! {
74//!               PathOption::PropertyNameOrder(vec![
75//!                   "name",
76//!                   "address",
77//!                   "contact_options",
78//!               ]),
79//!           },
80//!           "/*/name" => hashset! {
81//!               PathOption::PropertyNameOrder(vec![
82//!                   "first",
83//!                   "middle",
84//!                   "last",
85//!                   "suffix",
86//!               ]),
87//!           },
88//!           "/*/children" => hashset! {
89//!               PathOption::SortArrayItems(true),
90//!           },
91//!           "/*/*/*" => hashset! {
92//!               PathOption::PropertyNameOrder(vec![
93//!                   "work",
94//!                   "home",
95//!                   "other",
96//!               ]),
97//!           },
98//!           "/*/*/*/*" => hashset! {
99//!               PathOption::PropertyNameOrder(vec![
100//!                   "phone",
101//!                   "email",
102//!               ]),
103//!           },
104//!       },
105//!       ..Default::default()
106//!   };
107//!
108//!   let filename = "new_contact.json5".to_string();
109//!
110//!   let format = Json5Format::with_options(options)?;
111//!   let parsed_document = ParsedDocument::from_str(&json5, Some(filename))?;
112//!   let bytes: Vec<u8> = format.to_utf8(&parsed_document)?;
113//!
114//!   assert_eq!(std::str::from_utf8(&bytes)?, r##"{
115//!   name: {
116//!     first: "John",
117//!     middle: "Jacob",
118//!     last: "Smith",
119//!   },
120//!   address: {
121//!     city: "Anytown",
122//!     country: "USA",
123//!     state: "New York",
124//!     street: "101 Main Street",
125//!
126//!     /* Update schema to support multiple addresses:
127//!        "work": {
128//!            "city": "Anytown",
129//!            "country": "USA",
130//!            "state": "New York",
131//!            "street": "101 Main Street"
132//!        }
133//!     */
134//!   },
135//!
136//!   // Consider adding a note field to the `other` contact option
137//!   contact_options: [
138//!     {
139//!       work: {
140//!         phone: "212-555-1234",
141//!         email: "john.j.smith@worksforme.gov",
142//!       },
143//!       home: {
144//!         phone: "212-555-4321",
145//!         email: "jj@notreallygmail.com", // This was the original user id.
146//!                                         // Now user id's are hash values.
147//!       },
148//!       other: { email: "volunteering@serviceprojectsrus.org" },
149//!     },
150//!   ],
151//!   children: [
152//!     "Balto",
153//!     "Biff",
154//!     "Buffy",
155//!   ],
156//! }
157//! "##);
158//! # Ok::<(),anyhow::Error>(())
159//! ```
160//!
161//! # Formatter Actions
162//!
163//! When the options above are applied to the input, the formatter will make the following changes:
164//!
165//!   * The formatted document will be indented by 2 spaces.
166//!   * Quotes are removed from all property names (since they are all legal ECMAScript identifiers)
167//!   * The top-level properties will be reordered to [`name`, `address`, `contact_options`]. Since
168//!     property name `children` was not included in the sort order, it will be placed at the end.
169//!   * The `name` properties will be reordered to [`first`, `middle`, `last`].
170//!   * The properties of the unnamed object in array `contact_options` will be reordered to
171//!     [`work`, `home`, `other`].
172//!   * The properties of the `work`, `home`, and `other` objects will be reordered to
173//!     [`phone`, `email`].
174//!   * The `children` names array of string primitives will be sorted.
175//!   * All elements (except the top-level object, represented by the outermost curly braces) will
176//!     end with a comma.
177//!   * Since the `contact_options` descendant element `other` has only one property, the `other`
178//!     object structure will collapse to a single line, with internal trailing comma suppressed.
179//!   * The line comment will retain its relative position, above `contact_options`.
180//!   * The block comment will retain its relative position, inside and at the end of the `address`
181//!     object.
182//!   * The end-of-line comment after `home`/`email` will retain its relative location (appended at
183//!     the end of the `email` value) and any subsequent line comments with the same vertical
184//!     alignment are also retained, and vertically adjusted to be left-aligned with the new
185//!     position of the first comment line.
186//!
187//! # Formatter Behavior Details
188//!
189//! For reference, the following sections detail how the JSON5 formatter verifies and processes
190//! JSON5 content.
191//!
192//! ## Syntax Validation
193//!
194//! * Structural syntax is checked, such as validating matching braces, property name-colon-value
195//!   syntax, enforced separation of values by commas, properly quoted strings, and both block and
196//!   line comment extraction.
197//! * Non-string literal value syntax is checked (null, true, false, and the various legal formats
198//!   for JSON5 Numbers).
199//! * Syntax errors produce error messages with the line and column where the problem
200//!   was encountered.
201//!
202//! ## Property Names
203//!
204//! * Duplicate property names are retained, but may constitute errors in higher-level JSON5
205//!   parsers or schema-specific deserializers.
206//! * All JSON5 unquoted property name characters are supported, including '$' and '_'. Digits are
207//!   the only valid property name character that cannot be the first character. Property names
208//!   can also be represented as quoted strings. All valid JSON5 strings, if quoted, are valid
209//!   property names (including multi-line strings and quoted numbers).
210//!
211//! Example:
212//! ```json
213//!     $_meta_prop: 'Has "double quotes" and \'single quotes\' and \
214//! multiple lines with escaped \\ backslash',
215//! ```
216//!
217//! ## Literal Values
218//!
219//! * JSON5 supports quoting strings (literal values or quoted property names) by either double (")
220//!   or single (') quote. The formatter does not change the quotes. Double-quoting is
221//!   conventional, but single quotes may be used when quoting strings containing double-quotes, and
222//!   leaving the single quotes as-is is preferred.
223//! * JSON5 literal values are retained as-is. Strings retain all spacing characters, including
224//!   escaped newlines. All other literals (unquoted tokens without spaces, such as false, null,
225//!   0.234, 1337, or l33t) are _not_ interpreted syntactically. Other schema-based tools and JSON5
226//!   deserializers may flag these invalid values.
227//!
228//! ## Optional Sorting
229//!
230//! * By default, array items and object properties retain their original order. (Some JSON arrays
//!   are order-dependent, and sorting them indiscriminately might change the meaning of the data.)
232//! * The formatter can automatically sort array items and object properties if enabled via
233//!   `FormatOptions`:
234//!   - To sort all arrays in the document, set
235//!     [FormatOptions.sort_array_items](struct.FormatOptions.html#structfield.sort_array_items) to
236//!     `true`
237//!   - To sort only specific arrays in the target schema, specify the schema location under
238//!     [FormatOptions.options_by_path](struct.FormatOptions.html#structfield.options_by_path), and
239//!     set its [SortArrayItems](enum.PathOption.html#variant.SortArrayItems) option.
240//!   - Properties are sorted based on an explicit user-supplied list of property names in the
241//!     preferred order, for objects at a specified path. Specify the object's location in the
242//!     target schema using
243//!     [FormatOptions.options_by_path](struct.FormatOptions.html#structfield.options_by_path), and
244//!     provide a vector of property name strings with the
245//!     [PropertyNameOrder](enum.PathOption.html#variant.PropertyNameOrder) option. Properties not
246//!     included in this option retain their original order, behind the explicitly ordered
247//!     properties, if any.
248//! * When sorting array items, the formatter only sorts array item literal values (strings,
249//!   numbers, bools, and null). Child arrays or objects are left in their original order, after
250//!   sorted literals, if any, within the same array.
251//! * Array items are sorted in case-insensitive unicode lexicographic order. **(Note that, since
252//!   the formatter does not parse unquoted literals, number types cannot be sorted numerically.)**
253//!   Items that are case-insensitively equal are re-compared and ordered case-sensitively with
254//!   respect to each other.
255//!
256//! ## Associated Comments
257//!
258//! * All comments immediately preceding an element (value or start of an array or object), and
259//!   trailing line comments (starting on the same line as the element, optionally continued on
260//!   successive lines if all line comments are left-aligned), are retained and move with the
261//!   associated item if the item is repositioned during sorting.
262//! * All line and block comments are retained. Typically, the comments are re-aligned vertically
263//!   (indented) with the values with which they were associated.
264//! * A single line comment appearing immediately after a JSON value (primitive or closing brace),
265//!   on the same line, will remain appended to that value on its line after re-formatting.
266//! * Spaces separate block comments from blocks of contiguous line comments associated with the
267//!   same entry.
268//! * Comments at the end of a list (after the last property or item) are retained at the end of
269//!   the same list.
270//! * Block comments with lines that extend to the left of the opening "/\*" are not re-aligned.
271//!
272//! ## Whitespace Handling
273//!
274//! * Unicode characters are allowed, and unicode space characters should retain their meaning
275//!   according to unicode standards.
276//! * All spaces inside single- or multi-line strings are retained. All spaces in comments are
277//!   retained *except* trailing spaces at the end of a line.
278//! * All other original spaces are removed.
279
280#![deny(missing_docs)]
281
282#[macro_use]
283mod error;
284
285mod content;
286mod formatter;
287mod options;
288mod parser;
289
290use {
291    crate::formatter::*, std::cell::RefCell, std::collections::HashMap, std::collections::HashSet,
292    std::rc::Rc,
293};
294
295pub use content::Array;
296pub use content::Comment;
297pub use content::Comments;
298pub use content::Object;
299pub use content::ParsedDocument;
300pub use content::Primitive;
301pub use content::Property;
302pub use content::Value;
303pub use error::Error;
304pub use error::Location;
305pub use options::FormatOptions;
306pub use options::PathOption;
307
308/// Format a JSON5 document, applying a consistent style, with given options.
309///
310/// See [FormatOptions](struct.FormatOptions.html) for style options, and confirm the defaults by
311/// reviewing the source of truth via the `src` link for
312/// [impl Default for FormatOptions](struct.FormatOptions.html#impl-Default).
313///
314/// # Format and Style (Default)
315///
316/// Unless FormatOptions are modified, the JSON5 formatter takes a JSON5 document (as a unicode
317/// String) and generates a new document with the following formatting:
318///
319/// * Indents 4 spaces.
320/// * Quotes are removed from property names if they are legal ECMAScript 5.1 identifiers. Property
321///   names that do not comply with ECMAScript identifier format requirements will retain their
322///   existing (single or double) quotes.
323/// * All property and item lists end with a trailing comma.
324/// * All property and item lists are broken down; that is, the braces are on separate lines and
325///   all values are indented.
326///
327/// ```json
328/// {
329///     key: "value",
330///     array: [
331///         3.145,
332///     ]
333/// }
334/// ```
335///
336/// # Arguments
337///   * buffer - A unicode string containing the original JSON5 document.
338///   * filename - An optional filename. Parsing errors typically include the filename (if given),
339///     and the line number and character column where the error was detected.
340///   * options - Format style options to override the default style, if provided.
341/// # Returns
342///   * The formatted result in UTF-8 encoded bytes.
343pub fn format(
344    buffer: &str,
345    filename: Option<String>,
346    options: Option<FormatOptions>,
347) -> Result<Vec<u8>, Error> {
348    let parsed_document = ParsedDocument::from_str(buffer, filename)?;
349    let options = match options {
350        Some(options) => options,
351        None => FormatOptions { ..Default::default() },
352    };
353    Json5Format::with_options(options)?.to_utf8(&parsed_document)
354}
355
/// A JSON5 formatter that renders a [ParsedDocument](struct.ParsedDocument.html) as a new,
/// consistently styled document.
///
/// Construct one with [Json5Format::new()](struct.Json5Format.html#method.new) or
/// [Json5Format::with_options()](struct.Json5Format.html#method.with_options), then call
/// [Json5Format::to_utf8()](struct.Json5Format.html#method.to_utf8) or
/// [Json5Format::to_string()](struct.Json5Format.html#method.to_string).
pub struct Json5Format {
    /// Options that alter how the formatter generates the formatted output. This instance of
    /// FormatOptions is a subset of the FormatOptions passed to the `with_options` constructor.
    /// The `options_by_path` are first removed, and then used to initialize the SubpathOptions
    /// hierarchy rooted at the `document_root_options_ref`.
    default_options: FormatOptions,

    /// Depth-specific options applied at the document root and below. The root entry is created
    /// by `with_options`, which disables trailing commas for it before applying any
    /// caller-supplied path options.
    document_root_options_ref: Rc<RefCell<SubpathOptions>>,
}
367
368impl Json5Format {
369    /// Create and return a Json5Format, with the given options to be applied to the
370    /// [Json5Format::to_utf8()](struct.Json5Format.html#method.to_utf8) operation.
371    pub fn with_options(mut options: FormatOptions) -> Result<Self, Error> {
372        let mut document_root_options = SubpathOptions::new(&options);
373
374        // Typical JSON5 documents start and end with curly braces for a top-level unnamed
375        // object. This is by convention, and the Json5Format represents this
376        // top-level object as a single child in a conceptual array. The array square braces
377        // are not rendered, and by convention, the child object should not have a trailing
378        // comma, even if trailing commas are the default everywhere else in the document.
379        //
380        // Set the SubpathOptions for the document array items to prevent trailing commas.
381        document_root_options.options.trailing_commas = false;
382
383        let mut options_by_path =
384            options.options_by_path.drain().collect::<HashMap<&'static str, HashSet<PathOption>>>();
385
386        // Default options remain after draining the `options_by_path`
387        let default_options = options;
388
389        // Transfer the options_by_path from the given options into the SubpathOptions tree
390        // rooted at `document_options_root`.
391        for (path, path_options) in options_by_path.drain() {
392            let rc; // extend life of temporary
393            let mut borrowed; // extend life of temporary
394            let subpath_options = if path == "/" {
395                &mut document_root_options
396            } else if path.starts_with("/") {
397                rc = document_root_options.get_or_create_subpath_options(
398                    &path[1..].split('/').collect::<Vec<_>>(),
399                    &default_options,
400                );
401                borrowed = rc.borrow_mut();
402                &mut *borrowed
403            } else {
404                return Err(Error::configuration(format!(
405                    "PathOption path '{}' is invalid.",
406                    path
407                )));
408            };
409            subpath_options.override_default_options(&path_options);
410        }
411
412        Ok(Json5Format {
413            default_options,
414            document_root_options_ref: Rc::new(RefCell::new(document_root_options)),
415        })
416    }
417
418    /// Create and return a Json5Format, with the default settings.
419    pub fn new() -> Result<Self, Error> {
420        Self::with_options(FormatOptions { ..Default::default() })
421    }
422
423    /// Formats the parsed document into a new Vector of UTF8 bytes.
424    ///
425    /// # Arguments
426    ///   * `parsed_document` - The parsed state of the incoming document.
427    ///
428    /// # Example
429    ///
430    /// ```
431    /// # use json5format::*;
432    /// # let buffer = String::from("{}");
433    /// # let filename = String::from("example.json5");
434    /// let format = Json5Format::new()?;
435    /// let parsed_document = ParsedDocument::from_str(&buffer, Some(filename))?;
436    /// let bytes = format.to_utf8(&parsed_document)?;
437    /// # assert_eq!("{}\n", std::str::from_utf8(&bytes).unwrap());
438    /// # Ok::<(),anyhow::Error>(())
439    /// ```
440    pub fn to_utf8(&self, parsed_document: &ParsedDocument) -> Result<Vec<u8>, Error> {
441        let formatter =
442            Formatter::new(self.default_options.clone(), self.document_root_options_ref.clone());
443        formatter.format(parsed_document)
444    }
445
446    /// Formats the parsed document into a new String.
447    ///
448    /// # Arguments
449    ///   * `parsed_document` - The parsed state of the incoming document.
450    ///
451    /// # Example
452    ///
453    /// ```
454    /// # use json5format::*;
455    /// # fn main() -> std::result::Result<(), Error> {
456    /// # let buffer = String::from("{}");
457    /// # let filename = String::from("example.json5");
458    /// let format = Json5Format::new()?;
459    /// let parsed_document = ParsedDocument::from_str(&buffer, Some(filename))?;
460    /// let formatted = format.to_string(&parsed_document)?;
461    /// # assert_eq!("{}\n", formatted);
462    /// # Ok(())
463    /// # }
464    /// ```
465    pub fn to_string(&self, parsed_document: &ParsedDocument) -> Result<String, Error> {
466        String::from_utf8(self.to_utf8(parsed_document)?)
467            .map_err(|e| Error::internal(None, e.to_string()))
468    }
469}