json5format/lib.rs
1// Copyright (c) 2020 Google LLC All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//! A stylized formatter for [JSON5](https://json5.org) ("JSON for Humans") documents.
6//!
7//! The intent of this formatter is to rewrite a given valid JSON5 document, restructuring the
8//! output (if required) to conform to a consistent style.
9//!
10//! The resulting document should preserve all data precision, data format representations, and
11//! semantic intent. Readability should be maintained, if not improved by the consistency within and
12//! across documents.
13//!
14//! Most importantly, all JSON5 comments should be preserved, maintaining the
15//! positional relationship with the JSON5 data elements they were intended to document.
16//!
17//! # Example
18//!
19//! ```rust
20//! use json5format::*;
21//! use maplit::hashmap;
22//! use maplit::hashset;
23//!
24//! let json5=r##"{
25//! "name": {
26//! "last": "Smith",
27//! "first": "John",
28//! "middle": "Jacob"
29//! },
30//! "children": [
31//! "Buffy",
32//! "Biff",
33//! "Balto"
34//! ],
35//! // Consider adding a note field to the `other` contact option
36//! "contact_options": [
37//! {
38//! "home": {
39//! "email": "jj@notreallygmail.com", // This was the original user id.
40//! // Now user id's are hash values.
41//! "phone": "212-555-4321"
42//! },
43//! "other": {
44//! "email": "volunteering@serviceprojectsrus.org"
45//! },
46//! "work": {
47//! "phone": "212-555-1234",
48//! "email": "john.j.smith@worksforme.gov"
49//! }
50//! }
51//! ],
52//! "address": {
53//! "city": "Anytown",
54//! "country": "USA",
55//! "state": "New York",
56//! "street": "101 Main Street"
57//! /* Update schema to support multiple addresses:
58//! "work": {
59//! "city": "Anytown",
60//! "country": "USA",
61//! "state": "New York",
62//! "street": "101 Main Street"
63//! }
64//! */
65//! }
66//! }
67//! "##;
68//!
69//! let options = FormatOptions {
70//! indent_by: 2,
71//! collapse_containers_of_one: true,
72//! options_by_path: hashmap! {
73//! "/*" => hashset! {
74//! PathOption::PropertyNameOrder(vec![
75//! "name",
76//! "address",
77//! "contact_options",
78//! ]),
79//! },
80//! "/*/name" => hashset! {
81//! PathOption::PropertyNameOrder(vec![
82//! "first",
83//! "middle",
84//! "last",
85//! "suffix",
86//! ]),
87//! },
88//! "/*/children" => hashset! {
89//! PathOption::SortArrayItems(true),
90//! },
91//! "/*/*/*" => hashset! {
92//! PathOption::PropertyNameOrder(vec![
93//! "work",
94//! "home",
95//! "other",
96//! ]),
97//! },
98//! "/*/*/*/*" => hashset! {
99//! PathOption::PropertyNameOrder(vec![
100//! "phone",
101//! "email",
102//! ]),
103//! },
104//! },
105//! ..Default::default()
106//! };
107//!
108//! let filename = "new_contact.json5".to_string();
109//!
110//! let format = Json5Format::with_options(options)?;
111//! let parsed_document = ParsedDocument::from_str(&json5, Some(filename))?;
112//! let bytes: Vec<u8> = format.to_utf8(&parsed_document)?;
113//!
114//! assert_eq!(std::str::from_utf8(&bytes)?, r##"{
115//! name: {
116//! first: "John",
117//! middle: "Jacob",
118//! last: "Smith",
119//! },
120//! address: {
121//! city: "Anytown",
122//! country: "USA",
123//! state: "New York",
124//! street: "101 Main Street",
125//!
126//! /* Update schema to support multiple addresses:
127//! "work": {
128//! "city": "Anytown",
129//! "country": "USA",
130//! "state": "New York",
131//! "street": "101 Main Street"
132//! }
133//! */
134//! },
135//!
136//! // Consider adding a note field to the `other` contact option
137//! contact_options: [
138//! {
139//! work: {
140//! phone: "212-555-1234",
141//! email: "john.j.smith@worksforme.gov",
142//! },
143//! home: {
144//! phone: "212-555-4321",
145//! email: "jj@notreallygmail.com", // This was the original user id.
146//! // Now user id's are hash values.
147//! },
148//! other: { email: "volunteering@serviceprojectsrus.org" },
149//! },
150//! ],
151//! children: [
152//! "Balto",
153//! "Biff",
154//! "Buffy",
155//! ],
156//! }
157//! "##);
158//! # Ok::<(),anyhow::Error>(())
159//! ```
160//!
161//! # Formatter Actions
162//!
163//! When the options above are applied to the input, the formatter will make the following changes:
164//!
165//! * The formatted document will be indented by 2 spaces.
166//! * Quotes are removed from all property names (since they are all legal ECMAScript identifiers)
167//! * The top-level properties will be reordered to [`name`, `address`, `contact_options`]. Since
168//! property name `children` was not included in the sort order, it will be placed at the end.
169//! * The `name` properties will be reordered to [`first`, `middle`, `last`].
170//! * The properties of the unnamed object in array `contact_options` will be reordered to
171//! [`work`, `home`, `other`].
172//! * The properties of the `work`, `home`, and `other` objects will be reordered to
173//! [`phone`, `email`].
174//! * The `children` names array of string primitives will be sorted.
175//! * All elements (except the top-level object, represented by the outermost curly braces) will
176//! end with a comma.
177//! * Since the `contact_options` descendant element `other` has only one property, the `other`
178//! object structure will collapse to a single line, with internal trailing comma suppressed.
179//! * The line comment will retain its relative position, above `contact_options`.
180//! * The block comment will retain its relative position, inside and at the end of the `address`
181//! object.
182//! * The end-of-line comment after `home`/`email` will retain its relative location (appended at
183//! the end of the `email` value) and any subsequent line comments with the same vertical
184//! alignment are also retained, and vertically adjusted to be left-aligned with the new
185//! position of the first comment line.
186//!
187//! # Formatter Behavior Details
188//!
189//! For reference, the following sections detail how the JSON5 formatter verifies and processes
190//! JSON5 content.
191//!
192//! ## Syntax Validation
193//!
194//! * Structural syntax is checked, such as validating matching braces, property name-colon-value
195//! syntax, enforced separation of values by commas, properly quoted strings, and both block and
196//! line comment extraction.
197//! * Non-string literal value syntax is checked (null, true, false, and the various legal formats
198//! for JSON5 Numbers).
199//! * Syntax errors produce error messages with the line and column where the problem
200//! was encountered.
201//!
202//! ## Property Names
203//!
204//! * Duplicate property names are retained, but may constitute errors in higher-level JSON5
205//! parsers or schema-specific deserializers.
206//! * All JSON5 unquoted property name characters are supported, including '$' and '_'. Digits are
207//! the only valid property name character that cannot be the first character. Property names
208//! can also be represented as quoted strings. All valid JSON5 strings, if quoted, are valid
209//! property names (including multi-line strings and quoted numbers).
210//!
211//! Example:
212//! ```json
213//! $_meta_prop: 'Has "double quotes" and \'single quotes\' and \
214//! multiple lines with escaped \\ backslash',
215//! ```
216//!
217//! ## Literal Values
218//!
219//! * JSON5 supports quoting strings (literal values or quoted property names) by either double (")
220//! or single (') quote. The formatter does not change the quotes. Double-quoting is
221//! conventional, but single quotes may be used when quoting strings containing double-quotes, and
222//! leaving the single quotes as-is is preferred.
223//! * JSON5 literal values are retained as-is. Strings retain all spacing characters, including
224//! escaped newlines. All other literals (unquoted tokens without spaces, such as false, null,
225//! 0.234, 1337, or l33t) are _not_ interpreted syntactically. Other schema-based tools and JSON5
226//! deserializers may flag these invalid values.
227//!
228//! ## Optional Sorting
229//!
//! * By default, array items and object properties retain their original order. (Some JSON arrays
//!   are order-dependent, and sorting them indiscriminately might change the meaning of the data.)
232//! * The formatter can automatically sort array items and object properties if enabled via
233//! `FormatOptions`:
234//! - To sort all arrays in the document, set
235//! [FormatOptions.sort_array_items](struct.FormatOptions.html#structfield.sort_array_items) to
236//! `true`
237//! - To sort only specific arrays in the target schema, specify the schema location under
238//! [FormatOptions.options_by_path](struct.FormatOptions.html#structfield.options_by_path), and
239//! set its [SortArrayItems](enum.PathOption.html#variant.SortArrayItems) option.
240//! - Properties are sorted based on an explicit user-supplied list of property names in the
241//! preferred order, for objects at a specified path. Specify the object's location in the
242//! target schema using
243//! [FormatOptions.options_by_path](struct.FormatOptions.html#structfield.options_by_path), and
244//! provide a vector of property name strings with the
245//! [PropertyNameOrder](enum.PathOption.html#variant.PropertyNameOrder) option. Properties not
246//! included in this option retain their original order, behind the explicitly ordered
247//! properties, if any.
248//! * When sorting array items, the formatter only sorts array item literal values (strings,
249//! numbers, bools, and null). Child arrays or objects are left in their original order, after
250//! sorted literals, if any, within the same array.
251//! * Array items are sorted in case-insensitive unicode lexicographic order. **(Note that, since
252//! the formatter does not parse unquoted literals, number types cannot be sorted numerically.)**
253//! Items that are case-insensitively equal are re-compared and ordered case-sensitively with
254//! respect to each other.
255//!
256//! ## Associated Comments
257//!
258//! * All comments immediately preceding an element (value or start of an array or object), and
259//! trailing line comments (starting on the same line as the element, optionally continued on
260//! successive lines if all line comments are left-aligned), are retained and move with the
261//! associated item if the item is repositioned during sorting.
262//! * All line and block comments are retained. Typically, the comments are re-aligned vertically
263//! (indented) with the values with which they were associated.
264//! * A single line comment appearing immediately after a JSON value (primitive or closing brace),
265//! on the same line, will remain appended to that value on its line after re-formatting.
266//! * Spaces separate block comments from blocks of contiguous line comments associated with the
267//! same entry.
268//! * Comments at the end of a list (after the last property or item) are retained at the end of
269//! the same list.
270//! * Block comments with lines that extend to the left of the opening "/\*" are not re-aligned.
271//!
272//! ## Whitespace Handling
273//!
274//! * Unicode characters are allowed, and unicode space characters should retain their meaning
275//! according to unicode standards.
276//! * All spaces inside single- or multi-line strings are retained. All spaces in comments are
277//! retained *except* trailing spaces at the end of a line.
278//! * All other original spaces are removed.
279
280#![deny(missing_docs)]
281
282#[macro_use]
283mod error;
284
285mod content;
286mod formatter;
287mod options;
288mod parser;
289
290use {
291 crate::formatter::*, std::cell::RefCell, std::collections::HashMap, std::collections::HashSet,
292 std::rc::Rc,
293};
294
295pub use content::Array;
296pub use content::Comment;
297pub use content::Comments;
298pub use content::Object;
299pub use content::ParsedDocument;
300pub use content::Primitive;
301pub use content::Property;
302pub use content::Value;
303pub use error::Error;
304pub use error::Location;
305pub use options::FormatOptions;
306pub use options::PathOption;
307
308/// Format a JSON5 document, applying a consistent style, with given options.
309///
310/// See [FormatOptions](struct.FormatOptions.html) for style options, and confirm the defaults by
311/// reviewing the source of truth via the `src` link for
312/// [impl Default for FormatOptions](struct.FormatOptions.html#impl-Default).
313///
314/// # Format and Style (Default)
315///
316/// Unless FormatOptions are modified, the JSON5 formatter takes a JSON5 document (as a unicode
317/// String) and generates a new document with the following formatting:
318///
319/// * Indents 4 spaces.
320/// * Quotes are removed from property names if they are legal ECMAScript 5.1 identifiers. Property
321/// names that do not comply with ECMAScript identifier format requirements will retain their
322/// existing (single or double) quotes.
323/// * All property and item lists end with a trailing comma.
324/// * All property and item lists are broken down; that is, the braces are on separate lines and
325/// all values are indented.
326///
327/// ```json
328/// {
329/// key: "value",
330/// array: [
331/// 3.145,
332/// ]
333/// }
334/// ```
335///
336/// # Arguments
337/// * buffer - A unicode string containing the original JSON5 document.
338/// * filename - An optional filename. Parsing errors typically include the filename (if given),
339/// and the line number and character column where the error was detected.
340/// * options - Format style options to override the default style, if provided.
341/// # Returns
342/// * The formatted result in UTF-8 encoded bytes.
343pub fn format(
344 buffer: &str,
345 filename: Option<String>,
346 options: Option<FormatOptions>,
347) -> Result<Vec<u8>, Error> {
348 let parsed_document = ParsedDocument::from_str(buffer, filename)?;
349 let options = match options {
350 Some(options) => options,
351 None => FormatOptions { ..Default::default() },
352 };
353 Json5Format::with_options(options)?.to_utf8(&parsed_document)
354}
355
356/// A JSON5 formatter that parses a valid JSON5 input buffer and produces a new, formatted document.
357pub struct Json5Format {
358 /// Options that alter how the formatter generates the formatted output. This instance of
359 /// FormatOptions is a subset of the FormatOptions passed to the `with_options` constructor.
360 /// The `options_by_path` are first removed, and then used to initialize the SubpathOptions
361 /// hierarchy rooted at the `document_root_options_ref`.
362 default_options: FormatOptions,
363
364 /// Depth-specific options applied at the document root and below.
365 document_root_options_ref: Rc<RefCell<SubpathOptions>>,
366}
367
368impl Json5Format {
369 /// Create and return a Json5Format, with the given options to be applied to the
370 /// [Json5Format::to_utf8()](struct.Json5Format.html#method.to_utf8) operation.
371 pub fn with_options(mut options: FormatOptions) -> Result<Self, Error> {
372 let mut document_root_options = SubpathOptions::new(&options);
373
374 // Typical JSON5 documents start and end with curly braces for a top-level unnamed
375 // object. This is by convention, and the Json5Format represents this
376 // top-level object as a single child in a conceptual array. The array square braces
377 // are not rendered, and by convention, the child object should not have a trailing
378 // comma, even if trailing commas are the default everywhere else in the document.
379 //
380 // Set the SubpathOptions for the document array items to prevent trailing commas.
381 document_root_options.options.trailing_commas = false;
382
383 let mut options_by_path =
384 options.options_by_path.drain().collect::<HashMap<&'static str, HashSet<PathOption>>>();
385
386 // Default options remain after draining the `options_by_path`
387 let default_options = options;
388
389 // Transfer the options_by_path from the given options into the SubpathOptions tree
390 // rooted at `document_options_root`.
391 for (path, path_options) in options_by_path.drain() {
392 let rc; // extend life of temporary
393 let mut borrowed; // extend life of temporary
394 let subpath_options = if path == "/" {
395 &mut document_root_options
396 } else if path.starts_with("/") {
397 rc = document_root_options.get_or_create_subpath_options(
398 &path[1..].split('/').collect::<Vec<_>>(),
399 &default_options,
400 );
401 borrowed = rc.borrow_mut();
402 &mut *borrowed
403 } else {
404 return Err(Error::configuration(format!(
405 "PathOption path '{}' is invalid.",
406 path
407 )));
408 };
409 subpath_options.override_default_options(&path_options);
410 }
411
412 Ok(Json5Format {
413 default_options,
414 document_root_options_ref: Rc::new(RefCell::new(document_root_options)),
415 })
416 }
417
418 /// Create and return a Json5Format, with the default settings.
419 pub fn new() -> Result<Self, Error> {
420 Self::with_options(FormatOptions { ..Default::default() })
421 }
422
423 /// Formats the parsed document into a new Vector of UTF8 bytes.
424 ///
425 /// # Arguments
426 /// * `parsed_document` - The parsed state of the incoming document.
427 ///
428 /// # Example
429 ///
430 /// ```
431 /// # use json5format::*;
432 /// # let buffer = String::from("{}");
433 /// # let filename = String::from("example.json5");
434 /// let format = Json5Format::new()?;
435 /// let parsed_document = ParsedDocument::from_str(&buffer, Some(filename))?;
436 /// let bytes = format.to_utf8(&parsed_document)?;
437 /// # assert_eq!("{}\n", std::str::from_utf8(&bytes).unwrap());
438 /// # Ok::<(),anyhow::Error>(())
439 /// ```
440 pub fn to_utf8(&self, parsed_document: &ParsedDocument) -> Result<Vec<u8>, Error> {
441 let formatter =
442 Formatter::new(self.default_options.clone(), self.document_root_options_ref.clone());
443 formatter.format(parsed_document)
444 }
445
446 /// Formats the parsed document into a new String.
447 ///
448 /// # Arguments
449 /// * `parsed_document` - The parsed state of the incoming document.
450 ///
451 /// # Example
452 ///
453 /// ```
454 /// # use json5format::*;
455 /// # fn main() -> std::result::Result<(), Error> {
456 /// # let buffer = String::from("{}");
457 /// # let filename = String::from("example.json5");
458 /// let format = Json5Format::new()?;
459 /// let parsed_document = ParsedDocument::from_str(&buffer, Some(filename))?;
460 /// let formatted = format.to_string(&parsed_document)?;
461 /// # assert_eq!("{}\n", formatted);
462 /// # Ok(())
463 /// # }
464 /// ```
465 pub fn to_string(&self, parsed_document: &ParsedDocument) -> Result<String, Error> {
466 String::from_utf8(self.to_utf8(parsed_document)?)
467 .map_err(|e| Error::internal(None, e.to_string()))
468 }
469}