heck/
lib.rs

1//! **heck** is a case conversion library.
2//!
3//! This library exists to provide case conversion between common cases like
4//! CamelCase and snake_case. It is intended to be unicode aware, internally
5//! consistent, and reasonably well performing.
6//!
7//! ## Definition of a word boundary
8//!
9//! Word boundaries are defined as the "unicode words" defined in the
10//! `unicode_segmentation` library, as well as within those words in this
11//! manner:
12//!
13//! 1. All underscore characters are considered word boundaries.
14//! 2. If an uppercase character is followed by lowercase letters, a word
15//! boundary is considered to be just prior to that uppercase character.
16//! 3. If multiple uppercase characters are consecutive, they are considered to
17//! be within a single word, except that the last will be part of the next word
18//! if it is followed by lowercase characters (see rule 2).
19//!
20//! That is, "HelloWorld" is segmented `Hello|World` whereas "XMLHttpRequest" is
21//! segmented `XML|Http|Request`.
22//!
//! Characters not within words (such as spaces, punctuation, and underscores)
24//! are not included in the output string except as they are a part of the case
25//! being converted to. Multiple adjacent word boundaries (such as a series of
26//! underscores) are folded into one. ("hello__world" in snake case is therefore
27//! "hello_world", not the exact same string). Leading or trailing word boundary
28//! indicators are dropped, except insofar as CamelCase capitalizes the first
29//! word.
30//!
31//! ### Cases contained in this library:
32//!
33//! 1. UpperCamelCase
34//! 2. lowerCamelCase
35//! 3. snake_case
36//! 4. kebab-case
37//! 5. SHOUTY_SNAKE_CASE
38//! 6. Title Case
39//! 7. SHOUTY-KEBAB-CASE
40//! 8. Train-Case
41#![deny(missing_docs)]
42#![forbid(unsafe_code)]
43
44mod kebab;
45mod lower_camel;
46mod shouty_kebab;
47mod shouty_snake;
48mod snake;
49mod title;
50mod train;
51mod upper_camel;
52
53pub use kebab::{AsKebabCase, ToKebabCase};
54pub use lower_camel::{AsLowerCamelCase, ToLowerCamelCase};
55pub use shouty_kebab::{AsShoutyKebabCase, ToShoutyKebabCase};
56pub use shouty_snake::{
57    AsShoutySnakeCase, AsShoutySnakeCase as AsShoutySnekCase, ToShoutySnakeCase, ToShoutySnekCase,
58};
59pub use snake::{AsSnakeCase, AsSnakeCase as AsSnekCase, ToSnakeCase, ToSnekCase};
60pub use title::{AsTitleCase, ToTitleCase};
61pub use train::{AsTrainCase, ToTrainCase};
62pub use upper_camel::{
63    AsUpperCamelCase, AsUpperCamelCase as AsPascalCase, ToPascalCase, ToUpperCamelCase,
64};
65
66use std::fmt;
67
/// Splits `s` into words using the `unicode_segmentation` crate's
/// `unicode_words` segmentation (only built with the `unicode` feature).
#[cfg(feature = "unicode")]
fn get_iterator(s: &str) -> unicode_segmentation::UnicodeWords {
    unicode_segmentation::UnicodeSegmentation::unicode_words(s)
}
/// Splits `s` on every character that is not an ASCII letter or digit
/// (only built without the `unicode` feature).
///
/// Adjacent separators produce empty string slices in the output.
#[cfg(not(feature = "unicode"))]
fn get_iterator(s: &str) -> impl Iterator<Item = &str> {
    /// Anything other than an ASCII alphanumeric separates words.
    fn is_separator(candidate: char) -> bool {
        !candidate.is_ascii_alphanumeric()
    }

    s.split(is_separator)
}
77
78fn transform<F, G>(
79    s: &str,
80    mut with_word: F,
81    mut boundary: G,
82    f: &mut fmt::Formatter,
83) -> fmt::Result
84where
85    F: FnMut(&str, &mut fmt::Formatter) -> fmt::Result,
86    G: FnMut(&mut fmt::Formatter) -> fmt::Result,
87{
88    /// Tracks the current 'mode' of the transformation algorithm as it scans
89    /// the input string.
90    ///
91    /// The mode is a tri-state which tracks the case of the last cased
92    /// character of the current word. If there is no cased character
93    /// (either lowercase or uppercase) since the previous word boundary,
94    /// than the mode is `Boundary`. If the last cased character is lowercase,
95    /// then the mode is `Lowercase`. Othertherwise, the mode is
96    /// `Uppercase`.
97    #[derive(Clone, Copy, PartialEq)]
98    enum WordMode {
99        /// There have been no lowercase or uppercase characters in the current
100        /// word.
101        Boundary,
102        /// The previous cased character in the current word is lowercase.
103        Lowercase,
104        /// The previous cased character in the current word is uppercase.
105        Uppercase,
106    }
107
108    let mut first_word = true;
109
110    for word in get_iterator(s) {
111        let mut char_indices = word.char_indices().peekable();
112        let mut init = 0;
113        let mut mode = WordMode::Boundary;
114
115        while let Some((i, c)) = char_indices.next() {
116            // Skip underscore characters
117            if c == '_' {
118                if init == i {
119                    init += 1;
120                }
121                continue;
122            }
123
124            if let Some(&(next_i, next)) = char_indices.peek() {
125                // The mode including the current character, assuming the
126                // current character does not result in a word boundary.
127                let next_mode = if c.is_lowercase() {
128                    WordMode::Lowercase
129                } else if c.is_uppercase() {
130                    WordMode::Uppercase
131                } else {
132                    mode
133                };
134
135                // Word boundary after if next is underscore or current is
136                // not uppercase and next is uppercase
137                if next == '_' || (next_mode == WordMode::Lowercase && next.is_uppercase()) {
138                    if !first_word {
139                        boundary(f)?;
140                    }
141                    with_word(&word[init..next_i], f)?;
142                    first_word = false;
143                    init = next_i;
144                    mode = WordMode::Boundary;
145
146                // Otherwise if current and previous are uppercase and next
147                // is lowercase, word boundary before
148                } else if mode == WordMode::Uppercase && c.is_uppercase() && next.is_lowercase() {
149                    if !first_word {
150                        boundary(f)?;
151                    } else {
152                        first_word = false;
153                    }
154                    with_word(&word[init..i], f)?;
155                    init = i;
156                    mode = WordMode::Boundary;
157
158                // Otherwise no word boundary, just update the mode
159                } else {
160                    mode = next_mode;
161                }
162            } else {
163                // Collect trailing characters as a word
164                if !first_word {
165                    boundary(f)?;
166                } else {
167                    first_word = false;
168                }
169                with_word(&word[init..], f)?;
170                break;
171            }
172        }
173    }
174
175    Ok(())
176}
177
/// Writes `s` to `f` with every character lowercased.
///
/// A capital sigma (`Σ`) that is the last character of `s` is written as the
/// final-form sigma (`ς`) rather than its regular lowercase mapping.
fn lowercase(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
    let mut remaining = s.chars();

    while let Some(ch) = remaining.next() {
        // After `next()`, an empty remainder means `ch` was the last char.
        let is_last = remaining.as_str().is_empty();
        match ch {
            'Σ' if is_last => f.write_str("ς")?,
            _ => write!(f, "{}", ch.to_lowercase())?,
        }
    }

    Ok(())
}
190
/// Writes `s` to `f` with every character replaced by its uppercase mapping.
///
/// Stops at, and returns, the first formatting error.
fn uppercase(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
    s.chars()
        .try_for_each(|ch| write!(f, "{}", ch.to_uppercase()))
}
198
199fn capitalize(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
200    let mut char_indices = s.char_indices();
201    if let Some((_, c)) = char_indices.next() {
202        write!(f, "{}", c.to_uppercase())?;
203        if let Some((i, _)) = char_indices.next() {
204            lowercase(&s[i..], f)?;
205        }
206    }
207
208    Ok(())
209}