heck/
lib.rs

1//! **heck** is a case conversion library.
2//!
3//! This library exists to provide case conversion between common cases like
//! CamelCase and snake_case. It is intended to be Unicode aware, internally
//! consistent, and reasonably well performing.
6//!
7//! ## Definition of a word boundary
8//!
9//! Word boundaries are defined as the "unicode words" defined in the
10//! `unicode_segmentation` library, as well as within those words in this manner:
11//!
12//! 1. All underscore characters are considered word boundaries.
13//! 2. If an uppercase character is followed by lowercase letters, a word boundary
14//! is considered to be just prior to that uppercase character.
15//! 3. If multiple uppercase characters are consecutive, they are considered to be
16//! within a single word, except that the last will be part of the next word if it
17//! is followed by lowercase characters (see rule 2).
18//!
19//! That is, "HelloWorld" is segmented `Hello|World` whereas "XMLHttpRequest" is
20//! segmented `XML|Http|Request`.
21//!
//! Characters not within words (such as spaces, punctuation, and underscores)
23//! are not included in the output string except as they are a part of the case
24//! being converted to. Multiple adjacent word boundaries (such as a series of
25//! underscores) are folded into one. ("hello__world" in snake case is therefore
26//! "hello_world", not the exact same string). Leading or trailing word boundary
27//! indicators are dropped, except insofar as CamelCase capitalizes the first word.
28//!
29//! ### Cases contained in this library:
30//!
31//! 1. CamelCase
32//! 2. snake_case
33//! 3. kebab-case
34//! 4. SHOUTY_SNAKE_CASE
35//! 5. mixedCase
36//! 6. Title Case
37#![deny(missing_docs)]
38extern crate unicode_segmentation;
39
40mod camel;
41mod kebab;
42mod mixed;
43mod shouty_snake;
44mod snake;
45mod title;
46
47pub use camel::CamelCase;
48pub use kebab::KebabCase;
49pub use mixed::MixedCase;
50pub use shouty_snake::{ShoutySnakeCase, ShoutySnekCase};
51pub use snake::{SnakeCase, SnekCase};
52pub use title::TitleCase;
53
54use unicode_segmentation::UnicodeSegmentation;
55
56fn transform<F, G>(s: &str, with_word: F, boundary: G) -> String
57where
58    F: Fn(&str, &mut String),
59    G: Fn(&mut String)
60{
61
62    /// Tracks the current 'mode' of the transformation algorithm as it scans the input string.
63    ///
64    /// The mode is a tri-state which tracks the case of the last cased character of the current
65    /// word. If there is no cased character (either lowercase or uppercase) since the previous
66    /// word boundary, than the mode is `Boundary`. If the last cased character is lowercase, then
67    /// the mode is `Lowercase`. Othertherwise, the mode is `Uppercase`.
68    #[derive(Clone, Copy, PartialEq)]
69    enum WordMode {
70        /// There have been no lowercase or uppercase characters in the current word.
71        Boundary,
72        /// The previous cased character in the current word is lowercase.
73        Lowercase,
74        /// The previous cased character in the current word is uppercase.
75        Uppercase,
76    }
77
78    let mut out = String::new();
79    let mut first_word = true;
80
81    for word in s.unicode_words() {
82        let mut char_indices = word.char_indices().peekable();
83        let mut init = 0;
84        let mut mode = WordMode::Boundary;
85
86        while let Some((i, c)) = char_indices.next() {
87            // Skip underscore characters
88            if c == '_' {
89                if init == i { init += 1; }
90                continue
91            }
92
93            if let Some(&(next_i, next)) = char_indices.peek() {
94
95                // The mode including the current character, assuming the current character does
96                // not result in a word boundary.
97                let next_mode = if c.is_lowercase() {
98                    WordMode::Lowercase
99                } else if c.is_uppercase() {
100                    WordMode::Uppercase
101                } else {
102                    mode
103                };
104
105                // Word boundary after if next is underscore or current is
106                // not uppercase and next is uppercase
107                if next == '_' || (next_mode == WordMode::Lowercase && next.is_uppercase()) {
108                    if !first_word { boundary(&mut out); }
109                    with_word(&word[init..next_i], &mut out);
110                    first_word = false;
111                    init = next_i;
112                    mode = WordMode::Boundary;
113
114                // Otherwise if current and previous are uppercase and next
115                // is lowercase, word boundary before
116                } else if mode == WordMode::Uppercase && c.is_uppercase() && next.is_lowercase() {
117                    if !first_word { boundary(&mut out); }
118                    else { first_word = false; }
119                    with_word(&word[init..i], &mut out);
120                    init = i;
121                    mode = WordMode::Boundary;
122
123                // Otherwise no word boundary, just update the mode
124                } else {
125                    mode = next_mode;
126                }
127            } else {
128                // Collect trailing characters as a word
129                if !first_word { boundary(&mut out); }
130                else { first_word = false; }
131                with_word(&word[init..], &mut out);
132                break;
133            }
134        }
135    }
136
137    out
138}
139
/// Appends the lowercase form of `s` to `out`.
///
/// Every character is lowercased with `char::to_lowercase`, except that a capital sigma (`Σ`)
/// in the very last position of `s` is written as the final-form sigma (`ς`). The check only
/// looks at position, so a one-character `s` consisting of `Σ` also yields `ς`.
fn lowercase(s: &str, out: &mut String) {
    let mut leading = s.chars();
    // Peel off the last character so it can be treated separately from the rest.
    let last = leading.next_back();

    for ch in leading {
        out.extend(ch.to_lowercase());
    }

    match last {
        Some('Σ') => out.push('ς'),
        Some(ch) => out.extend(ch.to_lowercase()),
        None => {}
    }
}
150
/// Appends the uppercase form of `s` to `out`.
///
/// Each character is expanded with `char::to_uppercase`, so a single input character may
/// contribute more than one character to the output.
fn uppercase(s: &str, out: &mut String) {
    out.extend(s.chars().flat_map(|ch| ch.to_uppercase()));
}
156
157fn capitalize(s: &str, out: &mut String) {
158    let mut char_indices = s.char_indices();
159    if let Some((_, c)) = char_indices.next() {
160        out.extend(c.to_uppercase());
161        if let Some((i, _)) = char_indices.next() {
162            lowercase(&s[i..], out);
163        }
164    }
165}