textwrap/splitting.rs
//! Word splitting functionality.
//!
//! To wrap text into lines, long words sometimes need to be split
//! across lines. The [`WordSplitter`] trait defines this
//! functionality. [`HyphenSplitter`] is the default implementation of
//! this trait: it will simply split words on existing hyphens.
7
8#[cfg(feature = "hyphenation")]
9use hyphenation::{Hyphenator, Standard};
10
/// An interface for splitting words.
///
/// When the [`wrap_iter`] method tries to fit text into a line, it
/// will eventually find a word that is too large for the current text
/// width. It will then call the currently configured `WordSplitter` to
/// have it attempt to split the word into smaller parts. This trait
/// describes that functionality via the [`split`] method.
///
/// If the `textwrap` crate has been compiled with the `hyphenation`
/// feature enabled, you will find an implementation of `WordSplitter`
/// for the `hyphenation::Standard` struct. Use this struct for
/// language-aware hyphenation. See the [`hyphenation` documentation]
/// for details.
///
/// [`wrap_iter`]: ../struct.Wrapper.html#method.wrap_iter
/// [`split`]: #tymethod.split
/// [`hyphenation` documentation]: https://docs.rs/hyphenation/
pub trait WordSplitter {
    /// Return all possible splits of `word`.
    ///
    /// Each split is a triple with a head, a hyphen, and a tail where
    /// `head + tail == word`. The hyphen is the text to show after the
    /// head when the word is broken at that point; it is not part of
    /// the word itself. The hyphen can be empty if there is already a
    /// hyphen at the end of the head.
    ///
    /// The splits should be ordered from the shortest head to the
    /// longest and should include the option of not splitting at all,
    /// that is, `(word, "", "")`. So the word "technology" could be
    /// split into
    ///
    /// ```no_run
    /// vec![("tech", "-", "nology"),
    ///      ("technol", "-", "ogy"),
    ///      ("technolo", "-", "gy"),
    ///      ("technology", "", "")];
    /// ```
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>;
}
46
47/// Use this as a [`Wrapper.splitter`] to avoid any kind of
48/// hyphenation:
49///
50/// ```
51/// use textwrap::{Wrapper, NoHyphenation};
52///
53/// let wrapper = Wrapper::with_splitter(8, NoHyphenation);
54/// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
55/// ```
56///
57/// [`Wrapper.splitter`]: ../struct.Wrapper.html#structfield.splitter
58#[derive(Clone, Debug)]
59pub struct NoHyphenation;
60
61/// `NoHyphenation` implements `WordSplitter` by not splitting the
62/// word at all.
63impl WordSplitter for NoHyphenation {
64 fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
65 vec![(word, "", "")]
66 }
67}
68
69/// Simple and default way to split words: splitting on existing
70/// hyphens only.
71///
72/// You probably don't need to use this type since it's already used
73/// by default by `Wrapper::new`.
74#[derive(Clone, Debug)]
75pub struct HyphenSplitter;
76
77/// `HyphenSplitter` is the default `WordSplitter` used by
78/// `Wrapper::new`. It will split words on any existing hyphens in the
79/// word.
80///
81/// It will only use hyphens that are surrounded by alphanumeric
82/// characters, which prevents a word like "--foo-bar" from being
83/// split on the first or second hyphen.
84impl WordSplitter for HyphenSplitter {
85 fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
86 let mut triples = Vec::new();
87 // Split on hyphens, smallest split first. We only use hyphens
88 // that are surrounded by alphanumeric characters. This is to
89 // avoid splitting on repeated hyphens, such as those found in
90 // --foo-bar.
91 let mut char_indices = word.char_indices();
92 // Early return if the word is empty.
93 let mut prev = match char_indices.next() {
94 None => return vec![(word, "", "")],
95 Some((_, ch)) => ch,
96 };
97
98 // Find current word, or return early if the word only has a
99 // single character.
100 let (mut idx, mut cur) = match char_indices.next() {
101 None => return vec![(word, "", "")],
102 Some((idx, cur)) => (idx, cur),
103 };
104
105 for (i, next) in char_indices {
106 if prev.is_alphanumeric() && cur == '-' && next.is_alphanumeric() {
107 let (head, tail) = word.split_at(idx + 1);
108 triples.push((head, "", tail));
109 }
110 prev = cur;
111 idx = i;
112 cur = next;
113 }
114
115 // Finally option is no split at all.
116 triples.push((word, "", ""));
117
118 triples
119 }
120}
121
/// A hyphenation dictionary can be used to do language-specific
/// hyphenation using patterns from the hyphenation crate.
///
/// Only compiled when the `hyphenation` feature is enabled.
#[cfg(feature = "hyphenation")]
impl WordSplitter for Standard {
    /// Return one triple per break reported by `hyphenate`, in the
    /// order the dictionary yields them, followed by the unsplit word
    /// `(word, "", "")`.
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        // Ask the language dictionary where the word may be broken.
        // NOTE(review): `split_at` panics unless each break is a byte
        // offset on a character boundary -- presumably guaranteed by
        // the hyphenation crate; confirm against its documentation.
        let mut splits: Vec<(&'w str, &'w str, &'w str)> = self
            .hyphenate(word)
            .breaks
            .into_iter()
            .map(|at| {
                let (head, tail) = word.split_at(at);
                // Do not add a second hyphen when the head already
                // ends with one.
                let hyphen = if head.ends_with('-') { "" } else { "-" };
                (head, hyphen, tail)
            })
            .collect();

        // The final option is no split at all.
        splits.push((word, "", ""));

        splits
    }
}