textwrap/splitting.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
//! Word splitting functionality.
//!
//! To wrap text into lines, long words sometimes need to be split
//! across lines. The [`WordSplitter`] trait defines this
//! functionality. [`HyphenSplitter`] is the default implementation of
//! this trait: it will simply split words on existing hyphens.
#[cfg(feature = "hyphenation")]
use hyphenation::{Hyphenator, Standard};
/// An interface for splitting words.
///
/// When the [`wrap_iter`] method tries to fit text into a line, it
/// will eventually find a word that is too large for the current text
/// width. It will then call the currently configured `WordSplitter` to
/// have it attempt to split the word into smaller parts. This trait
/// describes that functionality via the [`split`] method.
///
/// If the `textwrap` crate has been compiled with the `hyphenation`
/// feature enabled, you will find an implementation of `WordSplitter`
/// for the `hyphenation::Standard` struct. Use this struct for
/// language-aware hyphenation. See the [`hyphenation` documentation]
/// for details.
///
/// [`wrap_iter`]: ../struct.Wrapper.html#method.wrap_iter
/// [`split`]: #tymethod.split
/// [`hyphenation` documentation]: https://docs.rs/hyphenation/
pub trait WordSplitter {
/// Return all possible splits of `word`. Each split is a triple
/// with a head, a hyphen, and a tail where `head + tail == word`.
/// The hyphen is the text that belongs at the break point; it is
/// not part of `word` itself, and it can be empty if there is
/// already a hyphen at the end of the head.
///
/// The splits should be ordered from the shortest head to the
/// longest, and the last entry should be "no split at all", i.e.
/// the whole word with an empty hyphen and an empty tail. So the
/// word "technology" could be split into
///
/// ```no_run
/// vec![("tech", "-", "nology"),
/// ("technol", "-", "ogy"),
/// ("technolo", "-", "gy"),
/// ("technology", "", "")];
/// ```
fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>;
}
/// Use this as a [`Wrapper.splitter`] to avoid any kind of
/// hyphenation:
///
/// ```
/// use textwrap::{Wrapper, NoHyphenation};
///
/// let wrapper = Wrapper::with_splitter(8, NoHyphenation);
/// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
/// ```
///
/// This is a zero-sized marker type, so it is `Copy` and all values
/// of it compare equal.
///
/// [`Wrapper.splitter`]: ../struct.Wrapper.html#structfield.splitter
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct NoHyphenation;
/// `NoHyphenation` implements `WordSplitter` by never splitting: the
/// only option it offers is the complete, unbroken word.
impl WordSplitter for NoHyphenation {
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        // A single candidate: the whole word as head, with an empty
        // hyphen and an empty tail.
        let unsplit = (word, "", "");
        std::iter::once(unsplit).collect()
    }
}
/// Simple and default way to split words: splitting on existing
/// hyphens only.
///
/// You probably don't need to use this type since it's already used
/// by default by `Wrapper::new`.
///
/// This is a zero-sized marker type, so it is `Copy` and all values
/// of it compare equal.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct HyphenSplitter;
/// `HyphenSplitter` is the default `WordSplitter` used by
/// `Wrapper::new`. It offers a split after each hyphen that is
/// already present in the word.
///
/// Only hyphens with an alphanumeric character directly before and
/// directly after them are used, which prevents a word like
/// "--foo-bar" from being split on the first or second hyphen.
impl WordSplitter for HyphenSplitter {
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        // Look at every run of three consecutive characters: the one
        // before a candidate, the candidate together with its byte
        // offset, and the one after it. Words shorter than three
        // characters produce no runs and hence no hyphen splits.
        let before = word.chars();
        let candidate = word.char_indices().skip(1);
        let after = word.chars().skip(2);

        let mut splits: Vec<(&'w str, &'w str, &'w str)> = before
            .zip(candidate)
            .zip(after)
            .filter(|&((left, (_, mid)), right)| {
                left.is_alphanumeric() && mid == '-' && right.is_alphanumeric()
            })
            .map(move |((_, (offset, _)), _)| {
                // '-' occupies a single byte, so `offset + 1` keeps the
                // hyphen at the end of the head. No extra hyphen is
                // needed, hence the empty middle element.
                let (head, tail) = word.split_at(offset + 1);
                (head, "", tail)
            })
            .collect();

        // The last option is always to not split the word at all.
        splits.push((word, "", ""));
        splits
    }
}
/// A hyphenation dictionary can be used to do language-specific
/// hyphenation using patterns from the hyphenation crate.
///
/// Every break position reported by `hyphenate` becomes one split.
/// The hyphen element is "-" unless the head already ends with a
/// hyphen, in which case it is empty.
#[cfg(feature = "hyphenation")]
impl WordSplitter for Standard {
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        // Byte offsets at which the dictionary allows a break.
        let breaks = self.hyphenate(word).breaks;

        let mut splits: Vec<(&'w str, &'w str, &'w str)> = breaks
            .into_iter()
            .map(move |at| {
                let (head, tail) = word.split_at(at);
                // Avoid a doubled hyphen when the head already has one.
                let hyphen = if head.ends_with('-') { "" } else { "-" };
                (head, hyphen, tail)
            })
            .collect();

        // The last option is always to not split the word at all.
        splits.push((word, "", ""));
        splits
    }
}