lipsum/lib.rs
1//! Lorem ipsum generator.
2//!
3//! This crate contains functions for generating pseudo-Latin lorem
4//! ipsum placeholder text. The traditional lorem ipsum text starts
5//! like this:
6//!
7//! > Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
8//! > eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut
9//! > enim ad minim veniam, quis nostrud exercitation ullamco laboris
10//! > nisi ut aliquip ex ea commodo consequat. [...]
11//!
12//! This text is in the [`LOREM_IPSUM`] constant. Random text looking
13//! like the above can be generated using the [`lipsum`] function.
14//! This function allows you to generate as much text as desired and
15//! each invocation will generate different text. This is done using a
16//! [Markov chain] based on both the [`LOREM_IPSUM`] and
17//! [`LIBER_PRIMUS`] texts. The latter constant holds the full text of
18//! the first book of a work by Cicero, of which the lorem ipsum text
19//! is a scrambled subset.
20//!
21//! The random looking text is generated using a Markov chain of order
22//! two, which simply means that the next word is based on the
23//! previous two words in the input texts. The Markov chain can be
24//! used with other input texts by creating an instance of
25//! [`MarkovChain`] and calling its [`learn`] method.
26//!
27//! [`LOREM_IPSUM`]: constant.LOREM_IPSUM.html
28//! [`LIBER_PRIMUS`]: constant.LIBER_PRIMUS.html
29//! [`lipsum`]: fn.lipsum.html
30//! [`MarkovChain`]: struct.MarkovChain.html
31//! [`learn`]: struct.MarkovChain.html#method.learn
32//! [Markov chain]: https://en.wikipedia.org/wiki/Markov_chain
33
34#![doc(html_root_url = "https://docs.rs/lipsum/0.6.0")]
35#![deny(missing_docs)]
36
37extern crate rand;
38#[cfg(test)]
39extern crate rand_xorshift;
40
41use rand::rngs::ThreadRng;
42use rand::seq::SliceRandom;
43use rand::Rng;
44use std::cell::RefCell;
45use std::collections::HashMap;
46
47/// A bigram: two consecutive words, used as the Markov chain's state.
48pub type Bigram<'a> = (&'a str, &'a str);
49
50/// Simple order two Markov chain implementation.
51///
52/// The [Markov chain] is a chain of order two, which means that it
53/// will use the previous two words (a bigram) when predicting the
54/// next word. This is normally enough to generate random text that
55/// looks somewhat plausible. The implementation is based on
56/// [Generating arbitrary text with Markov chains in Rust][blog post].
57///
58/// [Markov chain]: https://en.wikipedia.org/wiki/Markov_chain
59/// [blog post]: https://blakewilliams.me/posts/generating-arbitrary-text-with-markov-chains-in-rust
60pub struct MarkovChain<'a, R: Rng> {
61 map: HashMap<Bigram<'a>, Vec<&'a str>>,
62 keys: Vec<Bigram<'a>>,
63 rng: R,
64}
65
66impl<'a> MarkovChain<'a, ThreadRng> {
67 /// Create a new empty Markov chain. It will use a default
68 /// thread-local random number generator.
69 ///
70 /// # Examples
71 ///
72 /// ```
73 /// use lipsum::MarkovChain;
74 ///
75 /// let chain = MarkovChain::new();
76 /// assert!(chain.is_empty());
77 /// ```
78 pub fn new() -> MarkovChain<'a, ThreadRng> {
79 MarkovChain::new_with_rng(rand::thread_rng())
80 }
81}
82
83impl<'a> Default for MarkovChain<'a, ThreadRng> {
84 /// Create a new empty Markov chain. It will use a default
85 /// thread-local random number generator.
86 fn default() -> Self {
87 Self::new()
88 }
89}
90
91impl<'a, R: Rng> MarkovChain<'a, R> {
92 /// Create a new empty Markov chain that uses the given random
93 /// number generator.
94 ///
95 /// # Examples
96 ///
97 /// ```
98 /// extern crate rand;
99 /// extern crate rand_xorshift;
100 /// # extern crate lipsum;
101 ///
102 /// # fn main() {
103 /// use rand::SeedableRng;
104 /// use rand_xorshift::XorShiftRng;
105 /// use lipsum::MarkovChain;
106 ///
107 /// let rng = XorShiftRng::seed_from_u64(0);
108 /// let mut chain = MarkovChain::new_with_rng(rng);
109 /// chain.learn("infra-red red orange yellow green blue indigo x-ray");
110 ///
111 /// // The chain jumps consistently like this:
112 /// assert_eq!(chain.generate(1), "Yellow.");
113 /// assert_eq!(chain.generate(1), "Blue.");
114 /// assert_eq!(chain.generate(1), "Green.");
115 /// # }
116 /// ```
117
118 pub fn new_with_rng(rng: R) -> MarkovChain<'a, R> {
119 MarkovChain {
120 map: HashMap::new(),
121 keys: Vec::new(),
122 rng: rng,
123 }
124 }
125
126 /// Add new text to the Markov chain. This can be called several
127 /// times to build up the chain.
128 ///
129 /// # Examples
130 ///
131 /// ```
132 /// use lipsum::MarkovChain;
133 ///
134 /// let mut chain = MarkovChain::new();
135 /// chain.learn("red green blue");
136 /// assert_eq!(chain.words(("red", "green")), Some(&vec!["blue"]));
137 ///
138 /// chain.learn("red green yellow");
139 /// assert_eq!(chain.words(("red", "green")), Some(&vec!["blue", "yellow"]));
140 /// ```
141 pub fn learn(&mut self, sentence: &'a str) {
142 let words = sentence.split_whitespace().collect::<Vec<&str>>();
143 for window in words.windows(3) {
144 let (a, b, c) = (window[0], window[1], window[2]);
145 self.map.entry((a, b)).or_insert_with(Vec::new).push(c);
146 }
147 // Sync the keys with the current map.
148 self.keys = self.map.keys().cloned().collect();
149 self.keys.sort();
150 }
151
152 /// Returs the number of states in the Markov chain.
153 ///
154 /// # Examples
155 ///
156 /// ```
157 /// use lipsum::MarkovChain;
158 ///
159 /// let mut chain = MarkovChain::new();
160 /// assert_eq!(chain.len(), 0);
161 ///
162 /// chain.learn("red orange yellow green blue indigo");
163 /// assert_eq!(chain.len(), 4);
164 /// ```
165 #[inline]
166 pub fn len(&self) -> usize {
167 self.map.len()
168 }
169
170 /// Returns `true` if the Markov chain has no states.
171 ///
172 /// # Examples
173 ///
174 /// ```
175 /// use lipsum::MarkovChain;
176 ///
177 /// let mut chain = MarkovChain::new();
178 /// assert!(chain.is_empty());
179 ///
180 /// chain.learn("foo bar baz");
181 /// assert!(!chain.is_empty());
182 /// ```
183 pub fn is_empty(&self) -> bool {
184 self.len() == 0
185 }
186
187 /// Get the possible words following the given bigram, or `None`
188 /// if the state is invalid.
189 ///
190 /// # Examples
191 ///
192 /// ```
193 /// use lipsum::MarkovChain;
194 ///
195 /// let mut chain = MarkovChain::new();
196 /// chain.learn("red green blue");
197 /// assert_eq!(chain.words(("red", "green")), Some(&vec!["blue"]));
198 /// assert_eq!(chain.words(("foo", "bar")), None);
199 /// ```
200 pub fn words(&self, state: Bigram<'a>) -> Option<&Vec<&str>> {
201 self.map.get(&state)
202 }
203
204 /// Generate a sentence with `n` words of lorem ipsum text. The
205 /// sentence will start from a random point in the Markov chain
206 /// and a `.` will be added as necessary to form a full sentence.
207 ///
208 /// See [`generate_from`] if you want to control the starting
209 /// point for the generated text and see [`iter`] if you simply
210 /// want a sequence of words.
211 ///
212 /// # Examples
213 ///
214 /// Generating the sounds of a grandfather clock:
215 ///
216 /// ```
217 /// use lipsum::MarkovChain;
218 ///
219 /// let mut chain = MarkovChain::new();
220 /// chain.learn("Tick, Tock, Tick, Tock, Ding! Tick, Tock, Ding! Ding!");
221 /// println!("{}", chain.generate(15));
222 /// ```
223 ///
224 /// The output looks like this:
225 ///
226 /// > Ding! Tick, Tock, Tick, Tock, Ding! Ding! Tock, Ding! Tick,
227 /// > Tock, Tick, Tock, Tick, Tock.
228 ///
229 /// [`generate_from`]: struct.MarkovChain.html#method.generate_from
230 /// [`iter`]: struct.MarkovChain.html#method.iter
231 pub fn generate(&mut self, n: usize) -> String {
232 join_words(self.iter().take(n))
233 }
234
235 /// Generate a sentence with `n` words of lorem ipsum text. The
236 /// sentence will start from the given bigram and a `.` will be
237 /// added as necessary to form a full sentence.
238 ///
239 /// Use [`generate`] if the starting point is not important. See
240 /// [`iter_from`] if you want a sequence of words that you can
241 /// format yourself.
242 ///
243 /// [`generate`]: struct.MarkovChain.html#method.generate
244 /// [`iter_from`]: struct.MarkovChain.html#method.iter_from
245 pub fn generate_from(&mut self, n: usize, from: Bigram<'a>) -> String {
246 join_words(self.iter_from(from).take(n))
247 }
248
249 /// Make a never-ending iterator over the words in the Markov
250 /// chain. The iterator starts at a random point in the chain.
251 pub fn iter(&mut self) -> Words<R> {
252 let state = if self.is_empty() {
253 ("", "")
254 } else {
255 *self.keys.choose(&mut self.rng).unwrap()
256 };
257 Words {
258 map: &self.map,
259 rng: &mut self.rng,
260 keys: &self.keys,
261 state: state,
262 }
263 }
264
265 /// Make a never-ending iterator over the words in the Markov
266 /// chain. The iterator starts at the given bigram.
267 pub fn iter_from(&mut self, from: Bigram<'a>) -> Words<R> {
268 Words {
269 map: &self.map,
270 rng: &mut self.rng,
271 keys: &self.keys,
272 state: from,
273 }
274 }
275}
276
277/// Never-ending iterator over words in the Markov chain.
278///
279/// Generated with the [`iter`] or [`iter_from`] methods.
280///
281/// [`iter`]: struct.MarkovChain.html#method.iter
282/// [`iter_from`]: struct.MarkovChain.html#method.iter_from
283pub struct Words<'a, R: 'a + Rng> {
284 map: &'a HashMap<Bigram<'a>, Vec<&'a str>>,
285 rng: &'a mut R,
286 keys: &'a Vec<Bigram<'a>>,
287 state: Bigram<'a>,
288}
289
290impl<'a, R: Rng> Iterator for Words<'a, R> {
291 type Item = &'a str;
292
293 fn next(&mut self) -> Option<&'a str> {
294 if self.map.is_empty() {
295 return None;
296 }
297
298 let result = Some(self.state.0);
299
300 while !self.map.contains_key(&self.state) {
301 self.state = *self.keys.choose(self.rng).unwrap();
302 }
303 let next_words = &self.map[&self.state];
304 let next = next_words.choose(self.rng).unwrap();
305 self.state = (self.state.1, next);
306 result
307 }
308}
309
/// Check if `c` is an ASCII punctuation character.
///
/// This is a thin wrapper around the standard library's
/// `char::is_ascii_punctuation`, kept as a free function so that it
/// can be passed directly as a string pattern (for example to
/// `trim_matches`). The characters matched are:
///
/// * U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`
/// * U+003A ... U+0040 `: ; < = > ? @`
/// * U+005B ... U+0060 ``[ \ ] ^ _ ` ``
/// * U+007B ... U+007E `{ | } ~`
fn is_ascii_punctuation(c: char) -> bool {
    c.is_ascii_punctuation()
}
324
/// Capitalize the first character in a string.
///
/// Only the first character is changed; the rest of `word` is copied
/// unmodified. An empty input gives an empty string. The uppercase
/// form of a character may consist of several characters (for
/// example `ß` becomes `SS`).
fn capitalize(word: &str) -> String {
    let mut chars = word.chars();
    match chars.next() {
        None => String::new(),
        Some(first) => {
            let mut result = String::with_capacity(word.len());
            // Push the uppercase mapping directly instead of going
            // through a temporary `String`.
            result.extend(first.to_uppercase());
            // `as_str` is the remainder of `word` after `first`.
            result.push_str(chars.as_str());
            result
        }
    }
}
337
338/// Join words from an iterator. The first word is always capitalized
339/// and the generated sentence will end with `'.'` if it doesn't
340/// already end with some other ASCII punctuation character.
341fn join_words<'a, I: Iterator<Item = &'a str>>(mut words: I) -> String {
342 match words.next() {
343 None => String::new(),
344 Some(word) => {
345 let mut sentence = capitalize(word);
346
347 // Add remaining words.
348 for word in words {
349 sentence.push(' ');
350 sentence.push_str(word);
351 }
352
353 // Ensure the sentence ends with either one of ".!?".
354 if !sentence.ends_with(|c: char| c == '.' || c == '!' || c == '?') {
355 // Trim all trailing punctuation characters to avoid
356 // adding '.' after a ',' or similar.
357 let idx = sentence.trim_right_matches(is_ascii_punctuation).len();
358 sentence.truncate(idx);
359 sentence.push('.');
360 }
361
362 sentence
363 }
364 }
365}
366
367/// The traditional lorem ipsum text as given in [Wikipedia]. Using
368/// this text alone for a Markov chain of order two doesn't work very
369/// well since each bigram (two consequtive words) is followed by just
370/// one other word. In other words, the Markov chain will always
371/// produce the same output and recreate the lorem ipsum text
372/// precisely. However, combining it with the full text in
373/// [`LIBER_PRIMUS`] works well.
374///
375/// [Wikipedia]: https://en.wikipedia.org/wiki/Lorem_ipsum
376/// [`LIBER_PRIMUS`]: constant.LIBER_PRIMUS.html
377pub const LOREM_IPSUM: &'static str = include_str!("lorem-ipsum.txt");
378
379/// The first book in Cicero's work De finibus bonorum et malorum ("On
380/// the ends of good and evil"). The lorem ipsum text in
381/// [`LOREM_IPSUM`] is derived from part of this text.
382///
383/// [`LOREM_IPSUM`]: constant.LOREM_IPSUM.html
384pub const LIBER_PRIMUS: &'static str = include_str!("liber-primus.txt");
385
386thread_local! {
387 // Markov chain generating lorem ipsum text.
388 static LOREM_IPSUM_CHAIN: RefCell<MarkovChain<'static, ThreadRng>> = {
389 let mut chain = MarkovChain::new();
390 // The cost of learning increases as more and more text is
391 // added, so we start with the smallest text.
392 chain.learn(LOREM_IPSUM);
393 chain.learn(LIBER_PRIMUS);
394 RefCell::new(chain)
395 }
396}
397
398/// Generate `n` words of lorem ipsum text. The output will always
399/// start with "Lorem ipsum".
400///
401/// The text continues with the standard lorem ipsum text from
402/// [`LOREM_IPSUM`] and becomes random if more than 18 words is
403/// requested. See [`lipsum_words`] if fully random text is needed.
404///
405/// # Examples
406///
407/// ```
408/// use lipsum::lipsum;
409///
410/// assert_eq!(lipsum(7), "Lorem ipsum dolor sit amet, consectetur adipiscing.");
411/// ```
412///
413/// [`LOREM_IPSUM`]: constant.LOREM_IPSUM.html
414/// [`lipsum_words`]: fn.lipsum_words.html
415pub fn lipsum(n: usize) -> String {
416 LOREM_IPSUM_CHAIN.with(|cell| {
417 let mut chain = cell.borrow_mut();
418 chain.generate_from(n, ("Lorem", "ipsum"))
419 })
420}
421
422/// Generate `n` words of random lorem ipsum text.
423///
424/// The text starts with a random word from [`LOREM_IPSUM`]. Multiple
425/// sentences may be generated, depending on the punctuation of the
426/// words being random selected.
427///
428/// # Examples
429///
430/// ```
431/// use lipsum::lipsum_words;
432///
433/// println!("{}", lipsum_words(6));
434/// // -> "Propter soliditatem, censet in infinito inani."
435/// ```
436///
437/// [`LOREM_IPSUM`]: constant.LOREM_IPSUM.html
438pub fn lipsum_words(n: usize) -> String {
439 LOREM_IPSUM_CHAIN.with(|cell| {
440 let mut chain = cell.borrow_mut();
441 chain.generate(n)
442 })
443}
444
445/// Minimum number of words to include in a title.
446const TITLE_MIN_WORDS: usize = 3;
447/// Maximum number of words to include in a title.
448const TITLE_MAX_WORDS: usize = 8;
449/// Words shorter than this size are not capitalized.
450const TITLE_SMALL_WORD: usize = 3;
451
452/// Generate a short lorem ipsum text with words in title case.
453///
454/// The words are capitalized and stripped for punctuation characters.
455///
456/// # Examples
457///
458/// ```
459/// use lipsum::lipsum_title;
460///
461/// println!("{}", lipsum_title());
462/// ```
463///
464/// This will generate a string like
465///
466/// > Grate Meminit et Praesentibus
467///
468/// which should be suitable for use in a document title for section
469/// heading.
470pub fn lipsum_title() -> String {
471 LOREM_IPSUM_CHAIN.with(|cell| {
472 let n = rand::thread_rng().gen_range(TITLE_MIN_WORDS, TITLE_MAX_WORDS);
473 let mut chain = cell.borrow_mut();
474 // The average word length with our corpus is 7.6 bytes so
475 // this capacity will avoid most allocations.
476 let mut title = String::with_capacity(8 * n);
477
478 let words = chain
479 .iter()
480 .map(|word| word.trim_matches(is_ascii_punctuation))
481 .filter(|word| !word.is_empty())
482 .take(n);
483
484 for (i, word) in words.enumerate() {
485 if i > 0 {
486 title.push(' ');
487 }
488
489 // Capitalize the first word and all long words.
490 if i == 0 || word.len() > TITLE_SMALL_WORD {
491 title.push_str(&capitalize(word));
492 } else {
493 title.push_str(word);
494 }
495 }
496 title
497 })
498}
499
/// Unit tests for the lorem ipsum generator and the Markov chain.
#[cfg(test)]
mod tests {
    use super::rand::SeedableRng;
    use super::rand_xorshift::XorShiftRng;
    use super::*;

    #[test]
    fn starts_with_lorem_ipsum() {
        assert_eq!(&lipsum(10)[..11], "Lorem ipsum");
    }

    #[test]
    fn generate_zero_words() {
        assert_eq!(lipsum(0).split_whitespace().count(), 0);
    }

    #[test]
    fn generate_one_word() {
        assert_eq!(lipsum(1).split_whitespace().count(), 1);
    }

    #[test]
    fn generate_two_words() {
        assert_eq!(lipsum(2).split_whitespace().count(), 2);
    }

    #[test]
    fn starts_differently() {
        // Check that calls to lipsum_words don't always start with
        // "Lorem ipsum": two random texts are expected to differ
        // within their first few bytes.
        let idx = "Lorem ipsum".len();
        assert_ne!(&lipsum_words(5)[..idx], &lipsum_words(5)[..idx]);
    }

    #[test]
    fn generate_title() {
        for word in lipsum_title().split_whitespace() {
            assert!(
                !word.starts_with(is_ascii_punctuation) && !word.ends_with(is_ascii_punctuation),
                "Unexpected punctuation: {:?}",
                word
            );
            // Only long words are guaranteed to be capitalized.
            if word.len() > TITLE_SMALL_WORD {
                assert!(
                    word.starts_with(char::is_uppercase),
                    "Expected long word to be capitalized: {:?}",
                    word
                );
            }
        }
    }

    #[test]
    fn empty_chain() {
        let mut chain = MarkovChain::new();
        assert_eq!(chain.generate(10), "");
    }

    #[test]
    fn generate_from() {
        let mut chain = MarkovChain::new();
        chain.learn("red orange yellow green blue indigo violet");
        assert_eq!(
            chain.generate_from(5, ("orange", "yellow")),
            "Orange yellow green blue indigo."
        );
    }

    #[test]
    fn generate_last_bigram() {
        // The bigram "yyy zzz" will not be present in the Markov
        // chain's map, and so we will not generate "xxx yyy zzz" as
        // one would expect. The chain moves from state "xxx yyy" to
        // "yyy zzz", but sees that as invalid state and resets itself
        // back to "xxx yyy".
        let mut chain = MarkovChain::new();
        chain.learn("xxx yyy zzz");
        assert_ne!(chain.generate_from(3, ("xxx", "yyy")), "xxx yyy zzz");
    }

    #[test]
    fn generate_from_no_panic() {
        // No panic when asked to generate a chain from a starting
        // point that doesn't exist in the chain.
        let mut chain = MarkovChain::new();
        chain.learn("foo bar baz");
        chain.generate_from(3, ("xxx", "yyy"));
    }

    #[test]
    fn chain_map() {
        let mut chain = MarkovChain::new();
        chain.learn("foo bar baz quuz");
        let map = &chain.map;

        assert_eq!(map.len(), 2);
        assert_eq!(map[&("foo", "bar")], vec!["baz"]);
        assert_eq!(map[&("bar", "baz")], vec!["quuz"]);
    }

    #[test]
    fn new_with_rng() {
        // A seeded generator makes the output reproducible.
        let rng = XorShiftRng::seed_from_u64(1234);
        let mut chain = MarkovChain::new_with_rng(rng);
        chain.learn("foo bar x y z");
        chain.learn("foo bar a b c");

        assert_eq!(chain.generate(15), "A b x y y b y bar a b y x y bar a.");
    }
}
609}