//! Lorem ipsum generator.
//!
//! This crate contains functions for generating pseudo-Latin lorem
//! ipsum placeholder text. The traditional lorem ipsum text starts
//! like this:
//!
//! > Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
//! > eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut
//! > enim ad minim veniam, quis nostrud exercitation ullamco laboris
//! > nisi ut aliquip ex ea commodo consequat. [...]
//!
//! This text is in the [`LOREM_IPSUM`] constant. Random text looking
//! like the above can be generated using the [`lipsum`] function.
//! This function allows you to generate as much text as desired and
//! each invocation will generate different text. This is done using a
//! [Markov chain] based on both the [`LOREM_IPSUM`] and
//! [`LIBER_PRIMUS`] texts. The latter constant holds the full text of
//! the first book of a work by Cicero, of which the lorem ipsum text
//! is a scrambled subset.
//!
//! The random-looking text is generated using a Markov chain of order
//! two, which simply means that the next word is based on the
//! previous two words in the input texts. The Markov chain can be
//! used with other input texts by creating an instance of
//! [`MarkovChain`] and calling its [`learn`] method.
//!
//! [`LOREM_IPSUM`]: constant.LOREM_IPSUM.html
//! [`LIBER_PRIMUS`]: constant.LIBER_PRIMUS.html
//! [`lipsum`]: fn.lipsum.html
//! [`MarkovChain`]: struct.MarkovChain.html
//! [`learn`]: struct.MarkovChain.html#method.learn
//! [Markov chain]: https://en.wikipedia.org/wiki/Markov_chain
#![doc(html_root_url = "")]
extern crate rand;
extern crate rand_xorshift;
use rand::rngs::ThreadRng;
use rand::seq::SliceRandom;
use rand::Rng;
use std::cell::RefCell;
use std::collections::HashMap;
/// A bigram is simply two consecutive words.
///
/// Both words are string slices borrowed for the lifetime `'a`.
pub type Bigram<'a> = (&'a str, &'a str);
/// Simple order two Markov chain implementation.
/// The [Markov chain] is a chain of order two, which means that it
/// will use the previous two words (a bigram) when predicting the
/// next word. This is normally enough to generate random text that
/// looks somewhat plausible. The implementation is based on
/// [Generating arbitrary text with Markov chains in Rust][blog post].
/// [Markov chain]:
/// [blog post]:
pub struct MarkovChain<'a, R: Rng> {
map: HashMap<Bigram<'a>, Vec<&'a str>>,
keys: Vec<Bigram<'a>>,
rng: R,
impl<'a> MarkovChain<'a, ThreadRng> {
/// Create a new empty Markov chain. It will use a default
/// thread-local random number generator.
/// # Examples
/// ```
/// use lipsum::MarkovChain;
/// let chain = MarkovChain::new();
/// assert!(chain.is_empty());
/// ```
pub fn new() -> MarkovChain<'a, ThreadRng> {
impl<'a> Default for MarkovChain<'a, ThreadRng> {
/// Create a new empty Markov chain. It will use a default
/// thread-local random number generator.
fn default() -> Self {
impl<'a, R: Rng> MarkovChain<'a, R> {
/// Create a new empty Markov chain that uses the given random
/// number generator.
/// # Examples
/// ```
/// extern crate rand;
/// extern crate rand_xorshift;
/// # extern crate lipsum;
/// # fn main() {
/// use rand::SeedableRng;
/// use rand_xorshift::XorShiftRng;
/// use lipsum::MarkovChain;
/// let rng = XorShiftRng::seed_from_u64(0);
/// let mut chain = MarkovChain::new_with_rng(rng);
/// chain.learn("infra-red red orange yellow green blue indigo x-ray");
/// // The chain jumps consistently like this:
/// assert_eq!(chain.generate(1), "Yellow.");
/// assert_eq!(chain.generate(1), "Blue.");
/// assert_eq!(chain.generate(1), "Green.");
/// # }
/// ```
pub fn new_with_rng(rng: R) -> MarkovChain<'a, R> {
MarkovChain {
map: HashMap::new(),
keys: Vec::new(),
rng: rng,
/// Add new text to the Markov chain. This can be called several
/// times to build up the chain.
/// # Examples
/// ```
/// use lipsum::MarkovChain;
/// let mut chain = MarkovChain::new();
/// chain.learn("red green blue");
/// assert_eq!(chain.words(("red", "green")), Some(&vec!["blue"]));
/// chain.learn("red green yellow");
/// assert_eq!(chain.words(("red", "green")), Some(&vec!["blue", "yellow"]));
/// ```
pub fn learn(&mut self, sentence: &'a str) {
let words = sentence.split_whitespace().collect::<Vec<&str>>();
for window in {
let (a, b, c) = (window[0], window[1], window[2]);, b)).or_insert_with(Vec::new).push(c);
// Sync the keys with the current map.
self.keys =;
/// Returs the number of states in the Markov chain.
/// # Examples
/// ```
/// use lipsum::MarkovChain;
/// let mut chain = MarkovChain::new();
/// assert_eq!(chain.len(), 0);
/// chain.learn("red orange yellow green blue indigo");
/// assert_eq!(chain.len(), 4);
/// ```
pub fn len(&self) -> usize {
/// Returns `true` if the Markov chain has no states.
/// # Examples
/// ```
/// use lipsum::MarkovChain;
/// let mut chain = MarkovChain::new();
/// assert!(chain.is_empty());
/// chain.learn("foo bar baz");
/// assert!(!chain.is_empty());
/// ```
pub fn is_empty(&self) -> bool {
self.len() == 0
/// Get the possible words following the given bigram, or `None`
/// if the state is invalid.
/// # Examples
/// ```
/// use lipsum::MarkovChain;
/// let mut chain = MarkovChain::new();
/// chain.learn("red green blue");
/// assert_eq!(chain.words(("red", "green")), Some(&vec!["blue"]));
/// assert_eq!(chain.words(("foo", "bar")), None);
/// ```
pub fn words(&self, state: Bigram<'a>) -> Option<&Vec<&str>> {
/// Generate a sentence with `n` words of lorem ipsum text. The
/// sentence will start from a random point in the Markov chain
/// and a `.` will be added as necessary to form a full sentence.
/// See [`generate_from`] if you want to control the starting
/// point for the generated text and see [`iter`] if you simply
/// want a sequence of words.
/// # Examples
/// Generating the sounds of a grandfather clock:
/// ```
/// use lipsum::MarkovChain;
/// let mut chain = MarkovChain::new();
/// chain.learn("Tick, Tock, Tick, Tock, Ding! Tick, Tock, Ding! Ding!");
/// println!("{}", chain.generate(15));
/// ```
/// The output looks like this:
/// > Ding! Tick, Tock, Tick, Tock, Ding! Ding! Tock, Ding! Tick,
/// > Tock, Tick, Tock, Tick, Tock.
/// [`generate_from`]: struct.MarkovChain.html#method.generate_from
/// [`iter`]: struct.MarkovChain.html#method.iter
pub fn generate(&mut self, n: usize) -> String {
/// Generate a sentence with `n` words of lorem ipsum text. The
/// sentence will start from the given bigram and a `.` will be
/// added as necessary to form a full sentence.
/// Use [`generate`] if the starting point is not important. See
/// [`iter_from`] if you want a sequence of words that you can
/// format yourself.
/// [`generate`]: struct.MarkovChain.html#method.generate
/// [`iter_from`]: struct.MarkovChain.html#method.iter_from
pub fn generate_from(&mut self, n: usize, from: Bigram<'a>) -> String {
/// Make a never-ending iterator over the words in the Markov
/// chain. The iterator starts at a random point in the chain.
pub fn iter(&mut self) -> Words<R> {
let state = if self.is_empty() {
("", "")
} else {
*self.keys.choose(&mut self.rng).unwrap()
Words {
map: &,
rng: &mut self.rng,
keys: &self.keys,
state: state,
/// Make a never-ending iterator over the words in the Markov
/// chain. The iterator starts at the given bigram.
pub fn iter_from(&mut self, from: Bigram<'a>) -> Words<R> {
Words {
map: &,
rng: &mut self.rng,
keys: &self.keys,
state: from,
/// Never-ending iterator over words in the Markov chain.
/// Generated with the [`iter`] or [`iter_from`] methods.
/// [`iter`]: struct.MarkovChain.html#method.iter
/// [`iter_from`]: struct.MarkovChain.html#method.iter_from
pub struct Words<'a, R: 'a + Rng> {
map: &'a HashMap<Bigram<'a>, Vec<&'a str>>,
rng: &'a mut R,
keys: &'a Vec<Bigram<'a>>,
state: Bigram<'a>,
impl<'a, R: Rng> Iterator for Words<'a, R> {
type Item = &'a str;
fn next(&mut self) -> Option<&'a str> {
if {
return None;
let result = Some(self.state.0);
while ! {
self.state = *self.keys.choose(self.rng).unwrap();
let next_words = &[&self.state];
let next = next_words.choose(self.rng).unwrap();
self.state = (self.state.1, next);
/// Check if `c` is an ASCII punctuation character.
///
/// The accepted characters are:
///
/// * U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`
/// * U+003A ... U+0040 `: ; < = > ? @`
/// * U+005B ... U+0060 ``[ \ ] ^ _ ` ``
/// * U+007B ... U+007E `{ | } ~`
fn is_ascii_punctuation(c: char) -> bool {
    // The standard library method uses exactly the table above, so
    // there is no need for a hand-written match with the deprecated
    // `...` range patterns. The free function is kept because it is
    // convenient to pass around as a `fn(char) -> bool` predicate.
    c.is_ascii_punctuation()
}
/// Capitalize the first character in a string.
///
/// The remainder of the string is copied unchanged. An empty input
/// gives an empty string. Note that the uppercase form of a single
/// character can be several characters long (`ß` becomes `SS`).
fn capitalize(word: &str) -> String {
    let mut chars = word.chars();
    match chars.next() {
        None => String::new(),
        Some(first) => {
            let mut result = String::with_capacity(word.len());
            result.extend(first.to_uppercase());
            // `as_str` is whatever is left after the first character.
            result.push_str(chars.as_str());
            result
        }
    }
}
/// Join words from an iterator. The first word is always capitalized
/// and the generated sentence will end with `'.'` if it doesn't
/// already end with some other ASCII punctuation character.
fn join_words<'a, I: Iterator<Item = &'a str>>(mut words: I) -> String {
match {
None => String::new(),
Some(word) => {
let mut sentence = capitalize(word);
// Add remaining words.
for word in words {
sentence.push(' ');
// Ensure the sentence ends with either one of ".!?".
if !sentence.ends_with(|c: char| c == '.' || c == '!' || c == '?') {
// Trim all trailing punctuation characters to avoid
// adding '.' after a ',' or similar.
let idx = sentence.trim_right_matches(is_ascii_punctuation).len();
/// The traditional lorem ipsum text as given in [Wikipedia]. Using
/// this text alone for a Markov chain of order two doesn't work very
/// well since each bigram (two consecutive words) is followed by just
/// one other word. In other words, the Markov chain will always
/// produce the same output and recreate the lorem ipsum text
/// precisely. However, combining it with the full text in
/// [`LIBER_PRIMUS`] works well.
///
/// [Wikipedia]: https://en.wikipedia.org/wiki/Lorem_ipsum
/// [`LIBER_PRIMUS`]: constant.LIBER_PRIMUS.html
pub const LOREM_IPSUM: &str = include_str!("lorem-ipsum.txt");
/// The first book in Cicero's work De finibus bonorum et malorum ("On
/// the ends of good and evil"). The lorem ipsum text in
/// [`LOREM_IPSUM`] is derived from part of this text.
///
/// [`LOREM_IPSUM`]: constant.LOREM_IPSUM.html
pub const LIBER_PRIMUS: &str = include_str!("liber-primus.txt");
thread_local! {
// Markov chain generating lorem ipsum text.
static LOREM_IPSUM_CHAIN: RefCell<MarkovChain<'static, ThreadRng>> = {
let mut chain = MarkovChain::new();
// The cost of learning increases as more and more text is
// added, so we start with the smallest text.
/// Generate `n` words of lorem ipsum text. The output will always
/// start with "Lorem ipsum".
/// The text continues with the standard lorem ipsum text from
/// [`LOREM_IPSUM`] and becomes random if more than 18 words is
/// requested. See [`lipsum_words`] if fully random text is needed.
/// # Examples
/// ```
/// use lipsum::lipsum;
/// assert_eq!(lipsum(7), "Lorem ipsum dolor sit amet, consectetur adipiscing.");
/// ```
/// [`LOREM_IPSUM`]: constant.LOREM_IPSUM.html
/// [`lipsum_words`]: fn.lipsum_words.html
pub fn lipsum(n: usize) -> String {
LOREM_IPSUM_CHAIN.with(|cell| {
let mut chain = cell.borrow_mut();
chain.generate_from(n, ("Lorem", "ipsum"))
/// Generate `n` words of random lorem ipsum text.
/// The text starts with a random word from [`LOREM_IPSUM`]. Multiple
/// sentences may be generated, depending on the punctuation of the
/// words being random selected.
/// # Examples
/// ```
/// use lipsum::lipsum_words;
/// println!("{}", lipsum_words(6));
/// // -> "Propter soliditatem, censet in infinito inani."
/// ```
/// [`LOREM_IPSUM`]: constant.LOREM_IPSUM.html
pub fn lipsum_words(n: usize) -> String {
LOREM_IPSUM_CHAIN.with(|cell| {
let mut chain = cell.borrow_mut();
/// Minimum number of words to include in a title.
const TITLE_MIN_WORDS: usize = 3;
/// Upper bound for the number of words to include in a title.
///
/// NOTE(review): `lipsum_title` passes this as the second argument
/// of the two-argument `gen_range`, which treats the upper bound as
/// exclusive, so titles appear to get at most `TITLE_MAX_WORDS - 1`
/// words. Confirm whether that is intended.
const TITLE_MAX_WORDS: usize = 8;
/// Words of at most this many bytes are not capitalized, unless they
/// come first in the title.
const TITLE_SMALL_WORD: usize = 3;
/// Generate a short lorem ipsum text with words in title case.
/// The words are capitalized and stripped for punctuation characters.
/// # Examples
/// ```
/// use lipsum::lipsum_title;
/// println!("{}", lipsum_title());
/// ```
/// This will generate a string like
/// > Grate Meminit et Praesentibus
/// which should be suitable for use in a document title for section
/// heading.
pub fn lipsum_title() -> String {
LOREM_IPSUM_CHAIN.with(|cell| {
let n = rand::thread_rng().gen_range(TITLE_MIN_WORDS, TITLE_MAX_WORDS);
let mut chain = cell.borrow_mut();
// The average word length with our corpus is 7.6 bytes so
// this capacity will avoid most allocations.
let mut title = String::with_capacity(8 * n);
let words = chain
.map(|word| word.trim_matches(is_ascii_punctuation))
.filter(|word| !word.is_empty())
for (i, word) in words.enumerate() {
if i > 0 {
title.push(' ');
// Capitalize the first word and all long words.
if i == 0 || word.len() > TITLE_SMALL_WORD {
} else {
#[cfg(test)]
mod tests {
    use super::rand::SeedableRng;
    use super::rand_xorshift::XorShiftRng;
    use super::*;

    #[test]
    fn starts_with_lorem_ipsum() {
        assert_eq!(&lipsum(10)[..11], "Lorem ipsum");
    }

    #[test]
    fn generate_zero_words() {
        assert_eq!(lipsum(0).split_whitespace().count(), 0);
    }

    #[test]
    fn generate_one_word() {
        assert_eq!(lipsum(1).split_whitespace().count(), 1);
    }

    #[test]
    fn generate_two_words() {
        assert_eq!(lipsum(2).split_whitespace().count(), 2);
    }

    #[test]
    fn starts_differently() {
        // Check that calls to lipsum_words don't always start with
        // "Lorem ipsum".
        let idx = "Lorem ipsum".len();
        assert_ne!(&lipsum_words(5)[..idx], &lipsum_words(5)[..idx]);
    }

    #[test]
    fn generate_title() {
        for word in lipsum_title().split_whitespace() {
            assert!(
                !word.starts_with(is_ascii_punctuation) && !word.ends_with(is_ascii_punctuation),
                "Unexpected punctuation: {:?}",
                word
            );
            if word.len() > TITLE_SMALL_WORD {
                assert!(
                    word.starts_with(char::is_uppercase),
                    "Expected long word to be capitalized: {:?}",
                    word
                );
            }
        }
    }

    #[test]
    fn empty_chain() {
        let mut chain = MarkovChain::new();
        assert_eq!(chain.generate(10), "");
    }

    #[test]
    fn generate_from() {
        let mut chain = MarkovChain::new();
        chain.learn("red orange yellow green blue indigo violet");
        assert_eq!(
            chain.generate_from(5, ("orange", "yellow")),
            "Orange yellow green blue indigo."
        );
    }

    #[test]
    fn generate_last_bigram() {
        // The bigram "yyy zzz" will not be present in the Markov
        // chain's map, and so we will not generate "xxx yyy zzz" as
        // one would expect. The chain moves from state "xxx yyy" to
        // "yyy zzz", but sees that as invalid state and resets itself
        // back to "xxx yyy".
        let mut chain = MarkovChain::new();
        chain.learn("xxx yyy zzz");
        // Compare against the sentence as `generate_from` would format
        // it; the raw words could never be equal to its output.
        assert_ne!(chain.generate_from(3, ("xxx", "yyy")), "Xxx yyy zzz.");
    }

    #[test]
    fn generate_from_no_panic() {
        // No panic when asked to generate a chain from a starting
        // point that doesn't exist in the chain.
        let mut chain = MarkovChain::new();
        chain.learn("foo bar baz");
        chain.generate_from(3, ("xxx", "yyy"));
    }

    #[test]
    fn chain_map() {
        let mut chain = MarkovChain::new();
        chain.learn("foo bar baz quuz");
        let map = &chain.map;

        assert_eq!(map.len(), 2);
        assert_eq!(map[&("foo", "bar")], vec!["baz"]);
        assert_eq!(map[&("bar", "baz")], vec!["quuz"]);
    }

    #[test]
    fn new_with_rng() {
        let rng = XorShiftRng::seed_from_u64(1234);
        let mut chain = MarkovChain::new_with_rng(rng);
        chain.learn("foo bar x y z");
        chain.learn("foo bar a b c");

        assert_eq!(chain.generate(15), "A b x y y b y bar a b y x y bar a.");
    }
}