package Text::Travesty; # Chapter 17/Appendix A: The Text::Travesty module implements "travesty", # a Markov chain algorithm that analyzes a text document and generates a # new document that preserves all the word-pair frequencies of the original. use strict; use Text::Wrap qw(fill); use IO::File; sub new { my $pack = shift; return bless { words => [], lookup => {}, num => {}, a => '', p=> '', n=>'', },$pack; } sub add { my $self = shift; my $string = shift; my ($words,$lookup,$num,$a,$p,$n) = @{$self}{qw(words lookup num a p n)}; for my $w (split /\s+/,$string) { ($a,$p) = ($p,$n); unless (defined($n = $num->{$w})) { push @{$words},$w; $n = pack 'S',$#$words; $num->{$w} = $n; } $lookup->{"$a$p"} .= $n; } @{$self}{'a','p','n'} = ($a,$p,$n); } sub analyze_file { my $self = shift; my $file = shift; unless (defined (fileno $file)) { $file = IO::File->new($file) || croak("Couldn't open $file: $!\n"); } $self->add($_) while defined ($_ = <$file>); } sub generate { my $self = shift; my $word_count = shift || 1000; my ($words,$lookup,$a,$p) = @{$self}{qw(words lookup a p)}; my ($n,$foo,$result); while ($word_count--) { $n = $lookup->{"$a$p"}; ($foo,$n) = each(%$lookup) if $n eq ''; $n = substr($n,int(rand(length($n))) & 0177776,2); ($a,$p) = ($p,$n); my $w = unpack('S',$n); $w = $words->[$w]; $result .= $w; $result .= $w =~ /\.$/ && rand() < .1 ? "\n\n" : ' '; } @{$self}{qw(a p)} = ($a,$p); return $result; } sub words { return @{shift->{words}}; } sub pretty_text { my $self = shift; my $text = $self->generate(@_); return fill("\t",'',$text) . "\n"; } sub reset { my $self= shift; @{$self}{qw(lookup num)} = ({},{}); $self->{words} = []; delete $self->{a}; delete $self->{p}; } 1; =head1 NAME Text::Travesty - Turn text into a travesty =head1 SYNOPSIS use Text::Travesty my $travesty = Text::Travesty->new; $travesty->analyze_file('for_whom_the_bell_tolls.txt'); print $travesty->generate(1000); =head1 DESCRIPTION This module uses the travesty algorithm to construct a Markov Chain of human-readable text and spew out stylistically similar (but entirely meaningless) text that has the same word frequency characteristics. =head1 CONSTRUCTOR =over 4 =item $travesty = Text::Travesty->new The new() method constructs a new Text::Travesty object with empty frequency tables. You will typically call add() or analyze_file() one or more times to add text to the frequency tables. =back =head1 OBJECT METHODS =over 4 =item $travesty->add($text); This method splits the provided text into words and adds them to the internal frequency tables. You will typically call add() multiple times during the analysis of a longer text. The definition of "words" is a bit unusual, because it includes punctuation and other non-whitespace characters. The pseudo-punctuation makes the generated travesties more fun. =item $travesty->analyze_file($file) This method adds the entire contents of the indicated file to the frequency tables. C<$file> may be an opened filehandle, in which case analyze_file() reads its contents through to EOF, or a file path, in which case the method opens it for reading. =item $text = $travesty->generate([$count]) The generate() method will spew back a travesty of the input text based on a Markov model built from the word frequency tables. C<$count>, if provided, gives the length of the text to generate in words. If not provided, the count defaults to 1000. =item $text = $travesty->pretty_text([$count]) This method is similar to generate() except that the returned text is formatted into wrapped paragraphs. =item @words = $travesty->words This method returns a list of all the unique words in the frequency tables. Punctuation and capitalization count for uniqueness. =item $travesty->reset Reset the travesty object, clearing out its frequency tables and readying it to accept a new text to analyze. =back =head1 SEE ALSO L, L, L =head1 AUTHOR Lincoln Stein =head1 COPYRIGHT Copyright (c) 2000 Lincoln Stein. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut