123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103 |
- #!/usr/bin/perl
- # A very simple text generator, using Markov chains.
- # This version uses prefixes of variable lengths, between `n_min` and `n_max`.
- # See also:
- # https://en.wikipedia.org/wiki/Markov_chain
- # https://rosettacode.org/wiki/Markov_chain_text_generator
- use 5.014;
- use strict;
- use warnings;
- use Encode qw(decode_utf8);
- use Text::Unidecode qw(unidecode);
- use List::Util qw(uniq);
- my $n_min = 2;
- my $n_max = 4;
- my $max = 200 - $n_max;
- sub build_dict {
- my (@orig_words) = @_;
- my %dict;
- foreach my $n ($n_min .. $n_max) {
- my @words = (@orig_words, @orig_words[0 .. $n - 1]);
- for my $i (0 .. $#words - $n) {
- my @prefix = @words[$i .. $i + $n - 1];
- push @{$dict{join ' ', @prefix}}, $words[$i + $n];
- }
- }
- foreach my $key(keys %dict) {
- $dict{$key} = [uniq(@{$dict{$key}})];
- }
- return %dict;
- }
- my $text = do {
- if (-t STDIN) {
- my $content = '';
- foreach my $file (@ARGV) {
- open my $fh, '<', $file;
- local $/;
- $content .= <$fh>;
- $content .= "\n";
- }
- $content;
- }
- else {
- local $/;
- <>;
- }
- };
- $text = decode_utf8($text);
- $text = unidecode($text);
- $text = lc($text);
- $text =~ s/[^\w'-]+/ /g;
- my @words = grep { /^[a-z]/ } split ' ', $text;
- my %dict = build_dict(@words);
- my $idx = int(rand(@words - $n_max));
- my @rotor = @words[$idx .. $idx + $n_min - 1];
- my @chain = @rotor;
- sub pick_next {
- my (@prefix) = @_;
- my $key = join(' ', @prefix);
- my @arr = @{$dict{$key}};
- $arr[rand @arr];
- }
- for (1 .. $max) {
- my $new = pick_next(@rotor);
- my $idx = int(rand($n_max - $n_min + 1) + $n_min - 1);
- if ($idx > $#rotor) {
- #shift(@rotor) if rand(1) < 0.5;
- }
- else {
- @rotor = @rotor[$#rotor - $idx + 1 .. $#rotor];
- }
- push @rotor, $new;
- push @chain, $new;
- }
- while (@chain) {
- say join(' ', splice(@chain, 0, 8));
- }
|