markov_chain_text_generator.pl 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. #!/usr/bin/perl
  2. # A very simple text generator, using Markov chains.
  3. # This version uses prefixes of variable lengths, between `n_min` and `n_max`.
  4. # See also:
  5. # https://en.wikipedia.org/wiki/Markov_chain
  6. # https://rosettacode.org/wiki/Markov_chain_text_generator
  7. use 5.014;
  8. use strict;
  9. use warnings;
  10. use Encode qw(decode_utf8);
  11. use Text::Unidecode qw(unidecode);
  12. use List::Util qw(uniq);
  13. my $n_min = 2;
  14. my $n_max = 4;
  15. my $max = 200 - $n_max;
  16. sub build_dict {
  17. my (@orig_words) = @_;
  18. my %dict;
  19. foreach my $n ($n_min .. $n_max) {
  20. my @words = (@orig_words, @orig_words[0 .. $n - 1]);
  21. for my $i (0 .. $#words - $n) {
  22. my @prefix = @words[$i .. $i + $n - 1];
  23. push @{$dict{join ' ', @prefix}}, $words[$i + $n];
  24. }
  25. }
  26. foreach my $key(keys %dict) {
  27. $dict{$key} = [uniq(@{$dict{$key}})];
  28. }
  29. return %dict;
  30. }
  31. my $text = do {
  32. if (-t STDIN) {
  33. my $content = '';
  34. foreach my $file (@ARGV) {
  35. open my $fh, '<', $file;
  36. local $/;
  37. $content .= <$fh>;
  38. $content .= "\n";
  39. }
  40. $content;
  41. }
  42. else {
  43. local $/;
  44. <>;
  45. }
  46. };
  47. $text = decode_utf8($text);
  48. $text = unidecode($text);
  49. $text = lc($text);
  50. $text =~ s/[^\w'-]+/ /g;
  51. my @words = grep { /^[a-z]/ } split ' ', $text;
  52. my %dict = build_dict(@words);
  53. my $idx = int(rand(@words - $n_max));
  54. my @rotor = @words[$idx .. $idx + $n_min - 1];
  55. my @chain = @rotor;
  56. sub pick_next {
  57. my (@prefix) = @_;
  58. my $key = join(' ', @prefix);
  59. my @arr = @{$dict{$key}};
  60. $arr[rand @arr];
  61. }
  62. for (1 .. $max) {
  63. my $new = pick_next(@rotor);
  64. my $idx = int(rand($n_max - $n_min + 1) + $n_min - 1);
  65. if ($idx > $#rotor) {
  66. #shift(@rotor) if rand(1) < 0.5;
  67. }
  68. else {
  69. @rotor = @rotor[$#rotor - $idx + 1 .. $#rotor];
  70. }
  71. push @rotor, $new;
  72. push @chain, $new;
  73. }
  74. while (@chain) {
  75. say join(' ', splice(@chain, 0, 8));
  76. }