huffman_coding.pl 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. #!/usr/bin/perl
  2. # https://rosettacode.org/wiki/Huffman_coding#Perl
  3. use 5.020;
  4. use strict;
  5. use warnings;
  6. use experimental qw(signatures);
  7. # produce encode and decode dictionary from a tree
  8. sub walk ($node, $code, $h, $rev_h) {
  9. my $c = $node->[0];
  10. if (ref $c) { walk($c->[$_], $code . $_, $h, $rev_h) for (0, 1) }
  11. else { $h->{$c} = $code; $rev_h->{$code} = $c }
  12. return ($h, $rev_h);
  13. }
  14. # make a tree, and return resulting dictionaries
  15. sub mktree ($bytes) {
  16. my (%freq, @nodes);
  17. ++$freq{$_} for @$bytes;
  18. @nodes = map { [$_, $freq{$_}] } sort { $a <=> $b } keys %freq;
  19. do { # poor man's priority queue
  20. @nodes = sort { $a->[1] <=> $b->[1] } @nodes;
  21. my ($x, $y) = splice(@nodes, 0, 2);
  22. if (defined($x)) {
  23. if (defined($y)) {
  24. push @nodes, [[$x, $y], $x->[1] + $y->[1]];
  25. }
  26. else {
  27. push @nodes, [[$x], $x->[1]];
  28. }
  29. }
  30. } while (@nodes > 1);
  31. walk($nodes[0], '', {}, {});
  32. }
  33. sub encode ($bytes, $dict) {
  34. join('', map { $dict->{$_} // die("bad char $_") } @$bytes);
  35. }
  36. sub decode ($str, $dict) {
  37. my ($seg, @out) = ("");
  38. # append to current segment until it's in the dictionary
  39. foreach my $bit (split('', $str)) {
  40. $seg .= $bit;
  41. my $x = $dict->{$seg} // next;
  42. push @out, $x;
  43. $seg = '';
  44. }
  45. die "bad code" if length($seg);
  46. return \@out;
  47. }
  48. my $txt = 'this is an example for huffman encoding';
  49. my @bytes = unpack('C*', $txt);
  50. my ($h, $rev_h) = mktree(\@bytes);
  51. for (keys %$h) { printf("%3d: %s\n", $_, $h->{$_}) }
  52. my $enc = encode(\@bytes, $h);
  53. say $enc;
  54. my $dec = decode($enc, $rev_h);
  55. say pack('C*', @$dec);