trizen
/
perl-scripts


			
				
					
						
						
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
							#!/usr/bin/perl

# https://rosettacode.org/wiki/Huffman_coding#Perl

use 5.020;
use strict;
use warnings;

use experimental qw(signatures);

# produce encode and decode dictionary from a tree
sub walk ($node, $code, $h, $rev_h) {

    my $c = $node->[0];
    if (ref $c) { walk($c->[$_], $code . $_, $h, $rev_h) for (0, 1) }
    else        { $h->{$c} = $code; $rev_h->{$code} = $c }

    return ($h, $rev_h);
}

# make a tree, and return resulting dictionaries
sub mktree ($bytes) {
    my (%freq, @nodes);

    ++$freq{$_} for @$bytes;
    @nodes = map { [$_, $freq{$_}] } sort { $a <=> $b } keys %freq;

    do {    # poor man's priority queue
        @nodes = sort { $a->[1] <=> $b->[1] } @nodes;
        my ($x, $y) = splice(@nodes, 0, 2);
        if (defined($x)) {
            if (defined($y)) {
                push @nodes, [[$x, $y], $x->[1] + $y->[1]];
            }
            else {
                push @nodes, [[$x], $x->[1]];
            }
        }
    } while (@nodes > 1);

    walk($nodes[0], '', {}, {});
}

sub encode ($bytes, $dict) {
    join('', map { $dict->{$_} // die("bad char $_") } @$bytes);
}

sub decode ($str, $dict) {
    my ($seg, @out) = ("");

    # append to current segment until it's in the dictionary
    foreach my $bit (split('', $str)) {
        $seg .= $bit;
        my $x = $dict->{$seg} // next;
        push @out, $x;
        $seg = '';
    }

    die "bad code" if length($seg);
    return \@out;
}

my $txt   = 'this is an example for huffman encoding';
my @bytes = unpack('C*', $txt);
my ($h, $rev_h) = mktree(\@bytes);
for (keys %$h) { printf("%3d: %s\n", $_, $h->{$_}) }

my $enc = encode(\@bytes, $h);
say $enc;

my $dec = decode($enc, $rev_h);
say pack('C*', @$dec);