123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185 |
- #!/usr/bin/perl
- # Implementation of Delta + Run-length + Elias coding, for encoding arbitrary integers.
- # References:
- # Data Compression (Summer 2023) - Lecture 5 - Basic Techniques
- # https://youtube.com/watch?v=TdFWb8mL5Gk
- #
- # Data Compression (Summer 2023) - Lecture 6 - Delta Compression and Prediction
- # https://youtube.com/watch?v=-3H_eDbWNEU
- use 5.036;
- sub read_bit ($fh, $bitstring) {
- if (($$bitstring // '') eq '') {
- $$bitstring = unpack('b*', getc($fh) // return undef);
- }
- chop($$bitstring);
- }
- sub run_length ($arr) {
- @$arr || return [];
- my @result = [$arr->[0], 1];
- my $prev_value = $arr->[0];
- foreach my $i (1 .. $#{$arr}) {
- my $curr_value = $arr->[$i];
- if ($curr_value eq $prev_value) {
- ++$result[-1][1];
- }
- else {
- push(@result, [$curr_value, 1]);
- }
- $prev_value = $curr_value;
- }
- return \@result;
- }
- sub DRE_encoding ($integers, $double = 0) {
- my @deltas;
- my $prev = 0;
- unshift(@$integers, scalar(@$integers));
- while (@$integers) {
- my $curr = shift(@$integers);
- push @deltas, $curr - $prev;
- $prev = $curr;
- }
- my $bitstring = '';
- my $rle = run_length(\@deltas);
- foreach my $cv (@$rle) {
- my ($c, $v) = @$cv;
- if ($c == 0) {
- $bitstring .= '0';
- }
- elsif ($double) {
- my $t = sprintf('%b', abs($c) + 1);
- my $l = sprintf('%b', length($t));
- $bitstring .= '1' . (($c < 0) ? '0' : '1') . ('1' x (length($l) - 1)) . '0' . substr($l, 1) . substr($t, 1);
- }
- else {
- my $t = sprintf('%b', abs($c));
- $bitstring .= '1' . (($c < 0) ? '0' : '1') . ('1' x (length($t) - 1)) . '0' . substr($t, 1);
- }
- if ($v == 1) {
- $bitstring .= '0';
- }
- else {
- my $t = sprintf('%b', $v);
- $bitstring .= join('', '1' x (length($t) - 1), '0', substr($t, 1));
- }
- }
- pack('B*', $bitstring);
- }
- sub DRE_decoding ($bitstring, $double = 0) {
- open my $fh, '<:raw', \$bitstring;
- my @deltas;
- my $buffer = '';
- my $len = 0;
- for (my $k = 0 ; $k <= $len ; ++$k) {
- my $bit = read_bit($fh, \$buffer) // last;
- if ($bit eq '0') {
- push @deltas, 0;
- }
- elsif ($double) {
- my $bit = read_bit($fh, \$buffer);
- my $bl = 0;
- ++$bl while (read_bit($fh, \$buffer) eq '1');
- my $bl2 = oct('0b1' . join('', map { read_bit($fh, \$buffer) } 1 .. $bl));
- my $int = oct('0b1' . join('', map { read_bit($fh, \$buffer) } 1 .. ($bl2 - 1)));
- push @deltas, ($bit eq '1' ? 1 : -1) * ($int - 1);
- }
- else {
- my $bit = read_bit($fh, \$buffer);
- my $n = 0;
- ++$n while (read_bit($fh, \$buffer) eq '1');
- my $d = oct('0b1' . join('', map { read_bit($fh, \$buffer) } 1 .. $n));
- push @deltas, ($bit eq '1' ? $d : -$d);
- }
- my $bl = 0;
- while (read_bit($fh, \$buffer) == 1) {
- ++$bl;
- }
- if ($bl > 0) {
- my $run = oct('0b1' . join('', map { read_bit($fh, \$buffer) } 1 .. $bl)) - 1;
- $k += $run;
- push @deltas, ($deltas[-1]) x $run;
- }
- if ($k == 0) {
- $len = pop(@deltas);
- }
- }
- my @acc;
- my $prev = $len;
- foreach my $d (@deltas) {
- $prev += $d;
- push @acc, $prev;
- }
- return \@acc;
- }
- my $str = join('', 'a' x 13, 'b' x 14, 'c' x 10, 'd' x 3, 'e' x 1, 'f' x 1, 'g' x 4);
- my @bytes = unpack('C*', $str);
- my $enc = DRE_encoding(\@bytes);
- my $dec = pack('C*', @{DRE_decoding($enc)});
- say unpack('B*', $enc);
- say $dec;
- $dec eq $str or die "error: $dec != $str";
- do {
- my @integers = map { int(rand($_)) } 1 .. 1000;
- my $str = DRE_encoding([@integers], 1);
- say "Encoded length: ", length($str);
- say "Rawdata length: ", length(join(' ', @integers));
- my $decoded = DRE_decoding($str, 1);
- join(' ', @integers) eq join(' ', @$decoded) or die "Decoding error";
- {
- open my $fh, '<:raw', __FILE__;
- my $str = do { local $/; <$fh> };
- my $encoded = DRE_encoding([unpack('C*', $str)], 1);
- my $decoded = DRE_decoding($encoded, 1);
- $str eq pack('C*', @$decoded) or die "error";
- }
- }
- __END__
- Encoded length: 1879
- Rawdata length: 3628
|