delta_rle_elias_encoding.pl 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. #!/usr/bin/perl
  2. # Implementation of Delta + Run-length + Elias coding, for encoding arbitrary integers.
  3. # References:
  4. # Data Compression (Summer 2023) - Lecture 5 - Basic Techniques
  5. # https://youtube.com/watch?v=TdFWb8mL5Gk
  6. #
  7. # Data Compression (Summer 2023) - Lecture 6 - Delta Compression and Prediction
  8. # https://youtube.com/watch?v=-3H_eDbWNEU
  9. use 5.036;
  10. sub read_bit ($fh, $bitstring) {
  11. if (($$bitstring // '') eq '') {
  12. $$bitstring = unpack('b*', getc($fh) // return undef);
  13. }
  14. chop($$bitstring);
  15. }
  16. sub run_length ($arr) {
  17. @$arr || return [];
  18. my @result = [$arr->[0], 1];
  19. my $prev_value = $arr->[0];
  20. foreach my $i (1 .. $#{$arr}) {
  21. my $curr_value = $arr->[$i];
  22. if ($curr_value eq $prev_value) {
  23. ++$result[-1][1];
  24. }
  25. else {
  26. push(@result, [$curr_value, 1]);
  27. }
  28. $prev_value = $curr_value;
  29. }
  30. return \@result;
  31. }
  32. sub DRE_encoding ($integers, $double = 0) {
  33. my @deltas;
  34. my $prev = 0;
  35. unshift(@$integers, scalar(@$integers));
  36. while (@$integers) {
  37. my $curr = shift(@$integers);
  38. push @deltas, $curr - $prev;
  39. $prev = $curr;
  40. }
  41. my $bitstring = '';
  42. my $rle = run_length(\@deltas);
  43. foreach my $cv (@$rle) {
  44. my ($c, $v) = @$cv;
  45. if ($c == 0) {
  46. $bitstring .= '0';
  47. }
  48. elsif ($double) {
  49. my $t = sprintf('%b', abs($c) + 1);
  50. my $l = sprintf('%b', length($t));
  51. $bitstring .= '1' . (($c < 0) ? '0' : '1') . ('1' x (length($l) - 1)) . '0' . substr($l, 1) . substr($t, 1);
  52. }
  53. else {
  54. my $t = sprintf('%b', abs($c));
  55. $bitstring .= '1' . (($c < 0) ? '0' : '1') . ('1' x (length($t) - 1)) . '0' . substr($t, 1);
  56. }
  57. if ($v == 1) {
  58. $bitstring .= '0';
  59. }
  60. else {
  61. my $t = sprintf('%b', $v);
  62. $bitstring .= join('', '1' x (length($t) - 1), '0', substr($t, 1));
  63. }
  64. }
  65. pack('B*', $bitstring);
  66. }
  67. sub DRE_decoding ($bitstring, $double = 0) {
  68. open my $fh, '<:raw', \$bitstring;
  69. my @deltas;
  70. my $buffer = '';
  71. my $len = 0;
  72. for (my $k = 0 ; $k <= $len ; ++$k) {
  73. my $bit = read_bit($fh, \$buffer) // last;
  74. if ($bit eq '0') {
  75. push @deltas, 0;
  76. }
  77. elsif ($double) {
  78. my $bit = read_bit($fh, \$buffer);
  79. my $bl = 0;
  80. ++$bl while (read_bit($fh, \$buffer) eq '1');
  81. my $bl2 = oct('0b1' . join('', map { read_bit($fh, \$buffer) } 1 .. $bl));
  82. my $int = oct('0b1' . join('', map { read_bit($fh, \$buffer) } 1 .. ($bl2 - 1)));
  83. push @deltas, ($bit eq '1' ? 1 : -1) * ($int - 1);
  84. }
  85. else {
  86. my $bit = read_bit($fh, \$buffer);
  87. my $n = 0;
  88. ++$n while (read_bit($fh, \$buffer) eq '1');
  89. my $d = oct('0b1' . join('', map { read_bit($fh, \$buffer) } 1 .. $n));
  90. push @deltas, ($bit eq '1' ? $d : -$d);
  91. }
  92. my $bl = 0;
  93. while (read_bit($fh, \$buffer) == 1) {
  94. ++$bl;
  95. }
  96. if ($bl > 0) {
  97. my $run = oct('0b1' . join('', map { read_bit($fh, \$buffer) } 1 .. $bl)) - 1;
  98. $k += $run;
  99. push @deltas, ($deltas[-1]) x $run;
  100. }
  101. if ($k == 0) {
  102. $len = pop(@deltas);
  103. }
  104. }
  105. my @acc;
  106. my $prev = $len;
  107. foreach my $d (@deltas) {
  108. $prev += $d;
  109. push @acc, $prev;
  110. }
  111. return \@acc;
  112. }
  113. my $str = join('', 'a' x 13, 'b' x 14, 'c' x 10, 'd' x 3, 'e' x 1, 'f' x 1, 'g' x 4);
  114. my @bytes = unpack('C*', $str);
  115. my $enc = DRE_encoding(\@bytes);
  116. my $dec = pack('C*', @{DRE_decoding($enc)});
  117. say unpack('B*', $enc);
  118. say $dec;
  119. $dec eq $str or die "error: $dec != $str";
  120. do {
  121. my @integers = map { int(rand($_)) } 1 .. 1000;
  122. my $str = DRE_encoding([@integers], 1);
  123. say "Encoded length: ", length($str);
  124. say "Rawdata length: ", length(join(' ', @integers));
  125. my $decoded = DRE_decoding($str, 1);
  126. join(' ', @integers) eq join(' ', @$decoded) or die "Decoding error";
  127. {
  128. open my $fh, '<:raw', __FILE__;
  129. my $str = do { local $/; <$fh> };
  130. my $encoded = DRE_encoding([unpack('C*', $str)], 1);
  131. my $decoded = DRE_decoding($encoded, 1);
  132. $str eq pack('C*', @$decoded) or die "error";
  133. }
  134. }
  135. __END__
  136. Encoded length: 1879
  137. Rawdata length: 3628