delta_encoding_with_elias_coding.pl 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. #!/usr/bin/perl
  2. # Author: Trizen
  3. # Date: 14 June 2023
  4. # https://github.com/trizen
  5. # Implementation of the Delta encoding scheme, combined with Elias gamma encoding, optimized for moderately large deltas.
  6. # Reference:
  7. # Data Compression (Summer 2023) - Lecture 6 - Delta Compression and Prediction
  8. # https://youtube.com/watch?v=-3H_eDbWNEU
  9. use 5.036;
  10. sub read_bit ($fh, $bitstring) {
  11. if (($$bitstring // '') eq '') {
  12. $$bitstring = unpack('b*', getc($fh) // return undef);
  13. }
  14. chop($$bitstring);
  15. }
  16. sub delta_encode ($integers) {
  17. my @deltas;
  18. my $prev = 0;
  19. unshift(@$integers, scalar(@$integers));
  20. while (@$integers) {
  21. my $curr = shift(@$integers);
  22. push @deltas, $curr - $prev;
  23. $prev = $curr;
  24. }
  25. my $bitstring = '';
  26. foreach my $d (@deltas) {
  27. if ($d == 0) {
  28. $bitstring .= '0';
  29. }
  30. else {
  31. my $t = sprintf('%b', abs($d));
  32. $bitstring .= '1' . (($d < 0) ? '0' : '1') . ('1' x (length($t) - 1)) . '0' . substr($t, 1);
  33. }
  34. }
  35. pack('B*', $bitstring);
  36. }
  37. sub delta_decode ($str) {
  38. open my $fh, '<:raw', \$str;
  39. my @deltas;
  40. my $buffer = '';
  41. my $len = 0;
  42. for (my $k = 0 ; $k <= $len ; ++$k) {
  43. my $bit = read_bit($fh, \$buffer);
  44. if ($bit eq '0') {
  45. push @deltas, 0;
  46. }
  47. else {
  48. my $bit = read_bit($fh, \$buffer);
  49. my $n = 0;
  50. ++$n while (read_bit($fh, \$buffer) eq '1');
  51. my $d = oct('0b1' . join('', map { read_bit($fh, \$buffer) } 1 .. $n));
  52. push @deltas, ($bit eq '1' ? $d : -$d);
  53. }
  54. if ($k == 0) {
  55. $len = pop(@deltas);
  56. }
  57. }
  58. my @acc;
  59. my $prev = $len;
  60. foreach my $d (@deltas) {
  61. $prev += $d;
  62. push @acc, $prev;
  63. }
  64. return \@acc;
  65. }
  66. my @integers = map { int(rand($_)) } 1 .. 1000;
  67. my $str = delta_encode([@integers]);
  68. say "Encoded length: ", length($str);
  69. say "Rawdata length: ", length(join(' ', @integers));
  70. my $decoded = delta_decode($str);
  71. join(' ', @integers) eq join(' ', @$decoded) or die "Decoding error";
  72. {
  73. open my $fh, '<:raw', __FILE__;
  74. my $str = do { local $/; <$fh> };
  75. my $encoded = delta_encode([unpack('C*', $str)]);
  76. my $decoded = delta_decode($encoded);
  77. $str eq pack('C*', @$decoded) or die "error";
  78. }
  79. __END__
  80. Encoded length: 1882
  81. Rawdata length: 3626