delta_rle_elias_encoding.sf 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. #!/usr/bin/ruby
  2. # Author: Trizen
  3. # Date: 09 September 2023
  4. # https://github.com/trizen
  5. # Implementation of Delta + Run-length + Elias coding, for encoding arbitrary integers.
  6. # References:
  7. # Data Compression (Summer 2023) - Lecture 5 - Basic Techniques
  8. # https://youtube.com/watch?v=TdFWb8mL5Gk
  9. #
  10. # Data Compression (Summer 2023) - Lecture 6 - Delta Compression and Prediction
  11. # https://youtube.com/watch?v=-3H_eDbWNEU
  12. func read_bit (FileHandle fh, Ref bitstring) {
  13. if ((*bitstring \\ '').is_empty) {
  14. *bitstring = unpack('b*', fh.getc \\ die "error")
  15. }
  16. var bit = (*bitstring).substr(-1)
  17. *bitstring = (*bitstring).substr(0, -1)
  18. return bit
  19. }
  20. func DRE_encode (Array integers, Bool double = false) {
  21. var deltas = [0, integers.len, integers...].diffs.run_length
  22. var bitstring = FileHandle.new_buf(:raw)
  23. for c,v in (deltas) {
  24. if (c == 0) {
  25. bitstring << '0'
  26. }
  27. elsif (double) {
  28. var t = c.abs.inc.as_bin
  29. var l = t.len.as_bin
  30. bitstring << join('', '1', ((c < 0) ? '0' : '1'), ('1' * (l.len-1)), '0', l.substr(1), t.substr(1))
  31. }
  32. else {
  33. var t = c.abs.as_bin
  34. bitstring << join('', '1', ((c < 0) ? '0' : '1'), ('1' * (t.len-1)), '0', t.substr(1))
  35. }
  36. if (v == 1) {
  37. bitstring << '0'
  38. }
  39. else {
  40. var t = v.as_bin
  41. bitstring << join('', ('1' * (t.len-1)), '0', t.substr(1))
  42. }
  43. }
  44. pack('B*', bitstring.parent)
  45. }
  46. func DRE_decode (FileHandle fh, Bool double = false) {
  47. var deltas = []
  48. var buffer = ''
  49. var len = 0
  50. for (var k = 0 ; k <= len ; ++k) {
  51. var bit = read_bit(fh, \buffer)
  52. if (bit == '0') {
  53. deltas << 0
  54. }
  55. elsif (double) {
  56. var bit = read_bit(fh, \buffer)
  57. var bl = (^Inf -> first { read_bit(fh, \buffer) != '1' })
  58. var bl2 = Num('1' + bl.of { read_bit(fh, \buffer) }.join, 2)
  59. var int = Num('1' + (bl2-1).of { read_bit(fh, \buffer) }.join, 2)-1
  60. deltas << (bit == '1' ? int : -int)
  61. }
  62. else {
  63. var bit = read_bit(fh, \buffer)
  64. var n = (^Inf -> first { read_bit(fh, \buffer) != '1' })
  65. var d = Num('1' + n.of { read_bit(fh, \buffer) }.join, 2)
  66. deltas << (bit == '1' ? d : -d)
  67. }
  68. var bl = (^Inf -> first { read_bit(fh, \buffer) != '1' })
  69. if (bl > 0) {
  70. var run = Num('1' + bl.of { read_bit(fh, \buffer) }.join, 2)-1
  71. k += run
  72. deltas << run.of(deltas[-1])...
  73. }
  74. if (k == 0) {
  75. len = deltas.pop
  76. }
  77. }
  78. var acc = [len, deltas...].acc
  79. acc.shift
  80. return acc
  81. }
  82. var str = %n[6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 1 1 3 3 1 2 3 0 0 1 2 4 2 1 0 1 2 1 1 0 0 1].pack('C*')
  83. var enc = DRE_encode(str.bytes, false)
  84. var dec = DRE_decode(enc.open_r(:raw), false)
  85. say "Encoded: #{unpack('B*', enc)}"
  86. say "Decoded: #{dec}"
  87. assert_eq(dec.pack('C*'), str)
  88. do {
  89. var str = File(__FILE__).read(:raw)
  90. var encoded = DRE_encode(str.bytes, true)
  91. var decoded = DRE_decode(encoded.open_r(:raw), true)
  92. assert_eq(str, decoded.pack('C*'))
  93. }
  94. __END__
  95. Encoded: 111111110011001010111111001001101011010001111101111111101010101011000011100000101000110100101010001101001110001010001001001101001000001000001100
  96. Decoded: [6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 1, 3, 3, 1, 2, 3, 0, 0, 1, 2, 4, 2, 1, 0, 1, 2, 1, 1, 0, 0, 1]