gzip_block_type_1_huffman_only.pl 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  #!/usr/bin/perl

  # Author: Trizen
  # Date: 13 January 2024
  # Edit: 05 April 2024
  # https://github.com/trizen

  # Create a valid Gzip container, using DEFLATE's Block Type 1 with
  # fixed-length prefix codes only, without LZSS.
  #
  # Usage: <script> input [output.gz]
  # When no output name is given, the result is written to
  # basename(input) . '.gz' in the current directory.

  # Reference:
  #   Data Compression (Summer 2023) - Lecture 11 - DEFLATE (gzip)
  #   https://youtube.com/watch?v=SJPvNi4HrWQ

  use 5.036;
  use Digest::CRC qw();
  use File::Basename qw(basename);
  use Compression::Util qw(:all);

  use constant {
      CHUNK_SIZE => 0xffff,         # 2^16 - 1 input bytes per DEFLATE block
      ISIZE_MASK => 0xffff_ffff,    # gzip ISIZE is the input size modulo 2^32
  };

  # --- Fixed 10-byte gzip member header ---------------------------------
  my $MAGIC  = pack('C*', 0x1f, 0x8b);        # gzip magic number (ID1, ID2)
  my $CM     = chr(0x08);                     # 0x08 = DEFLATE
  my $FLAGS  = chr(0x00);                     # flags (none set)
  my $MTIME  = pack('C*', (0x00) x 4);        # modification time (all zero)
  my $XFLAGS = chr(0x00);                     # extra flags
  my $OS     = chr(0x03);                     # 0x03 = Unix

  my $input  = $ARGV[0] // die "usage: $0 [input] [output.gz]\n";
  my $output = $ARGV[1] // (basename($input) . '.gz');

  open my $in_fh, '<:raw', $input
    or die "Can't open file <<$input>> for reading: $!";

  open my $out_fh, '>:raw', $output
    or die "Can't open file <<$output>> for writing: $!";

  print {$out_fh} $MAGIC, $CM, $FLAGS, $MTIME, $XFLAGS, $OS;

  my $total_length = 0;                                 # input bytes consumed so far
  my $crc32        = Digest::CRC->new(type => "crc32"); # running CRC of the input
  my $pending_bits = '';                                # '0'/'1' characters not yet written out

  # Bit strings in this script are packed with pack('b*'), i.e. least
  # significant bit first, so the two BTYPE bits appear here in transmission
  # order: '00' = stored, '10' = fixed codes (BTYPE 01), '01' = dynamic codes
  # (BTYPE 10).
  my $block_type = '10';

  # Code lengths of DEFLATE's fixed literal/length alphabet (288 symbols):
  #   0..143 -> 8 bits, 144..255 -> 9 bits, 256..279 -> 7 bits, 280..287 -> 8 bits
  my @code_lengths = ((8) x 144, (9) x 112, (7) x 24, (8) x 8);

  my ($dict) = huffman_from_code_lengths(\@code_lengths);

  # An empty input never enters the read loop below, so emit a single final
  # block that contains only the end-of-block symbol (256).
  if (eof($in_fh)) {
      $pending_bits = '1' . $block_type . $dict->{256};
  }

  while (1) {
      my $bytes_read = read($in_fh, my $chunk, CHUNK_SIZE);

      # read() returns undef on error; stopping silently here would leave a
      # truncated stream followed by a trailer, so fail loudly instead.
      defined($bytes_read)
        or die "Can't read from file <<$input>>: $!";

      last if $bytes_read == 0;    # end of input

      # Block header: BFINAL bit followed by the two BTYPE bits.
      my $is_last = eof($in_fh) ? '1' : '0';
      $pending_bits .= $is_last . $block_type;

      # Every input byte is coded as a literal; 256 terminates the block.
      $pending_bits .= huffman_encode([unpack('C*', $chunk), 256], $dict);

      # Write out all complete bytes and keep the 0..7 leftover bits, because
      # the next block header continues in the same partially filled byte.
      my $whole_bits = length($pending_bits) - (length($pending_bits) % 8);
      print {$out_fh} pack('b*', substr($pending_bits, 0, $whole_bits, ''));

      $crc32->add($chunk);
      $total_length += length($chunk);
  }

  # Flush the final partial byte, if any.
  if ($pending_bits ne '') {
      print {$out_fh} pack('b*', $pending_bits);
  }

  # Trailer: CRC-32 of the uncompressed data, then ISIZE. The size is masked
  # so that exactly 32 bits are emitted even for inputs of 4 GiB or more.
  print {$out_fh} pack('b*', int2bits_lsb($crc32->digest, 32));
  print {$out_fh} pack('b*', int2bits_lsb($total_length & ISIZE_MASK, 32));

  close $in_fh;

  # Buffered write errors (e.g. a full disk) only surface when closing.
  close $out_fh
    or die "Can't close file <<$output>>: $!";