gzip_store.pl 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. #!/usr/bin/perl
  2. # Author: Trizen
  3. # Date: 13 January 2024
  4. # https://github.com/trizen
  5. # Create a valid Gzip container, with uncompressed data.
  6. # Reference:
  7. # Data Compression (Summer 2023) - Lecture 11 - DEFLATE (gzip)
  8. # https://youtube.com/watch?v=SJPvNi4HrWQ
  9. use 5.036;
  10. use Digest::CRC qw();
  11. use File::Basename qw(basename);
  # Maximum number of payload bytes per chunk: 2^16 - 1.
  use constant CHUNK_SIZE => 65_535;

  # Fixed Gzip header fields (10 bytes in total).
  my $MAGIC  = "\x1f\x8b";      # Gzip identification bytes 0x1f 0x8b
  my $CM     = "\x08";          # compression method: 0x08 = DEFLATE
  my $FLAGS  = "\x00";          # flags: none set
  my $MTIME  = "\x00" x 4;      # modification time: four zero bytes
  my $XFLAGS = "\x00";          # extra flags: none set
  my $OS     = "\x03";          # operating system: 0x03 = Unix
  # Command-line arguments: the input path is mandatory; when no output path
  # is given, it defaults to the input's base name with ".gz" appended.
  my ($input, $output) = @ARGV[0, 1];
  defined($input) or die "usage: $0 [input] [output.gz]\n";
  $output //= basename($input) . '.gz';
  # Render an integer as a string of '0'/'1' characters, least-significant
  # bit first (the bit order consumed by pack('b*', ...)).
  #
  #   $value - non-negative integer to encode
  #   $size  - width of the bit field (default: 32)
  #
  # Returns a string zero-padded to $size characters. A value too wide for
  # the field keeps only its $size low-order bits, so the result never
  # exceeds the field width (e.g. a 32-bit length field wraps modulo 2^32).
  sub int2bits ($value, $size = 32) {
      my $bits = sprintf("%0*b", $size, $value);    # MSB first, zero-padded

      # sprintf pads but never truncates: drop overflowing high-order bits.
      if ($size > 0 && length($bits) > $size) {
          $bits = substr($bits, -$size);
      }

      return scalar reverse $bits;
  }
  # Copy $input into $output as a Gzip member: the header fields, then the
  # data split into uncompressed ("stored") blocks of at most CHUNK_SIZE
  # bytes, then the CRC-32 and the input length.
  # Dies on any open, read, write or output-close failure.
  open my $in_fh, '<:raw', $input
    or die "Can't open file <<$input>> for reading: $!";

  open my $out_fh, '>:raw', $output
    or die "Can't open file <<$output>> for writing: $!";

  print $out_fh $MAGIC, $CM, $FLAGS, $MTIME, $XFLAGS, $OS
    or die "Can't write to file <<$output>>: $!";

  my $total_length = 0;
  my $block_type   = '00';    # 00 = store; 10 = LZSS + Fixed codes; 01 = LZSS + Dynamic codes
  my $crc32        = Digest::CRC->new(type => "crc32");
  my $wrote_final  = 0;       # becomes true once a block flagged "last" was written

  # Write one stored block: the "is last" bit, the 2-bit block type, 5 zero
  # bits of padding, the 16-bit length, its 16-bit one's complement, and
  # finally the raw data bytes.
  my $write_block = sub ($data, $is_last) {
      my $data_len = length($data);
      my $len      = int2bits($data_len, 16);
      my $nlen     = int2bits((~$data_len) & 0xffff, 16);

      my $block_header = pack('b*', ($is_last ? '1' : '0') . $block_type . ('0' x 5) . $len . $nlen);

      print $out_fh $block_header, $data
        or die "Can't write to file <<$output>>: $!";

      $wrote_final = 1 if $is_last;
      return;
  };

  while (1) {
      my $got = read($in_fh, (my $chunk), CHUNK_SIZE);

      # read() returns undef on error and 0 at end of file; only the latter
      # is a normal way out of the loop.
      defined($got) or die "Can't read from file <<$input>>: $!";
      last if $got == 0;

      $write_block->($chunk, eof($in_fh));
      $crc32->add($chunk);
      $total_length += length($chunk);
  }

  # An empty input produces no chunk at all, but the DEFLATE stream still
  # needs a block flagged as last: emit an empty stored block in that case.
  $write_block->('', 1) if !$wrote_final;

  # Trailer: CRC-32 of the data, then the data length modulo 2^32.
  print $out_fh pack('b*', int2bits($crc32->digest, 32)),
                pack('b*', int2bits($total_length & 0xffff_ffff, 32))
    or die "Can't write to file <<$output>>: $!";

  close $in_fh;

  # Buffered write errors surface at close time, so check it.
  close $out_fh
    or die "Can't close file <<$output>>: $!";