gzip_comment.pl 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. #!/usr/bin/perl
  2. # Author: Trizen
  3. # Date: 21 January 2024
  4. # https://github.com/trizen
  5. # Add and extract a GZIP comment, given a ".gz" file.
  6. # References:
  7. # Data Compression (Summer 2023) - Lecture 11 - DEFLATE (gzip)
  8. # https://youtube.com/watch?v=SJPvNi4HrWQ
  9. #
  10. # GZIP file format specification version 4.3
  11. # https://datatracker.ietf.org/doc/html/rfc1952
  12. use 5.036;
  13. use Getopt::Std qw(getopts);
  14. use MIME::Base64 qw(encode_base64 decode_base64);
  15. use constant {
  16. CHUNK_SIZE => 0xffff, # 2^16 - 1
  17. };
  18. getopts('ebho:', \my %opts);
  19. sub usage ($exit_code = 0) {
  20. print <<"EOT";
  21. usage: $0 [options] [input.gz] [comment.txt]"
  22. options:
  23. -o : output file
  24. -e : extract comment
  25. -b : base64 encoding / decoding of the comment
  26. -h : print this message and exit
  27. example:
  28. # Add comment to "input.gz" from "file.txt" (base64-encoded)
  29. perl $0 -o output.gz -b input.gz file.txt
  30. # Extract comment from "input.gz" (base64-decoded)
  31. perl $0 -o comment.txt -eb input.gz
  32. EOT
  33. exit $exit_code;
  34. }
  35. sub read_null_terminated ($in_fh) {
  36. my $string = '';
  37. while (1) {
  38. my $c = getc($in_fh) // die "Invalid gzip data";
  39. last if $c eq "\0";
  40. $string .= $c;
  41. }
  42. return $string;
  43. }
  44. sub extract_comment ($input_gz, $output_file) {
  45. open my $in_fh, '<:raw', $input_gz
  46. or die "Can't open file <<$input_gz>> for reading: $!";
  47. my $MAGIC = (getc($in_fh) // die "error") . (getc($in_fh) // die "error");
  48. if ($MAGIC ne pack('C*', 0x1f, 0x8b)) {
  49. die "Not a Gzip file: $input_gz\n";
  50. }
  51. my $CM = getc($in_fh) // die "error"; # 0x08 = DEFLATE
  52. my $FLAGS = getc($in_fh) // die "error"; # flags
  53. my $MTIME = join('', map { getc($in_fh) // die "error" } 1 .. 4); # modification time
  54. my $XFLAGS = getc($in_fh) // die "error"; # extra flags
  55. my $OS = getc($in_fh) // die "error"; # 0x03 = Unix
  56. my $has_filename = 0;
  57. if ((ord($FLAGS) & 0b0000_1000) != 0) {
  58. say STDERR "Has filename.";
  59. $has_filename = 1;
  60. }
  61. if ((ord($FLAGS) & 0b0001_0000) != 0) {
  62. say STDERR "Has comment.";
  63. }
  64. else {
  65. die "No comment was found.\n";
  66. }
  67. if ($has_filename) {
  68. read_null_terminated($in_fh); # filename
  69. }
  70. my $comment = read_null_terminated($in_fh);
  71. my $out_fh;
  72. if (defined($output_file)) {
  73. open $out_fh, '>:raw', $output_file
  74. or die "Can't open file <<$output_file>> for writing: $!";
  75. }
  76. else {
  77. $out_fh = \*STDOUT;
  78. }
  79. if ($opts{b}) {
  80. $comment = decode_base64($comment);
  81. }
  82. print $out_fh $comment;
  83. }
  84. sub add_comment ($input_gz, $comment_file, $output_gz) {
  85. if (!defined($output_gz)) {
  86. if ($input_gz =~ /\.tar\.gz\z/) {
  87. $output_gz = "output.tar.gz";
  88. }
  89. elsif ($input_gz =~ /\.tgz\z/) {
  90. $output_gz = "output.tgz";
  91. }
  92. else {
  93. $output_gz = "output.gz";
  94. }
  95. }
  96. if (-e $output_gz) {
  97. die "Output file <<$output_gz>> already exists!\n";
  98. }
  99. open my $in_fh, '<:raw', $input_gz
  100. or die "Can't open file <<$input_gz>> for reading: $!";
  101. open my $comment_fh, '<:raw', $comment_file
  102. or die "Can't open file <<$comment_file>> for reading: $!";
  103. my $MAGIC = (getc($in_fh) // die "error") . (getc($in_fh) // die "error");
  104. if ($MAGIC ne pack('C*', 0x1f, 0x8b)) {
  105. die "Not a Gzip file: $input_gz\n";
  106. }
  107. my $CM = getc($in_fh) // die "error"; # 0x08 = DEFLATE
  108. my $FLAGS = getc($in_fh) // die "error"; # flags
  109. my $MTIME = join('', map { getc($in_fh) // die "error" } 1 .. 4); # modification time
  110. my $XFLAGS = getc($in_fh) // die "error"; # extra flags
  111. my $OS = getc($in_fh) // die "error"; # 0x03 = Unix
  112. open my $out_fh, '>:raw', $output_gz
  113. or die "Can't open file <<$output_gz>> for writing: $!";
  114. print $out_fh $MAGIC, $CM, chr(ord($FLAGS) | 0b0001_0000), $MTIME, $XFLAGS, $OS;
  115. my $has_filename = 0;
  116. my $has_comment = 0;
  117. if ((ord($FLAGS) & 0b0000_1000) != 0) {
  118. say STDERR "Has filename.";
  119. $has_filename = 1;
  120. }
  121. else {
  122. say STDERR "Has no filename.";
  123. }
  124. if ((ord($FLAGS) & 0b0001_0000) != 0) {
  125. say STDERR "Has comment.";
  126. $has_comment = 1;
  127. }
  128. else {
  129. say STDERR "Has no existing comment.";
  130. }
  131. if ($has_filename) {
  132. my $filename = read_null_terminated($in_fh); # filename
  133. print $out_fh $filename . "\0";
  134. }
  135. if ($has_comment) {
  136. say STDERR "Replacing existing comment.";
  137. read_null_terminated($in_fh); # remove existing comment
  138. }
  139. else {
  140. say STDERR "Adding comment from file.";
  141. }
  142. my $comment = do {
  143. local $/;
  144. <$comment_fh>;
  145. };
  146. if ($opts{b}) {
  147. $comment = encode_base64($comment);
  148. }
  149. print $out_fh $comment;
  150. print $out_fh "\0";
  151. # Copy the rest of the gzip file
  152. while (read($in_fh, (my $chunk), CHUNK_SIZE)) {
  153. print $out_fh $chunk;
  154. }
  155. return 1;
  156. }
  157. if ($opts{h}) {
  158. usage(0);
  159. }
  160. my $input_gz = shift(@ARGV) // usage(2);
  161. if ($opts{e}) {
  162. extract_comment($input_gz, $opts{o});
  163. }
  164. else {
  165. my $comment_file = shift(@ARGV) // usage(2);
  166. add_comment($input_gz, $comment_file, $opts{o});
  167. }