upgrade-files.pl 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. #! /usr/bin/perl -w
  2. use CGI qw/:standard/;
  3. use CGI::Carp qw(fatalsToBrowser);
  4. if (param('separator') eq 'UseMod 0.92' or param('separator') eq 'UseMod 1.00') {
  5. $FS = "\xb3";
  6. } elsif (param('separator') eq 'UseMod 1.00 with $NewFS set') {
  7. $FS = "\x1e\xff\xfe\x1e";
  8. } else {
  9. $FS = "\x1e";
  10. }
  11. $NewFS = "\x1e";
  12. # override $FS if you want!
  13. print header() . start_html('Upgrading Files'), p;
  14. print q{Upgrade version: $Id: upgrade-files.pl,v 1.16 2010/11/06 11:51:18 as Exp $}, "\n";
  15. if (not param('dir')) {
  16. print start_form, p, '$DataDir: ', textfield('dir', '/tmp/oddmuse'),
  17. p, radio_group('separator', ['Oddmuse', 'UseMod 0.92', 'UseMod 1.00',
  18. 'UseMod 1.00 with $NewFS set']),
  19. p, checkbox('convert', 'checked', 'on', 'Convert Latin-1 to UTF-8'),
  20. p, submit('Ok'), "\n", end_form;
  21. } elsif (param('dir') and not param('sure')) {
  22. print start_form, hidden('sure', 'yes'), hidden('dir', param('dir')),
  23. hidden('separator', param('separator')), hidden('convert', param('convert')),
  24. p, '$DataDir: ', param('dir'),
  25. p, 'separator used when reading pages: ',
  26. join(', ', map { sprintf('0x%x', ord($_)) } split (//, $FS)),
  27. p, 'separator used when writing pages: ',
  28. join(', ', map { sprintf('0x%x', ord($_)) } split (//, $NewFS)),
  29. p, 'Convert Latin-1 to UTF-8: ', param('convert') ? 'Yes' : 'No',
  30. p, submit('Confirm'), "\n", end_form;
  31. } else {
  32. rewrite(scalar(param('dir')));
  33. }
  34. print end_html();
  35. sub rewrite {
  36. my ($directory) = @_;
  37. $FS1 = $FS . "1";
  38. $FS2 = $FS . "2";
  39. $FS3 = $FS . "3";
  40. my @files = glob("$directory/page/*/*.db");
  41. if (not @files) {
  42. print "$directory does not seem to be a data directory.\n";
  43. return;
  44. }
  45. print '<pre>';
  46. foreach my $file (@files) {
  47. print "Reading page $file...\n";
  48. my %page = split(/$FS1/, read_file($file), -1);
  49. %section = split(/$FS2/, $page{text_default}, -1);
  50. %text = split(/$FS3/, $section{data}, -1);
  51. $file =~ s!/([A-Z]|other)/!/!;
  52. $file =~ s/\.db$/.pg/ or die "Invalid page name\n";
  53. print "Writing $file...\n";
  54. write_page_file($file);
  55. }
  56. print '</pre>';
  57. @files = glob("$directory/referer/*/*.rb");
  58. print '<pre>';
  59. foreach my $file (@files) {
  60. print "Reading refer $file...\n";
  61. my $data = read_file($file);
  62. $data =~ s/$FS1/$NewFS/g;
  63. $file =~ s!/([A-Z]|other)/!/!;
  64. $file =~ s/\.rb$/.rf/ or die "Invalid page name\n";
  65. print "Writing $file...\n";
  66. write_file($file, $data);
  67. }
  68. print '</pre>';
  69. @files = glob("$directory/keep/*/*.kp");
  70. foreach my $file (@files) {
  71. print '<pre>';
  72. print "Reading keep $file...\n";
  73. my $data = read_file($file);
  74. my @list = split(/$FS1/, $data);
  75. my $out = $file;
  76. $out =~ s!/([A-Z]|other)/!/!;
  77. $out =~ s/\.kp$// or die "Invalid keep name\n";
  78. # We introduce a new variable $dir, here, instead of using $out,
  79. # because $out will be part of the filename later on, and the
  80. # filename will be converted in write_file. To convert $out to
  81. # utf8 would double-encode the directory part of the filename.
  82. my $dir = param('convert') ? utf8($out) : $out;
  83. print "Creating $out...\n";
  84. mkdir($dir) or die "Cannot create directory $dir\n" unless -d $dir;
  85. foreach my $keep (@list) {
  86. next unless $keep;
  87. %section = split(/$FS2/, $keep, -1);
  88. %text = split(/$FS3/, $section{data}, -1);
  89. my $current = "$out/$section{'revision'}.kp";
  90. print "Writing $current...\n";
  91. write_keep_file($current);
  92. }
  93. print '</pre>';
  94. }
  95. @files = glob("$directory/*rclog");
  96. print '<pre>';
  97. foreach my $file (@files) {
  98. print "Reading $file...\n";
  99. my $data = read_file($file);
  100. @rc = split(/\n/, $data);
  101. foreach (@rc) {
  102. my ($ts, $pagename, $summary, $minor, $host, $kind, $extraTemp)
  103. = split(/$FS3/, $_);
  104. my %extra = split(/$FS2/, $extraTemp, -1);
  105. foreach ('name', 'revision', 'languages', 'cluster') {
  106. $extra{$_} = '' unless $extra{$_};
  107. }
  108. $extra{languages} =~ s/$FS1/,/g;
  109. $_ = join($NewFS, $ts, $pagename, $minor, $summary, $host,
  110. $extra{name}, $extra{revision}, $extra{languages}, $extra{cluster});
  111. }
  112. $data = join("\n", @rc) . "\n";
  113. $file =~ s/log$/.log/;
  114. print "Writing $file...\n";
  115. write_file($file, $data);
  116. }
  117. print '</pre>';
  118. print p, "Done.\n";
  119. }
  120. sub read_file {
  121. my ($filename) = @_;
  122. my ($data);
  123. local $/ = undef; # Read complete files
  124. open(F, "<$filename") or die "can't read $filename: $!";
  125. $data=<F>;
  126. close F;
  127. return $data;
  128. }
  129. sub write_file {
  130. my ($filename, $data) = @_;
  131. if (param('convert')) {
  132. $filename = utf8($filename);
  133. $data = utf8($data);
  134. }
  135. open(F, ">$filename") or die "can't write $filename: $!";
  136. print F $data;
  137. close F;
  138. }
  139. sub cache {
  140. $_ = shift;
  141. return "" unless $_;
  142. my ($block, $flag) = split(/$FS2/, $_);
  143. my @blocks = split(/$FS3/, $block);
  144. my @flags = split(/$FS3/, $flag);
  145. return 'blocks: ' . escape_newlines(join($NewFS, @blocks)) . "\n"
  146. . 'flags: ' . escape_newlines(join($NewFS, @flags)) . "\n";
  147. }
  148. sub escape_newlines {
  149. $_ = shift;
  150. $_ =~ s/\n/\n\t/g if $_;
  151. return $_;
  152. }
  153. # Skip the info encoded in the filename (page name). We need the info
  154. # stored in the rclog (summary, ip, host, username) for the history
  155. # page. Don't trust the modification dates of the files themselves,
  156. # which is why we have the timestamp in the file, too. We need the
  157. # timestamp when expiring old keep files. We need all the info in the
  158. # page file that will eventually end up in the keep file.
  159. sub basic_data {
  160. my $data = 'ts: ' . $section{ts} . "\n" if $section{ts};
  161. $data .= 'keep-ts: ' . $section{keepts} . "\n" if $section{keepts};
  162. $data .= 'revision: ' . $section{revision} . "\n" if $section{revision};
  163. $data .= 'summary: ' . $section{summary} . "\n" if $section{summary};
  164. $data .= 'summary: ' . $text{summary} . "\n" if $text{summary} and not $section{summary};
  165. $data .= 'username: ' . $section{username} . "\n" if $section{username};
  166. $data .= 'ip: ' . $section{ip} . "\n" if $section{ip};
  167. $data .= 'host: ' . $section{host} . "\n" if $section{host};
  168. $data .= 'minor: ' . $text{minor} . "\n" if $text{minor};
  169. # $data .= 'oldmajor: ' . $page{cache_oldmajor} . "\n" if $page{cache_oldmajor};
  170. $data .= 'text: ' . escape_newlines($text{text}) . "\n";
  171. return $data;
  172. }
  173. sub write_page_file {
  174. my $file = shift;
  175. my $data = basic_data();
  176. $data .= cache($page{cache_blocks});
  177. $data .= 'diff-major: ' . escape_newlines($page{cache_diff_default_major}) . "\n"
  178. if $page{cache_diff_default_major};
  179. $data .= 'diff-minor: ' . escape_newlines($page{cache_diff_default_minor}) . "\n"
  180. if $page{cache_diff_default_minor};
  181. write_file($file, $data);
  182. }
  183. sub write_keep_file {
  184. my $file = shift;
  185. my $data = basic_data();
  186. write_file($file, $data);
  187. }
  188. # This Latin-1 to UTF-8 conversion was written by Skalman on the
  189. # Oddmuse Wiki. He says: I added a quick, dirty and completely
  190. # unreadable hack to convert all characters above 0x7F:
  191. # s/([\x80-\xff])/chr(0xc0+(ord($1)>>6)).chr(ord($1)&0b00111111|0b10000000)/ge;
  192. # Reading the UTF-8 and Unicode FAQ, I convert every character to
  193. # (binary) 110xxxxx 10xxxxxx where the 'x' marks the bits of the
  194. # original ISO-8859-1 character. That is: take the two most
  195. # significant bits of the caracter and add them to 0xC0 (first byte),
  196. # then replace the first two bits with 10 (second byte).
  197. sub utf8 {
  198. $_ = shift;
  199. s/([\x80-\xff])/chr(0xc0+(ord($1)>>6)).chr(ord($1)&0b00111111|0b10000000)/ge;
  200. return $_;
  201. }