mediawiki_mysql2postgres.pl 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. #!/usr/bin/perl
  2. ## Convert data from a MySQL mediawiki database into a Postgres mediawiki database
  3. ## NOTE: It is probably easier to dump your wiki using maintenance/dumpBackup.php
  4. ## and then import it with maintenance/importDump.php
  5. ## If having UTF-8 problems, there are reports that adding --compatible=postgresql
  6. ## may help.
  7. use strict;
  8. use warnings;
  9. use Data::Dumper;
  10. use Getopt::Long;
  11. use vars qw(%table %tz %special @torder $COM);
  12. my $VERSION = '1.2';
  13. ## The following options can be changed via command line arguments:
  14. my $MYSQLDB = '';
  15. my $MYSQLUSER = '';
  16. ## If the following are zero-length, we omit their arguments entirely:
  17. my $MYSQLHOST = '';
  18. my $MYSQLPASSWORD = '';
  19. my $MYSQLSOCKET = '';
  20. ## Name of the dump file created
  21. my $MYSQLDUMPFILE = 'mediawiki_upgrade.pg';
  22. ## How verbose should this script be (0, 1, or 2)
  23. my $verbose = 0;
  24. my $help = 0;
  25. my $USAGE = "
  26. Usage: $0 --db=<dbname> --user=<user> [OPTION]...
  27. Example: $0 --db=wikidb --user=wikiuser --pass=sushi
  28. Converts a MediaWiki schema from MySQL to Postgres
  29. Options:
  30. db Name of the MySQL database
  31. user MySQL database username
  32. pass MySQL database password
  33. host MySQL database host
  34. socket MySQL database socket
  35. verbose Verbosity, increases with multiple uses
  36. ";
  37. GetOptions
  38. (
  39. 'db=s' => \$MYSQLDB,
  40. 'user=s' => \$MYSQLUSER,
  41. 'pass=s' => \$MYSQLPASSWORD,
  42. 'host=s' => \$MYSQLHOST,
  43. 'socket=s' => \$MYSQLSOCKET,
  44. 'verbose+' => \$verbose,
  45. 'help' => \$help,
  46. );
  47. die $USAGE
  48. if ! length $MYSQLDB
  49. or ! length $MYSQLUSER
  50. or $help;
  51. ## The Postgres schema file: should not be changed
  52. my $PG_SCHEMA = 'tables.sql';
  53. ## What version we default to when we can't parse the old schema
  54. my $MW_DEFAULT_VERSION = 110;
  55. ## Try and find a working version of mysqldump
  56. $verbose and warn "Locating the mysqldump executable\n";
  57. my @MYSQLDUMP = ('/usr/local/bin/mysqldump', '/usr/bin/mysqldump');
  58. my $MYSQLDUMP;
  59. for my $mytry (@MYSQLDUMP) {
  60. next if ! -e $mytry;
  61. -x $mytry or die qq{Not an executable file: "$mytry"\n};
  62. my $version = qx{$mytry -V};
  63. $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n};
  64. $MYSQLDUMP = $mytry;
  65. }
  66. $MYSQLDUMP or die qq{Could not find the mysqldump program\n};
  67. ## Flags we use for mysqldump
  68. my @MYSQLDUMPARGS = qw(
  69. --skip-lock-tables
  70. --complete-insert
  71. --skip-extended-insert
  72. --skip-add-drop-table
  73. --skip-add-locks
  74. --skip-disable-keys
  75. --skip-set-charset
  76. --skip-comments
  77. --skip-quote-names
  78. );
  79. $verbose and warn "Checking that mysqldump can handle our flags\n";
  80. ## Make sure this version can handle all the flags we want.
  81. ## Combine with user dump below
  82. my $MYSQLDUMPARGS = join ' ' => @MYSQLDUMPARGS;
  83. ## Argh. Any way to make this work on Win32?
  84. my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1};
  85. if ($version =~ /unknown option/) {
  86. die qq{Sorry, you need to use a newer version of the mysqldump program than the one at "$MYSQLDUMP"\n};
  87. }
  88. push @MYSQLDUMPARGS, "--user=$MYSQLUSER";
  89. length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD";
  90. length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST";
  91. ## Open the dump file to hold the mysqldump output
  92. open my $mdump, '+>', $MYSQLDUMPFILE or die qq{Could not open "$MYSQLDUMPFILE": $!\n};
  93. print qq{Writing file "$MYSQLDUMPFILE"\n};
  94. open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, '--no-data', $MYSQLDB;
  95. my $oldselect = select $mdump;
  96. print while <$mfork2>;
  97. ## Slurp in the current schema
  98. my $current_schema;
  99. seek $mdump, 0, 0;
  100. {
  101. local $/;
  102. $current_schema = <$mdump>;
  103. }
  104. seek $mdump, 0, 0;
  105. truncate $mdump, 0;
  106. warn qq{Trying to determine database version...\n} if $verbose;
  107. my $current_version = 0;
  108. if ($current_schema =~ /CREATE TABLE \S+cur /) {
  109. $current_version = 103;
  110. }
  111. elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) {
  112. $current_version = 104;
  113. }
  114. elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) {
  115. $current_version = 105;
  116. }
  117. elsif ($current_schema !~ /CREATE TABLE \S+validate /) {
  118. $current_version = 106;
  119. }
  120. elsif ($current_schema !~ /ipb_auto tinyint/) {
  121. $current_version = 107;
  122. }
  123. elsif ($current_schema !~ /CREATE TABLE \S+profiling /) {
  124. $current_version = 108;
  125. }
  126. elsif ($current_schema !~ /CREATE TABLE \S+querycachetwo /) {
  127. $current_version = 109;
  128. }
  129. else {
  130. $current_version = $MW_DEFAULT_VERSION;
  131. }
  132. if (!$current_version) {
  133. warn qq{WARNING! Could not figure out the old version, assuming MediaWiki $MW_DEFAULT_VERSION\n};
  134. $current_version = $MW_DEFAULT_VERSION;
  135. }
  136. ## Check for a table prefix:
  137. my $table_prefix = '';
  138. if ($current_schema =~ /CREATE TABLE (\S+)querycache /) {
  139. $table_prefix = $1;
  140. }
  141. warn qq{Old schema is from MediaWiki version $current_version\n} if $verbose;
  142. warn qq{Table prefix is "$table_prefix"\n} if $verbose and length $table_prefix;
  143. $verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n};
  144. my $now = scalar localtime;
  145. my $conninfo = '';
  146. $MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST";
  147. $MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET";
  148. print qq{
  149. -- Dump of MySQL Mediawiki tables for import into a Postgres Mediawiki schema
  150. -- Performed by the program: $0
  151. -- Version: $VERSION
  152. -- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome
  153. --
  154. -- This file was created: $now
  155. -- Executable used: $MYSQLDUMP
  156. -- Connection information:
  157. -- database: $MYSQLDB
  158. -- user: $MYSQLUSER$conninfo
  159. -- This file can be imported manually with psql like so:
  160. -- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename
  161. -- This will overwrite any existing MediaWiki information, so be careful
  162. };
  163. ## psql specific stuff
  164. print q{
  165. \\set ON_ERROR_STOP
  166. BEGIN;
  167. SET client_min_messages = 'WARNING';
  168. SET timezone = 'GMT';
  169. SET DateStyle = 'ISO, YMD';
  170. };
  171. warn qq{Reading in the Postgres schema information\n} if $verbose;
  172. open my $schema, '<', $PG_SCHEMA
  173. or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n};
  174. my $t;
  175. while (<$schema>) {
  176. if (/CREATE TABLE\s+(\S+)/) {
  177. $t = $1;
  178. $table{$t}={};
  179. $verbose > 1 and warn qq{ Found table $t\n};
  180. }
  181. elsif (/^ +(\w+)\s+TIMESTAMP/) {
  182. $tz{$t}{$1}++;
  183. $verbose > 1 and warn qq{ Got a timestamp for column $1\n};
  184. }
  185. elsif (/REFERENCES\s*([^( ]+)/) {
  186. my $ref = $1;
  187. exists $table{$ref} or die qq{No parent table $ref found for $t\n};
  188. $table{$t}{$ref}++;
  189. }
  190. }
  191. close $schema or die qq{Could not close "$PG_SCHEMA": $!\n};
  192. ## Read in special cases and table/version information
  193. $verbose and warn qq{Reading in schema exception information\n};
  194. my %version_tables;
  195. while (<DATA>) {
  196. if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) {
  197. my $list = join '|' => split /\s+/ => $2;
  198. $version_tables{$1} = qr{\b$list\b};
  199. next;
  200. }
  201. next unless /^(\w+)\s*(.*)/;
  202. $special{$1} = $2||'';
  203. $special{$2} = $1 if length $2;
  204. }
  205. ## Determine the order of tables based on foreign key constraints
  206. $verbose and warn qq{Figuring out order of tables to dump\n};
  207. my %dumped;
  208. my $bail = 0;
  209. {
  210. my $found=0;
  211. T: for my $t (sort keys %table) {
  212. next if exists $dumped{$t} and $dumped{$t} >= 1;
  213. $found=1;
  214. for my $dep (sort keys %{$table{$t}}) {
  215. next T if ! exists $dumped{$dep} or $dumped{$dep} < 0;
  216. }
  217. $dumped{$t} = -1 if ! exists $dumped{$t};
  218. ## Skip certain tables that are not imported
  219. next if exists $special{$t} and !$special{$t};
  220. push @torder, $special{$t} || $t;
  221. }
  222. last if !$found;
  223. push @torder, '---';
  224. for (values %dumped) { $_+=2; }
  225. die "Too many loops!\n" if $bail++ > 1000;
  226. redo;
  227. }
  228. ## Prepare the Postgres database for the move
  229. $verbose and warn qq{Writing Postgres transformation information\n};
  230. print "\n-- Empty out all existing tables\n";
  231. $verbose and warn qq{Writing truncates to empty existing tables\n};
  232. for my $t (@torder, 'objectcache', 'querycache') {
  233. next if $t eq '---';
  234. my $tname = $special{$t}||$t;
  235. printf qq{TRUNCATE TABLE %-20s CASCADE;\n}, qq{"$tname"};
  236. }
  237. print "\n\n";
  238. print qq{-- Temporarily rename pagecontent to "${table_prefix}text"\n};
  239. print qq{ALTER TABLE pagecontent RENAME TO "${table_prefix}text";\n\n};
  240. print qq{-- Allow rc_ip to contain empty string, will convert at end\n};
  241. print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n};
  242. print "-- Changing all timestamp fields to handle raw integers\n";
  243. for my $t (sort keys %tz) {
  244. next if $t eq 'archive2';
  245. for my $c (sort keys %{$tz{$t}}) {
  246. printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c;
  247. }
  248. }
  249. print "\n";
  250. print q{
  251. INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10);
  252. };
  253. ## If we have a table _prefix, we need to temporarily rename all of our Postgres
  254. ## tables temporarily for the import. Perhaps consider making this an auto-schema
  255. ## thing in the future.
  256. if (length $table_prefix) {
  257. print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n};
  258. for my $t (@torder) {
  259. next if $t eq '---' or $t eq 'text' or $t eq 'user';
  260. my $tname = $special{$t}||$t;
  261. printf qq{ALTER TABLE %-18s RENAME TO "${table_prefix}$tname";\n}, qq{"$tname"};
  262. }
  263. }
  264. ## Try and dump the ill-named "user" table:
  265. ## We do this table alone because "user" is a reserved word.
  266. print q{
  267. SET escape_string_warning TO 'off';
  268. \\o /dev/null
  269. -- Postgres uses a table name of "mwuser" instead of "user"
  270. -- Create a dummy user to satisfy fk contraints especially with revisions
  271. SELECT setval('user_user_id_seq',0,'false');
  272. INSERT INTO mwuser
  273. VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
  274. };
  275. push @MYSQLDUMPARGS, '--no-create-info';
  276. $verbose and warn qq{Dumping "user" table\n};
  277. $verbose > 2 and warn Dumper \@MYSQLDUMPARGS;
  278. my $usertable = "${table_prefix}user";
  279. open my $mfork, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable;
  280. ## Unfortunately, there is no easy way to catch errors
  281. my $numusers = 0;
  282. while (<$mfork>) {
  283. ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/;
  284. }
  285. close $mfork;
  286. if ($numusers < 1) {
  287. warn qq{No users found, probably a connection error.\n};
  288. print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n};
  289. close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n};
  290. exit;
  291. }
  292. print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n";
  293. warn qq{Dumping all other tables from the MySQL schema\n} if $verbose;
  294. ## Dump the rest of the tables, in chunks based on constraints
  295. ## We do not need the user table:
  296. my @dumplist = grep { $_ ne 'user'} @torder;
  297. my @alist;
  298. {
  299. undef @alist;
  300. PICKATABLE: {
  301. my $tname = shift @dumplist;
  302. ## XXX Make this dynamic below
  303. for my $ver (sort {$b <=> $a } keys %version_tables) {
  304. redo PICKATABLE if $tname =~ $version_tables{$ver};
  305. }
  306. $tname = "${table_prefix}$tname" if length $table_prefix;
  307. next if $tname !~ /^\w/;
  308. push @alist, $tname;
  309. $verbose and warn " $tname...\n";
  310. pop @alist and last if index($alist[-1],'---') >= 0;
  311. redo if @dumplist;
  312. }
  313. ## Dump everything else
  314. open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist;
  315. print while <$mfork2>;
  316. close $mfork2;
  317. warn qq{Finished dumping from MySQL\n} if $verbose;
  318. redo if @dumplist;
  319. }
  320. warn qq{Writing information to return Postgres database to normal\n} if $verbose;
  321. print qq{ALTER TABLE "${table_prefix}text" RENAME TO pagecontent;\n};
  322. print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n};
  323. print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n};
  324. ## Return tables to their original names if a table prefix was used.
  325. if (length $table_prefix) {
  326. print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n};
  327. my $maxsize = 18;
  328. for (@torder) {
  329. $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize;
  330. }
  331. for my $t (@torder) {
  332. next if $t eq '---' or $t eq 'text' or $t eq 'user';
  333. my $tname = $special{$t}||$t;
  334. printf qq{ALTER TABLE %*s RENAME TO "$tname";\n}, $maxsize+1, qq{"${table_prefix}$tname"};
  335. }
  336. }
  337. print qq{\n\n--Returning timestamps to normal\n};
  338. for my $t (sort keys %tz) {
  339. next if $t eq 'archive2';
  340. for my $c (sort keys %{$tz{$t}}) {
  341. printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n".
  342. " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c;
  343. }
  344. }
  345. ## Reset sequences
  346. print q{
  347. SELECT setval('filearchive_fa_id_seq', 1+coalesce(max(fa_id) ,0),false) FROM filearchive;
  348. SELECT setval('ipblocks_ipb_id_seq', 1+coalesce(max(ipb_id) ,0),false) FROM ipblocks;
  349. SELECT setval('job_job_id_seq', 1+coalesce(max(job_id) ,0),false) FROM job;
  350. SELECT setval('logging_log_id_seq', 1+coalesce(max(log_id) ,0),false) FROM logging;
  351. SELECT setval('page_page_id_seq', 1+coalesce(max(page_id),0),false) FROM page;
  352. SELECT setval('page_restrictions_pr_id_seq', 1+coalesce(max(pr_id) ,0),false) FROM page_restrictions;
  353. SELECT setval('recentchanges_rc_id_seq', 1+coalesce(max(rc_id) ,0),false) FROM recentchanges;
  354. SELECT setval('revision_rev_id_seq', 1+coalesce(max(rev_id) ,0),false) FROM revision;
  355. SELECT setval('text_old_id_seq', 1+coalesce(max(old_id) ,0),false) FROM pagecontent;
  356. SELECT setval('user_user_id_seq', 1+coalesce(max(user_id),0),false) FROM mwuser;
  357. };
  358. print "COMMIT;\n\\o\n\n-- End of dump\n\n";
  359. select $oldselect;
  360. close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n};
  361. exit;
  362. __DATA__
  363. ## Known remappings: either indicate the MySQL name,
  364. ## or leave blank if it should be skipped
  365. pagecontent text
  366. mwuser user
  367. archive2
  368. profiling
  369. objectcache
  370. ## Which tables to ignore depending on the version
  371. VERSION 1.6: externallinks job templatelinks transcache
  372. VERSION 1.7: filearchive langlinks querycache_info
  373. VERSION 1.9: querycachetwo page_restrictions redirect