populateArchiveRevId.php 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. <?php
  2. /**
  3. * Populate ar_rev_id in pre-1.5 rows
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. * @ingroup Maintenance
  22. */
  23. use Wikimedia\Rdbms\DBQueryError;
  24. use Wikimedia\Rdbms\IDatabase;
  25. require_once __DIR__ . '/Maintenance.php';
  26. /**
  27. * Maintenance script that populares archive.ar_rev_id in old rows
  28. *
  29. * @ingroup Maintenance
  30. * @since 1.31
  31. */
  32. class PopulateArchiveRevId extends LoggedUpdateMaintenance {
  33. /** @var array|null Dummy revision row */
  34. private static $dummyRev = null;
  35. public function __construct() {
  36. parent::__construct();
  37. $this->addDescription( 'Populate ar_rev_id in pre-1.5 rows' );
  38. $this->setBatchSize( 100 );
  39. }
  40. protected function getUpdateKey() {
  41. return __CLASS__;
  42. }
  43. protected function doDBUpdates() {
  44. $this->output( "Populating ar_rev_id...\n" );
  45. $dbw = $this->getDB( DB_MASTER );
  46. self::checkMysqlAutoIncrementBug( $dbw );
  47. // Quick exit if there are no rows needing updates.
  48. $any = $dbw->selectField(
  49. 'archive',
  50. 'ar_id',
  51. [ 'ar_rev_id' => null ],
  52. __METHOD__
  53. );
  54. if ( !$any ) {
  55. $this->output( "Completed ar_rev_id population, 0 rows updated.\n" );
  56. return true;
  57. }
  58. $count = 0;
  59. while ( true ) {
  60. wfWaitForSlaves();
  61. $arIds = $dbw->selectFieldValues(
  62. 'archive',
  63. 'ar_id',
  64. [ 'ar_rev_id' => null ],
  65. __METHOD__,
  66. [ 'LIMIT' => $this->getBatchSize(), 'ORDER BY' => [ 'ar_id' ] ]
  67. );
  68. if ( !$arIds ) {
  69. $this->output( "Completed ar_rev_id population, $count rows updated.\n" );
  70. return true;
  71. }
  72. $count += self::reassignArRevIds( $dbw, $arIds, [ 'ar_rev_id' => null ] );
  73. $min = min( $arIds );
  74. $max = max( $arIds );
  75. $this->output( " ... $min-$max\n" );
  76. }
  77. }
  78. /**
  79. * Check for (and work around) a MySQL auto-increment bug
  80. *
  81. * (T202032) MySQL until 8.0 and MariaDB until some version after 10.1.34
  82. * don't save the auto-increment value to disk, so on server restart it
  83. * might reuse IDs from deleted revisions. We can fix that with an insert
  84. * with an explicit rev_id value, if necessary.
  85. *
  86. * @param IDatabase $dbw
  87. */
  88. public static function checkMysqlAutoIncrementBug( IDatabase $dbw ) {
  89. if ( $dbw->getType() !== 'mysql' ) {
  90. return;
  91. }
  92. if ( !self::$dummyRev ) {
  93. self::$dummyRev = self::makeDummyRevisionRow( $dbw );
  94. }
  95. $ok = false;
  96. while ( !$ok ) {
  97. try {
  98. $dbw->doAtomicSection( __METHOD__, function ( $dbw, $fname ) {
  99. $dbw->insert( 'revision', self::$dummyRev, $fname );
  100. $id = $dbw->insertId();
  101. $toDelete[] = $id;
  102. $maxId = max(
  103. (int)$dbw->selectField( 'archive', 'MAX(ar_rev_id)', [], __METHOD__ ),
  104. (int)$dbw->selectField( 'slots', 'MAX(slot_revision_id)', [], __METHOD__ )
  105. );
  106. if ( $id <= $maxId ) {
  107. $dbw->insert( 'revision', [ 'rev_id' => $maxId + 1 ] + self::$dummyRev, $fname );
  108. $toDelete[] = $maxId + 1;
  109. }
  110. $dbw->delete( 'revision', [ 'rev_id' => $toDelete ], $fname );
  111. } );
  112. $ok = true;
  113. } catch ( DBQueryError $e ) {
  114. if ( $e->errno != 1062 ) { // 1062 is "duplicate entry", ignore it and retry
  115. throw $e;
  116. }
  117. }
  118. }
  119. }
  120. /**
  121. * Assign new ar_rev_ids to a set of ar_ids.
  122. * @param IDatabase $dbw
  123. * @param int[] $arIds
  124. * @param array $conds Extra conditions for the update
  125. * @return int Number of updated rows
  126. */
  127. public static function reassignArRevIds( IDatabase $dbw, array $arIds, array $conds = [] ) {
  128. if ( !self::$dummyRev ) {
  129. self::$dummyRev = self::makeDummyRevisionRow( $dbw );
  130. }
  131. $updates = $dbw->doAtomicSection( __METHOD__, function ( $dbw, $fname ) use ( $arIds ) {
  132. // Create new rev_ids by inserting dummy rows into revision and then deleting them.
  133. $dbw->insert( 'revision', array_fill( 0, count( $arIds ), self::$dummyRev ), $fname );
  134. $revIds = $dbw->selectFieldValues(
  135. 'revision',
  136. 'rev_id',
  137. [ 'rev_timestamp' => self::$dummyRev['rev_timestamp'] ],
  138. $fname
  139. );
  140. if ( !is_array( $revIds ) ) {
  141. throw new UnexpectedValueException( 'Failed to insert dummy revisions' );
  142. }
  143. if ( count( $revIds ) !== count( $arIds ) ) {
  144. throw new UnexpectedValueException(
  145. 'Tried to insert ' . count( $arIds ) . ' dummy revisions, but found '
  146. . count( $revIds ) . ' matching rows.'
  147. );
  148. }
  149. $dbw->delete( 'revision', [ 'rev_id' => $revIds ], $fname );
  150. return array_combine( $arIds, $revIds );
  151. } );
  152. $count = 0;
  153. foreach ( $updates as $arId => $revId ) {
  154. $dbw->update(
  155. 'archive',
  156. [ 'ar_rev_id' => $revId ],
  157. [ 'ar_id' => $arId ] + $conds,
  158. __METHOD__
  159. );
  160. $count += $dbw->affectedRows();
  161. }
  162. return $count;
  163. }
  164. /**
  165. * Construct a dummy revision table row to use for reserving IDs
  166. *
  167. * The row will have a wildly unlikely timestamp, and possibly a generic
  168. * user and comment, but will otherwise be derived from a revision on the
  169. * wiki's main page or some other revision in the database.
  170. *
  171. * @param IDatabase $dbw
  172. * @return array
  173. */
  174. private static function makeDummyRevisionRow( IDatabase $dbw ) {
  175. $ts = $dbw->timestamp( '11111111111111' );
  176. $rev = null;
  177. $mainPage = Title::newMainPage();
  178. $pageId = $mainPage ? $mainPage->getArticleId() : null;
  179. if ( $pageId ) {
  180. $rev = $dbw->selectRow(
  181. 'revision',
  182. '*',
  183. [ 'rev_page' => $pageId ],
  184. __METHOD__,
  185. [ 'ORDER BY' => 'rev_timestamp ASC' ]
  186. );
  187. }
  188. if ( !$rev ) {
  189. // No main page? Let's see if there are any revisions at all
  190. $rev = $dbw->selectRow(
  191. 'revision',
  192. '*',
  193. [],
  194. __METHOD__,
  195. [ 'ORDER BY' => 'rev_timestamp ASC' ]
  196. );
  197. }
  198. if ( !$rev ) {
  199. throw new UnexpectedValueException( 'No revisions are available to copy' );
  200. }
  201. unset( $rev->rev_id );
  202. $rev = (array)$rev;
  203. $rev['rev_timestamp'] = $ts;
  204. if ( isset( $rev['rev_user'] ) ) {
  205. $rev['rev_user'] = 0;
  206. $rev['rev_user_text'] = '0.0.0.0';
  207. }
  208. if ( isset( $rev['rev_comment'] ) ) {
  209. $rev['rev_comment'] = 'Dummy row';
  210. }
  211. $any = $dbw->selectField(
  212. 'revision',
  213. 'rev_id',
  214. [ 'rev_timestamp' => $ts ],
  215. __METHOD__
  216. );
  217. if ( $any ) {
  218. throw new UnexpectedValueException( "... Why does your database contain a revision dated $ts?" );
  219. }
  220. return $rev;
  221. }
  222. }
  223. $maintClass = "PopulateArchiveRevId";
  224. require_once RUN_MAINTENANCE_IF_MAIN;