123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345 |
- <?php
- /**
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Maintenance
- */
- use MediaWiki\MediaWikiServices;
- use MediaWiki\Storage\NameTableStore;
- use MediaWiki\Storage\SqlBlobStore;
- use Wikimedia\Assert\Assert;
- use Wikimedia\Rdbms\IDatabase;
- use Wikimedia\Rdbms\ResultWrapper;
- require_once __DIR__ . '/Maintenance.php';
/**
 * Maintenance script that populates the content and slots tables from
 * rows of the revision and/or archive tables.
 *
 * For each revision/archive row that has no matching slots row, a slots row
 * for the "main" role is inserted, pointing at a content row identified by
 * the pair (content model id, blob address). Content rows are inserted as
 * needed; with --reuse-content, existing content rows with the same model
 * and address are reused instead of being duplicated.
 *
 * @since 1.32
 */
class PopulateContentTables extends Maintenance {

	/** @var IDatabase Connection obtained with DB_MASTER; used for population reads and writes */
	private $dbw;

	/** @var NameTableStore Maps content model names to numeric ids */
	private $contentModelStore;

	/** @var int Id of the "main" slot role */
	private $mainRoleId;

	/** @var array|null Map "{$modelId}:{$address}" to content_id; null unless --reuse-content is set */
	private $contentRowMap = null;

	/** @var int Number of slot rows inserted for the table currently being processed */
	private $count = 0;

	/** @var int Number of slot rows inserted across all tables in this run */
	private $totalCount = 0;

	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Populate content and slot tables' );
		$this->addOption( 'table', 'revision or archive table, or `all` to populate both', false,
			true );
		$this->addOption( 'reuse-content',
			'Reuse content table rows when the address and model are the same. '
			. 'This will increase the script\'s time and memory usage, perhaps significantly.',
			false, false );
		$this->addOption( 'start-revision', 'The rev_id to start at', false, true );
		$this->addOption( 'start-archive', 'The ar_rev_id to start at', false, true );
		$this->setBatchSize( 500 );
	}

	/**
	 * Set up the database handle and the name-table lookups used while populating.
	 */
	private function initServices() {
		$services = MediaWikiServices::getInstance();

		$this->dbw = $this->getDB( DB_MASTER );
		$this->contentModelStore = $services->getContentModelStore();
		$this->mainRoleId = $services->getSlotRoleStore()->acquireId( 'main' );
	}

	/**
	 * Entry point: populate the selected tables and report timing.
	 *
	 * @return bool False if the schema migration stage does not allow writing
	 *  the new schema, true once all selected tables have been processed.
	 */
	public function execute() {
		global $wgMultiContentRevisionSchemaMigrationStage;

		$t0 = microtime( true );

		if ( ( $wgMultiContentRevisionSchemaMigrationStage & SCHEMA_COMPAT_WRITE_NEW ) === 0 ) {
			// Single-quoted string: "$" needs no escaping, and a backslash
			// in front of it would be printed verbatim.
			$this->writeln(
				'...cannot update while $wgMultiContentRevisionSchemaMigrationStage '
				. 'does not have the SCHEMA_COMPAT_WRITE_NEW bit set.'
			);
			return false;
		}

		$this->initServices();

		if ( $this->getOption( 'reuse-content', false ) ) {
			$this->loadContentMap();
		}

		foreach ( $this->getTables() as $table ) {
			$this->populateTable( $table );
		}

		$elapsed = microtime( true ) - $t0;
		$this->writeln( "Done. Processed $this->totalCount rows in $elapsed seconds" );
		return true;
	}

	/**
	 * Resolve the --table option into the list of tables to process.
	 *
	 * Reports a fatal error through fatalError() when the option value is not
	 * one of `all`, `revision` or `archive`.
	 *
	 * @return string[]
	 */
	private function getTables() {
		$table = $this->getOption( 'table', 'all' );
		$validTableOptions = [ 'all', 'revision', 'archive' ];

		// Strict comparison so that only the exact option strings are accepted.
		if ( !in_array( $table, $validTableOptions, true ) ) {
			$this->fatalError( 'Invalid table. Must be either `revision` or `archive` or `all`' );
		}

		return $table === 'all' ? [ 'revision', 'archive' ] : [ $table ];
	}

	/**
	 * Load every existing content row into $this->contentRowMap, keyed by
	 * "{$content_model}:{$content_address}", reading in batches ordered by
	 * content_id from a DB_REPLICA connection.
	 */
	private function loadContentMap() {
		$t0 = microtime( true );
		$this->writeln( "Loading existing content table rows..." );
		$this->contentRowMap = [];

		$dbr = $this->getDB( DB_REPLICA );
		// Highest content_id seen so far; false until the first batch has been read.
		$from = false;

		while ( true ) {
			$res = $dbr->select(
				'content',
				[ 'content_id', 'content_address', 'content_model' ],
				// Cast to int so the value interpolated into SQL is always a plain number.
				$from !== false ? 'content_id > ' . (int)$from : '',
				__METHOD__,
				[ 'ORDER BY' => 'content_id', 'LIMIT' => $this->getBatchSize() ]
			);

			if ( !$res || !$res->numRows() ) {
				break;
			}

			foreach ( $res as $row ) {
				$from = $row->content_id;
				$this->contentRowMap["{$row->content_model}:{$row->content_address}"] = $row->content_id;
			}
		}

		$elapsed = microtime( true ) - $t0;
		$this->writeln( "Loaded " . count( $this->contentRowMap ) . " rows in $elapsed seconds" );
	}

	/**
	 * Populate content and slots rows for all rows of one source table that
	 * do not yet have a slots row, walking the id range in batches.
	 *
	 * @param string $table Either 'revision' or anything else, which is treated as 'archive'
	 */
	private function populateTable( $table ) {
		$t0 = microtime( true );
		$this->count = 0;
		$this->writeln( "Populating $table..." );

		// Both variants alias their columns to the same field names so that the
		// batch handler can treat revision and archive rows alike.
		if ( $table === 'revision' ) {
			$idField = 'rev_id';
			$tables = [ 'revision', 'slots', 'page' ];
			$fields = [
				'rev_id',
				'len' => 'rev_len',
				'sha1' => 'rev_sha1',
				'text_id' => 'rev_text_id',
				'content_model' => 'rev_content_model',
				'namespace' => 'page_namespace',
				'title' => 'page_title',
			];
			$joins = [
				'slots' => [ 'LEFT JOIN', 'rev_id=slot_revision_id' ],
				'page' => [ 'LEFT JOIN', 'rev_page=page_id' ],
			];
			$startOption = 'start-revision';
		} else {
			$idField = 'ar_rev_id';
			$tables = [ 'archive', 'slots' ];
			$fields = [
				'rev_id' => 'ar_rev_id',
				'len' => 'ar_len',
				'sha1' => 'ar_sha1',
				'text_id' => 'ar_text_id',
				'content_model' => 'ar_content_model',
				'namespace' => 'ar_namespace',
				'title' => 'ar_title',
			];
			$joins = [
				'slots' => [ 'LEFT JOIN', 'ar_rev_id=slot_revision_id' ],
			];
			$startOption = 'start-archive';
		}

		$minmax = $this->dbw->selectRow(
			$table,
			[ 'min' => "MIN( $idField )", 'max' => "MAX( $idField )" ],
			'',
			__METHOD__
		);

		// Validate the result before touching it: writing the start option into
		// a false/empty result would dereference a non-object.
		if ( !$minmax || !is_numeric( $minmax->min ) || !is_numeric( $minmax->max ) ) {
			// No rows? Use an empty range so the loop below does nothing.
			$minId = 1;
			$maxId = 0;
		} else {
			$minId = $this->hasOption( $startOption )
				? (int)$this->getOption( $startOption )
				: (int)$minmax->min;
			$maxId = (int)$minmax->max;
		}

		// A non-positive batch size would never advance the loop below.
		$batchSize = max( 1, (int)$this->getBatchSize() );

		for ( $startId = $minId; $startId <= $maxId; $startId += $batchSize ) {
			$endId = min( $startId + $batchSize - 1, $maxId );

			$rows = $this->dbw->select(
				$tables,
				$fields,
				[
					"$idField >= $startId",
					"$idField <= $endId",
					// Only rows that have no slots row yet (see the LEFT JOIN on slots).
					'slot_revision_id IS NULL',
				],
				__METHOD__,
				[ 'ORDER BY' => 'rev_id' ],
				$joins
			);

			if ( $rows->numRows() !== 0 ) {
				$this->populateContentTablesForRowBatch( $rows, $startId, $table );
			}

			$elapsed = microtime( true ) - $t0;
			$this->writeln(
				"... $table processed up to revision id $endId of $maxId"
				. " ($this->count rows in $elapsed seconds)"
			);
		}

		$elapsed = microtime( true ) - $t0;
		$this->writeln( "Done populating $table table. Processed $this->count rows in $elapsed seconds" );
	}

	/**
	 * Insert the content and slots rows for one batch of source rows inside a
	 * single transaction.
	 *
	 * On any \Exception the transaction is rolled back and the failure is
	 * reported through fatalError().
	 *
	 * @param ResultWrapper $rows Rows exposing rev_id, len, sha1, text_id and
	 *  content_model (plus namespace and title, used when content_model is unset)
	 * @param int $startId First id of the batch; only used in the error message
	 * @param string $table Source table name; only used in the error message
	 */
	private function populateContentTablesForRowBatch( ResultWrapper $rows, $startId, $table ) {
		$this->beginTransaction( $this->dbw, __METHOD__ );

		// With --reuse-content the shared map is updated in place, so content
		// rows are reused across batches; otherwise the map lives for this batch only.
		if ( $this->contentRowMap === null ) {
			$map = [];
		} else {
			$map = &$this->contentRowMap;
		}
		// Revision id => "{$modelId}:{$address}" key into $map.
		$contentKeys = [];

		try {
			// Step 1: Figure out content rows needing insertion.
			$contentRows = [];
			foreach ( $rows as $row ) {
				$revisionId = $row->rev_id;

				Assert::invariant( $revisionId !== null, 'rev_id must not be null' );

				$modelId = $this->contentModelStore->acquireId( $this->getContentModel( $row ) );
				$address = SqlBlobStore::makeAddressFromTextId( $row->text_id );

				$key = "{$modelId}:{$address}";
				$contentKeys[$revisionId] = $key;

				if ( !isset( $map[$key] ) ) {
					// Placeholder: marks the key as pending so a repeated key in this
					// batch is queued only once; replaced by the real id in step 2.
					$map[$key] = false;
					$contentRows[] = [
						'content_size' => (int)$row->len,
						'content_sha1' => $row->sha1,
						'content_model' => $modelId,
						'content_address' => $address,
					];
				}
			}

			// Step 2: Insert them, then read them back in for use in the next step.
			if ( $contentRows ) {
				$lastId = $this->dbw->selectField( 'content', 'MAX(content_id)', '', __METHOD__ );
				$this->dbw->insert( 'content', $contentRows, __METHOD__ );
				$inserted = $this->dbw->select(
					'content',
					[ 'content_id', 'content_model', 'content_address' ],
					'content_id > ' . (int)$lastId,
					__METHOD__
				);
				foreach ( $inserted as $contentRow ) {
					$key = $contentRow->content_model . ':' . $contentRow->content_address;
					$map[$key] = $contentRow->content_id;
				}
			}

			// Step 3: Insert the slot rows.
			$slotRows = [];
			foreach ( $rows as $row ) {
				$revisionId = $row->rev_id;
				$contentId = $map[$contentKeys[$revisionId]] ?? false;

				if ( $contentId === false ) {
					throw new \RuntimeException( "Content row for $revisionId not found after content insert" );
				}

				$slotRows[] = [
					'slot_revision_id' => $revisionId,
					'slot_role_id' => $this->mainRoleId,
					'slot_content_id' => $contentId,
					// There's no way to really know the previous revision, so assume no inheriting.
					// rev_parent_id can get changed on undeletions, and deletions can screw up
					// rev_timestamp ordering.
					'slot_origin' => $revisionId,
				];
			}

			$this->dbw->insert( 'slots', $slotRows, __METHOD__ );
			$this->count += count( $slotRows );
			$this->totalCount += count( $slotRows );
		} catch ( \Exception $e ) {
			$this->rollbackTransaction( $this->dbw, __METHOD__ );
			$this->fatalError( "Failed to populate content table $table row batch starting at $startId "
				. "due to exception: " . $e->__toString() );
		}

		$this->commitTransaction( $this->dbw, __METHOD__ );
	}

	/**
	 * Determine the content model name for a source row.
	 *
	 * @param \stdClass $row Row with content_model, namespace and title fields
	 * @return string The row's content_model if set, otherwise the default
	 *  model for the title built from the row's namespace and title
	 */
	private function getContentModel( $row ) {
		if ( isset( $row->content_model ) ) {
			return $row->content_model;
		}

		$title = Title::makeTitle( $row->namespace, $row->title );

		return ContentHandler::getDefaultModelFor( $title );
	}

	/**
	 * Output a message followed by a newline.
	 *
	 * @param string $msg
	 */
	private function writeln( $msg ) {
		$this->output( "$msg\n" );
	}
}
// Name the maintenance class for the runner, then include the file named by
// RUN_MAINTENANCE_IF_MAIN (presumably runs the script when invoked directly — confirm).
$maintClass = PopulateContentTables::class;
require_once RUN_MAINTENANCE_IF_MAIN;
|