00001 <?php 00002 00003 function convertLinks() { 00004 print "Converting links table to ID-ID...\n"; 00005 00006 global $wgLang, $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname; 00007 global $noKeys, $logPerformance, $fh; 00008 00009 $numRows = $tuplesAdded = $numBadLinks = $curRowsRead = 0; #counters etc 00010 $totalTuplesInserted = 0; # total tuples INSERTed into links_temp 00011 00012 $reportCurReadProgress = true; #whether or not to give progress reports while reading IDs from cur table 00013 $curReadReportInterval = 1000; #number of rows between progress reports 00014 00015 $reportLinksConvProgress = true; #whether or not to give progress reports during conversion 00016 $linksConvInsertInterval = 1000; #number of rows per INSERT 00017 00018 $initialRowOffset = 0; 00019 #$finalRowOffset = 0; # not used yet; highest row number from links table to process 00020 00021 # Overwrite the old links table with the new one. If this is set to false, 00022 # the new table will be left at links_temp. 00023 $overwriteLinksTable = true; 00024 00025 # Don't create keys, and so allow duplicates in the new links table. 00026 # This gives a huge speed improvement for very large links tables which are MyISAM. (What about InnoDB?) 00027 $noKeys = false; 00028 00029 00030 $logPerformance = false; # output performance data to a file 00031 $perfLogFilename = "convLinksPerf.txt"; 00032 #-------------------------------------------------------------------- 00033 00034 $res = wfQuery( "SELECT l_from FROM links LIMIT 1", DB_READ ); 00035 if ( mysql_field_type( $res, 0 ) == "int" ) { 00036 print "Schema already converted\n"; 00037 return; 00038 } 00039 00040 $res = wfQuery( "SELECT COUNT(*) AS count FROM links", DB_WRITE ); 00041 $row = wfFetchObject($res); 00042 $numRows = $row->count; 00043 wfFreeResult( $res ); 00044 00045 if ( $numRows == 0 ) { 00046 print "Updating schema (no rows to convert)...\n"; 00047 createTempTable(); 00048 } else { 00049 if ( $logPerformance ) { $fh = fopen ( $perfLogFilename, "w" ); } 00050 $baseTime = $startTime = getMicroTime(); 00051 # Create a title -> cur_id map 00052 print "Loading IDs from cur table...\n"; 00053 performanceLog ( "Reading $numRows rows from cur table...\n" ); 00054 performanceLog ( "rows read vs seconds elapsed:\n" ); 00055 wfBufferSQLResults( false ); 00056 $res = wfQuery( "SELECT cur_namespace,cur_title,cur_id FROM cur", DB_WRITE ); 00057 $ids = array(); 00058 00059 while ( $row = wfFetchObject( $res ) ) { 00060 $title = $row->cur_title; 00061 if ( $row->cur_namespace ) { 00062 $title = $wgLang->getNsText( $row->cur_namespace ) . ":$title"; 00063 } 00064 $ids[$title] = $row->cur_id; 00065 $curRowsRead++; 00066 if ($reportCurReadProgress) { 00067 if (($curRowsRead % $curReadReportInterval) == 0) { 00068 performanceLog( $curRowsRead . " " . (getMicroTime() - $baseTime) . "\n" ); 00069 print "\t$curRowsRead rows of cur table read.\n"; 00070 } 00071 } 00072 } 00073 wfFreeResult( $res ); 00074 wfBufferSQLResults( true ); 00075 print "Finished loading IDs.\n\n"; 00076 performanceLog( "Took " . (getMicroTime() - $baseTime) . " seconds to load IDs.\n\n" ); 00077 #-------------------------------------------------------------------- 00078 00079 # Now, step through the links table (in chunks of $linksConvInsertInterval rows), 00080 # convert, and write to the new table. 00081 createTempTable(); 00082 performanceLog( "Resetting timer.\n\n" ); 00083 $baseTime = getMicroTime(); 00084 print "Processing $numRows rows from links table...\n"; 00085 performanceLog( "Processing $numRows rows from links table...\n" ); 00086 performanceLog( "rows inserted vs seconds elapsed:\n" ); 00087 00088 for ($rowOffset = $initialRowOffset; $rowOffset < $numRows; $rowOffset += $linksConvInsertInterval) { 00089 $sqlRead = "SELECT * FROM links ".wfLimitResult($linksConvInsertInterval,$rowOffset); 00090 $res = wfQuery($sqlRead, DB_READ); 00091 if ( $noKeys ) { 00092 $sqlWrite = array("INSERT INTO links_temp(l_from,l_to) VALUES "); 00093 } else { 00094 $sqlWrite = array("INSERT IGNORE INTO links_temp(l_from,l_to) VALUES "); 00095 } 00096 00097 $tuplesAdded = 0; # no tuples added to INSERT yet 00098 while ( $row = wfFetchObject($res) ) { 00099 $fromTitle = $row->l_from; 00100 if ( array_key_exists( $fromTitle, $ids ) ) { # valid title 00101 $from = $ids[$fromTitle]; 00102 $to = $row->l_to; 00103 if ( $tuplesAdded != 0 ) { 00104 $sqlWrite[] = ","; 00105 } 00106 $sqlWrite[] = "($from,$to)"; 00107 $tuplesAdded++; 00108 } else { # invalid title 00109 $numBadLinks++; 00110 } 00111 } 00112 wfFreeResult($res); 00113 #print "rowOffset: $rowOffset\ttuplesAdded: $tuplesAdded\tnumBadLinks: $numBadLinks\n"; 00114 if ( $tuplesAdded != 0 ) { 00115 if ($reportLinksConvProgress) { 00116 print "Inserting $tuplesAdded tuples into links_temp..."; 00117 } 00118 wfQuery( implode("",$sqlWrite) , DB_WRITE ); 00119 $totalTuplesInserted += $tuplesAdded; 00120 if ($reportLinksConvProgress) 00121 print " done. Total $totalTuplesInserted tuples inserted.\n"; 00122 performanceLog( $totalTuplesInserted . " " . (getMicroTime() - $baseTime) . "\n" ); 00123 } 00124 } 00125 print "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n\n"; 00126 performanceLog( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n" ); 00127 performanceLog( "Total execution time: " . (getMicroTime() - $startTime) . " seconds.\n" ); 00128 if ( $logPerformance ) { fclose ( $fh ); } 00129 } 00130 #-------------------------------------------------------------------- 00131 00132 if ( $overwriteLinksTable ) { 00133 $dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname ); 00134 if (!($dbConn->isOpen())) { 00135 print "Opening connection to database failed.\n"; 00136 return; 00137 } 00138 # Check for existing links_backup, and delete it if it exists. 00139 print "Dropping backup links table if it exists..."; 00140 $dbConn->query( "DROP TABLE IF EXISTS links_backup", DB_WRITE); 00141 print " done.\n"; 00142 00143 # Swap in the new table, and move old links table to links_backup 00144 print "Swapping tables 'links' to 'links_backup'; 'links_temp' to 'links'..."; 00145 $dbConn->query( "RENAME TABLE links TO links_backup, links_temp TO links", DB_WRITE ); 00146 print " done.\n\n"; 00147 00148 $dbConn->close(); 00149 print "Conversion complete. The old table remains at links_backup;\n"; 00150 print "delete at your leisure.\n"; 00151 } else { 00152 print "Conversion complete. The converted table is at links_temp;\n"; 00153 print "the original links table is unchanged.\n"; 00154 } 00155 } 00156 00157 #-------------------------------------------------------------------- 00158 00159 function createTempTable() { 00160 global $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname; 00161 global $noKeys; 00162 $dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname ); 00163 00164 if (!($dbConn->isOpen())) { 00165 print "Opening connection to database failed.\n"; 00166 return; 00167 } 00168 00169 print "Dropping temporary links table if it exists..."; 00170 $dbConn->query( "DROP TABLE IF EXISTS links_temp", DB_WRITE); 00171 print " done.\n"; 00172 00173 print "Creating temporary links table..."; 00174 if ( $noKeys ) { 00175 $dbConn->query( "CREATE TABLE links_temp ( " . 00176 "l_from int(8) unsigned NOT NULL default '0', " . 00177 "l_to int(8) unsigned NOT NULL default '0')", DB_WRITE); 00178 } else { 00179 $dbConn->query( "CREATE TABLE links_temp ( " . 00180 "l_from int(8) unsigned NOT NULL default '0', " . 00181 "l_to int(8) unsigned NOT NULL default '0', " . 00182 "UNIQUE KEY l_from(l_from,l_to), " . 00183 "KEY (l_to))", DB_WRITE); 00184 } 00185 print " done.\n\n"; 00186 } 00187 00188 function performanceLog( $text ) { 00189 global $logPerformance, $fh; 00190 if ( $logPerformance ) { 00191 fwrite( $fh, $text ); 00192 } 00193 } 00194 00195 function getMicroTime() { # return time in seconds, with microsecond accuracy 00196 list($usec, $sec) = explode(" ", microtime()); 00197 return ((float)$usec + (float)$sec); 00198 } 00199 00200 00201 00202 ?>