Main Page | Namespace List | Class Hierarchy | Class List | File List | Class Members | File Members | Related Pages

importUseModWiki.php

Go to the documentation of this file.
<?php

/*
	Import data from a UseModWiki into a PediaWiki wiki
	2003-02-09 Brion VIBBER <brion@pobox.com>
	Based loosely on Magnus's code from 2001-2002

	Updated limited version to get something working temporarily
	2003-10-09
	Be sure to run the link & index rebuilding scripts!

	Some more munging for charsets etc
	2003-11-28

	The script writes SQL to standard output. Progress and diagnostic
	messages are emitted as SQL comment lines (prefixed with "--") so the
	whole output can be fed to a SQL client as-is.

	Syntax is deliberately kept PHP 4 compatible (array(), no closures,
	no type declarations) so the script still runs where it was written for.
*/

/* Set these correctly! */
$wgImportEncoding = "CP1252"; /* We convert all to UTF-8 */
$wgRootDirectory = "/home/usemod/wiki-ia/lib-http/db/wiki";

/* globals */
$wgFieldSeparator = "\xb3"; # Some wikis may use different char
$FS = $wgFieldSeparator ;
$FS1 = $FS."1" ; # separates the fields of a page file / the revisions of a keep file
$FS2 = $FS."2" ; # separates the fields of a section
$FS3 = $FS."3" ; # separates the fields of a section's "data" entry

$conversiontime = wfTimestampNow(); # Conversions will be marked with this timestamp
$usercache = array(); # user name => user id; stays empty until user import exists

wfSeedRandom();
importPages();

# ------------------------------------------------------------------------------

/**
 * Walk the per-letter page directories (A-Z and "other") below
 * $wgRootDirectory/page and import every directory that exists.
 */
function importPages()
{
	global $wgRootDirectory;

	$letters = array_merge( range( 'A', 'Z' ), array( 'other' ) );
	foreach( $letters as $letter ) {
		$dir = "$wgRootDirectory/page/$letter";
		if( is_dir( $dir ) )
			importPageDirectory( $dir );
	}
}

/**
 * Import every "*.db" file in a directory and recurse into subdirectories.
 *
 * @param string $dir    Directory to scan.
 * @param string $prefix Title prefix (including trailing slash) for pages
 *                       found in this directory; empty for a letter directory.
 */
function importPageDirectory( $dir, $prefix = "" )
{
	echo "\n-- Checking page directory $dir\n";
	$mydir = opendir( $dir );
	if( $mydir === false ) {
		# Passing a failed handle on to readdir() is a fatal TypeError on PHP 8.
		echo "-- Couldn't open directory '$dir'. Skipping.\n";
		return;
	}
	# Compare against false explicitly: an entry named "0" is falsy and
	# would otherwise end the scan early.
	while( ( $entry = readdir( $mydir ) ) !== false ) {
		if( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {
			echo importPage( $prefix . $m[1] );
		} elseif( is_dir( "$dir/$entry" ) ) {
			if( $entry != '.' && $entry != '..' ) {
				# Carry the accumulated prefix down so the title matches the
				# path that fetchPage() rebuilds from it.
				importPageDirectory( "$dir/$entry", "$prefix$entry/" );
			}
		} else {
			echo "-- File '$entry' doesn't seem to contain an article. Skipping.\n";
		}
	}
	closedir( $mydir );
}


# ------------------------------------------------------------------------------

/* fetch_ functions
	Grab a given item from the database
*/

/**
 * Not implemented; always terminates the script.
 *
 * @param mixed $uid UseMod user id (unused).
 */
function fetchUser( $uid )
{
	die ("fetchUser not implemented" );
}

/**
 * Map a page title to its path relative to the page/ or keep/ directory:
 * titles starting with A-Z live under that letter, everything else
 * under "other".
 *
 * @param string $title
 * @return string Relative path without file extension.
 */
function useModFilename( $title ) {
	$c = substr( $title, 0, 1 );
	if(preg_match( '/[A-Z]/', $c ) ) {
		return "$c/$title";
	}
	return "other/$title";
}

/**
 * Build a revision object from a decoded section and its decoded data hash.
 *
 * @param array $section Section hash (reads "ts", "username", "host").
 * @param array $text    Data hash (reads "text", "summary", "minor").
 * @return object Object with text, summary, minor, ts, username, host.
 */
function useModRevision( $section, $text )
{
	return array2object( array(
		"text"     => $text["text"],
		"summary"  => $text["summary"],
		"minor"    => $text["minor"],
		"ts"       => $section["ts"],
		"username" => $section["username"],
		"host"     => $section["host"],
	) );
}

/**
 * Read the current revision of a page from its .db file.
 * Terminates the script if the file does not exist.
 *
 * @param string $title
 * @return object Revision object, see useModRevision().
 */
function fetchPage( $title )
{
	global $FS1,$FS2,$FS3, $wgRootDirectory;

	$fname = $wgRootDirectory . "/page/" . useModFilename( $title ) . ".db";
	if( !file_exists( $fname ) ) {
		die( "Couldn't open file '$fname' for page '$title'.\n" );
	}

	$page = splitHash( $FS1, file_get_contents( $fname ) );
	$section = splitHash( $FS2, $page["text_default"] );
	$text = splitHash( $FS3, $section["data"] );

	return useModRevision( $section, $text );
}

/**
 * Read the kept (old) revisions of a page from its .kp file.
 *
 * Revisions with empty text, an empty "minor" field or a non-positive
 * timestamp are skipped with a "--" notice.
 *
 * @param string $title
 * @return array List of revision objects; empty if there is no keep file.
 */
function fetchKeptPages( $title )
{
	global $FS1,$FS2,$FS3, $wgRootDirectory;

	$fname = $wgRootDirectory . "/keep/" . useModFilename( $title ) . ".kp";
	if( !file_exists( $fname ) ) return array();

	$keptlist = explode( $FS1, file_get_contents( $fname ) );
	array_shift( $keptlist ); # Drop the junk at beginning of file

	$revisions = array();
	foreach( $keptlist as $rev ) {
		$section = splitHash( $FS2, $rev );
		$text = splitHash( $FS3, $section["data"] );
		if ( $text["text"] && $text["minor"] != "" && ( $section["ts"]*1 > 0 ) ) {
			$revisions[] = useModRevision( $section, $text );
		} else {
			echo "-- skipped a bad old revision\n";
		}
	}
	return $revisions;
}

/**
 * Decode a flat "key SEP value SEP key SEP value ..." string into an
 * associative array. A trailing key without a value is dropped.
 *
 * @param string $sep Separator.
 * @param string $str Encoded string.
 * @return array
 */
function splitHash ( $sep , $str ) {
	$temp = explode ( $sep , $str ) ;
	$ret = array () ;
	$count = count ( $temp ) ;
	# Step over key/value pairs explicitly rather than incrementing the
	# index inside the assignment, which relies on operand evaluation order.
	for ( $i = 0; $i+1 < $count ; $i += 2 ) {
		$ret[$temp[$i]] = $temp[$i+1] ;
	}
	return $ret ;
}


/* import_ functions
	Take a fetched item and produce SQL
*/

/**
 * importUser
 * $uid is the UseMod user id number.
 * The new ones will be assigned arbitrarily and are for internal use only.
 *
 * THIS IS DELAYED SINCE PUBLIC DUMPS DONT INCLUDE USER DIR
 *
 * Not implemented; always terminates the script. The abandoned draft that
 * used to follow the die() was meant to emit one
 * "INSERT INTO user (user_id,user_name,user_password,user_options)" row,
 * with user_options holding cols, rows, rcdays, timecorrection and
 * hideminor lines. It was unreachable and referenced undefined helpers,
 * so it has been removed.
 *
 * @param mixed $uid UseMod user id (unused).
 */
function importUser( $uid )
{
	die("importUser NYI");
}

/**
 * Resolve an editor to a (user id, SQL-escaped user text) pair.
 *
 * @param string $name UseMod user name; empty for anonymous edits.
 * @param string $host Host recorded for the edit, used when $name is empty.
 * @return array array( int $userid, string $username ) where $username is
 *               already escaped with wfStrencode().
 */
function checkUserCache( $name, $host )
{
	global $usercache;

	if( $name ) {
		# The cache is keyed by name, so test the key. in_array() would
		# search the stored ids instead.
		if( array_key_exists( $name, $usercache ) ) {
			$userid = $usercache[$name];
		} else {
			# If we haven't imported user accounts
			$userid = 0;
		}
		$username = wfStrencode( $name );
	} else {
		$userid = 0;
		$username = wfStrencode( $host );
	}
	return array( $userid, $username );
}

/**
 * Turn a revision object into the recoded, SQL-escaped values shared by
 * the cur and old INSERT statements.
 *
 * @param object $rev Revision object, see useModRevision().
 * @return array Keys: text, comment, minor, userid, username, timestamp, inverse.
 */
function importRevisionValues( $rev )
{
	list( $userid ) = checkUserCache( $rev->username, $rev->host );
	# Recode and escape the raw name exactly once. The name returned by
	# checkUserCache() is already escaped, so escaping it again would
	# double every backslash and quote.
	$rawname = $rev->username ? $rev->username : $rev->host;
	$timestamp = wfUnix2Timestamp( $rev->ts );

	return array(
		"text"      => wfStrencode( recodeText( $rev->text ) ),
		"comment"   => wfStrencode( recodeText( $rev->summary ) ),
		"minor"     => ( $rev->minor ? 1 : 0 ),
		"userid"    => $userid,
		"username"  => wfStrencode( recodeText( $rawname ) ),
		"timestamp" => $timestamp,
		"inverse"   => wfInvertTimestamp( $timestamp ),
	);
}

/**
 * Produce the SQL for one page: an INSERT into cur for the current
 * revision and, if kept revisions exist, one multi-row INSERT into old.
 *
 * @param string $title UseMod page title (in the import encoding).
 * @return string SQL text.
 */
function importPage( $title )
{
	global $conversiontime;

	echo "\n-- Importing page $title\n";
	$page = fetchPage( $title );

	$newtitle = wfStrencode( recodeText( $title ) );
	$namespace = 0;

	# Current revision:
	$v = importRevisionValues( $page );
	$redirect = ( preg_match( '/^#REDIRECT/', $page->text ) ? 1 : 0 );
	$random = mt_rand() / mt_getrandmax();
	$sql = "
INSERT
  INTO cur (cur_namespace,cur_title,cur_text,cur_comment,cur_user,cur_user_text,cur_timestamp,inverse_timestamp,cur_touched,cur_minor_edit,cur_is_redirect,cur_random) VALUES
  ($namespace,'$newtitle','{$v['text']}','{$v['comment']}',{$v['userid']},'{$v['username']}','{$v['timestamp']}','{$v['inverse']}','$conversiontime',{$v['minor']},$redirect,$random);\n";

	# History
	$revisions = fetchKeptPages( $title );
	if(count( $revisions ) == 0 ) {
		return $sql;
	}

	$rows = array();
	foreach( $revisions as $rev ) {
		$v = importRevisionValues( $rev );
		$rows[] = "\n\t($namespace,'$newtitle','{$v['text']}','{$v['comment']}',{$v['userid']},'{$v['username']}','{$v['timestamp']}','{$v['inverse']}',{$v['minor']})";
	}
	$sql .= "INSERT
  INTO old (old_namespace,old_title,old_text,old_comment,old_user,old_user_text,old_timestamp,inverse_timestamp,old_minor_edit) VALUES\n";
	$sql .= implode( ",", $rows );
	$sql .= ";\n\n";
	return $sql;
}

# Whee!
/**
 * Normalise line endings, convert from $wgImportEncoding to UTF-8 and
 * expand numeric character references.
 *
 * NOTE(review): iconv() returns false when the input contains bytes that
 * are not valid in the source encoding; that result is not checked here,
 * so such a string would come out empty. Confirm whether that can happen
 * with the data being imported.
 *
 * @param string $string
 * @return string
 */
function recodeText( $string ) {
	global $wgImportEncoding;
	# For currently latin-1 wikis
	$string = str_replace( "\r\n", "\n", $string );
	$string = iconv( $wgImportEncoding, "UTF-8", $string );
	$string = wfMungeToUtf8( $string ); # Any old &#1234; stuff
	return $string;
}

/**
 * Encode a Unicode code point as a UTF-8 byte sequence.
 *
 * Surrogates (U+D800..U+DFFF) and values above U+10FFFF have no valid
 * UTF-8 form and are returned as a decimal character reference instead.
 *
 * @param int $codepoint
 * @return string
 */
function wfUtf8Sequence($codepoint) {
	if($codepoint < 0x80) return chr($codepoint);
	if($codepoint < 0x800) return chr($codepoint >> 6 & 0x3f | 0xc0) .
	                              chr($codepoint & 0x3f | 0x80);
	if($codepoint >= 0xd800 && $codepoint <= 0xdfff) return "&#$codepoint;";
	if($codepoint < 0x10000) return chr($codepoint >> 12 & 0x0f | 0xe0) .
	                                chr($codepoint >> 6 & 0x3f | 0x80) .
	                                chr($codepoint & 0x3f | 0x80);
	# Unicode ends at U+10FFFF, so the four-byte form covers up to 0x110000.
	if($codepoint < 0x110000) return chr($codepoint >> 18 & 0x07 | 0xf0) .
	                                 chr($codepoint >> 12 & 0x3f | 0x80) .
	                                 chr($codepoint >> 6 & 0x3f | 0x80) .
	                                 chr($codepoint & 0x3f | 0x80);
	# Not a Unicode code point
	return "&#$codepoint;";
}

/**
 * preg_replace_callback() handler for decimal references (&#1234;).
 *
 * @param array $m Match; $m[1] holds the digits.
 * @return string
 */
function wfMungeDecimalEntity( $m ) {
	$digits = ltrim( $m[1], '0' );
	# U+10FFFF is 1114111, i.e. seven digits. Anything longer cannot be a
	# code point and might overflow an int, so leave it untouched.
	if( strlen( $digits ) > 7 ) return $m[0];
	return wfUtf8Sequence( (int)$digits );
}

/**
 * preg_replace_callback() handler for hexadecimal references (&#x4d2;).
 *
 * @param array $m Match; $m[1] holds the hex digits.
 * @return string
 */
function wfMungeHexEntity( $m ) {
	$digits = ltrim( $m[1], '0' );
	# U+10FFFF needs six hex digits at most.
	if( strlen( $digits ) > 6 ) return $m[0];
	return wfUtf8Sequence( (int)hexdec( $digits ) );
}

/**
 * Replace numeric character references with the UTF-8 bytes they denote.
 *
 * Uses preg_replace_callback() instead of the /e modifier: /e was removed
 * in PHP 7, and it evaluated the digits as a PHP literal, so a reference
 * with a leading zero was read as octal.
 *
 * @param string $string
 * @return string
 */
function wfMungeToUtf8($string) {
	$string = preg_replace_callback ( '/&#([0-9]+);/', 'wfMungeDecimalEntity', $string );
	$string = preg_replace_callback ( '/&#x([0-9a-f]+);/i', 'wfMungeHexEntity', $string );
	# Should also do named entities here
	return $string;
}

/**
 * Escape a string for use inside a quoted SQL literal.
 *
 * @param string $string
 * @return string
 */
function wfStrencode( $string ) {
	if( function_exists( 'mysql_escape_string' ) ) {
		return mysql_escape_string( $string );
	}
	# ext/mysql was removed in PHP 7. This script only prints SQL and has
	# no connection, so apply the same escape set by hand. strtr() with an
	# array works in one pass, so inserted backslashes are not re-escaped.
	return strtr( (string)$string, array(
		"\\"   => "\\\\",
		"\0"   => "\\0",
		"\n"   => "\\n",
		"\r"   => "\\r",
		"'"    => "\\'",
		'"'    => '\\"',
		"\x1a" => "\\Z",
	) );
}

/**
 * Format a Unix timestamp as a 14-digit YYYYMMDDHHMMSS string in GMT.
 *
 * @param int|string $unixtime
 * @return string
 */
function wfUnix2Timestamp( $unixtime ) {
	# Cast explicitly: PHP 8 treats a null timestamp as "now", whereas a
	# missing value has always meant epoch 0 for this script.
	return gmdate( "YmdHis", (int)$unixtime );
}

/**
 * Convert a 14-digit YYYYMMDDHHMMSS GMT timestamp to a Unix timestamp.
 *
 * @param string $ts
 * @return int
 */
function wfTimestamp2Unix( $ts )
{
	return gmmktime( ( (int)substr( $ts, 8, 2) ),
		(int)substr( $ts, 10, 2 ), (int)substr( $ts, 12, 2 ),
		(int)substr( $ts, 4, 2 ), (int)substr( $ts, 6, 2 ),
		(int)substr( $ts, 0, 4 ) );
}

/**
 * Current time as a 14-digit YYYYMMDDHHMMSS string in GMT.
 *
 * @return string
 */
function wfTimestampNow() {
	# return NOW
	return gmdate( "YmdHis" );
}

# Sorting hack for MySQL 3, which doesn't use index sorts for DESC
/**
 * Replace every digit d of a timestamp with 9-d, so that ascending order
 * on the result equals descending order on the original.
 *
 * @param string $ts
 * @return string
 */
function wfInvertTimestamp( $ts ) {
	return strtr(
		$ts,
		"0123456789",
		"9876543210"
	);
}

/**
 * Seed the Mersenne Twister generator from the current microtime and
 * record that fact in the global $wgRandomSeeded.
 */
function wfSeedRandom()
{
	# Without this declaration the flag below would be a discarded local.
	global $wgRandomSeeded;

	$seed = hexdec(substr(md5(microtime()),-8)) & 0x7fffffff;
	mt_srand( $seed );
	$wgRandomSeeded = true;
}

/**
 * Copy the entries of an associative array onto a new plain object.
 *
 * @param array $arr
 * @return object
 */
function array2object( $arr ) {
	# Start from an empty object. Casting a scalar with (object)0 would
	# leave a stray "scalar" property on every result.
	$o = new stdClass;
	foreach( $arr as $x => $y ) {
		$o->$x = $y;
	}
	return $o;
}

?>

Generated on Tue Jun 29 23:40:03 2004 for Mediawiki by doxygen 1.3.7