Mediawiki: Parser.php Source File

<?php

// require_once('Tokenizer.php');

# PHP Parser
#
# Processes wiki markup
#
# There are two main entry points into the Parser class: parse() and preSaveTransform().
# The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
#
# Globals used:
#    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
#
# NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
#
#    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
#               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
#               $wgLocaltimezone
#
#      * only within ParserOptions
#
#
#----------------------------------------
# Variable substitution O(N^2) attack
#-----------------------------------------
# Without countermeasures, it would be possible to attack the parser by saving a page
# filled with a large number of inclusions of large pages. The size of the generated
# page would be proportional to the square of the input size. Hence, we limit the number
# of inclusions of any given page, thus bringing any attack back to O(N).
#

define( "MAX_INCLUDE_REPEAT", 5 );

# Allowed values for $mOutputType
define( "OT_HTML", 1 );
define( "OT_WIKI", 2 );
define( "OT_MSG", 3 );

# string parameter for extractTags which will cause it
# to strip HTML comments in addition to regular
# <XML>-style tags. This should not be anything we
# may want to use in wikisyntax
define( "STRIP_COMMENTS", "HTMLCommentStrip" );

# prefix for escaping, used in two functions at least
define( "UNIQ_PREFIX", "NaodW29");

class Parser
{
	# Persistent:
	var $mTagHooks;

	# Cleared with clearState():
	var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
	var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;

	# Temporary:
	var $mOptions, $mTitle, $mOutputType;

	# Constructor: starts with no extension tag hooks and a clean per-parse state.
	function Parser() {
		$this->mTagHooks = array();
		$this->clearState();
	}

	# Resets every member listed under "Cleared with clearState()" to its
	# initial value and allocates a fresh ParserOutput.
	function clearState() {
		$this->mOutput = new ParserOutput;
		$this->mAutonumber = 0;
		$this->mLastSection = "";
		$this->mDTopen = false;
		$this->mVariables = false;
		$this->mIncludeCount = array();
		$this->mStripState = array();
		$this->mArgStack = array();
		$this->mInPre = false;
	}

	# First pass--just handle <nowiki> sections, pass the rest off
	# to internalParse() which does all the real work.
	#
	# $text       wiki markup to render
	# $title      title object of the page being parsed (stored by reference in mTitle)
	# $options    ParserOptions object (stored in mOptions)
	# $linestart  passed through to internalParse() and doBlockLevels()
	# $clearState when true, clearState() is called before parsing
	#
	# Returns a ParserOutput
	#
	function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
		global $wgUseTidy;
		$fname = "Parser::parse";
		wfProfileIn( $fname );

		if ( $clearState ) {
			$this->clearState();
		}

		$this->mOptions = $options;
		$this->mTitle =& $title;
		$this->mOutputType = OT_HTML;

		$text = $this->strip( $text, $this->mStripState );
		$text = $this->internalParse( $text, $linestart );
		$text = $this->unstrip( $text, $this->mStripState );

		# Clean up special characters, only run once, next-to-last before doBlockLevels.
		# The replacement strings insert &nbsp; / &amp; entities; with a plain space
		# or a bare '&' these rules would replace text with itself.
		if(!$wgUseTidy) {
			$fixtags = array(
				# french spaces, last one Guillemet-left
				# only if there is something before the space
				'/(.) (\\?|:|!|\\302\\273)/i' => '\\1&nbsp;\\2',
				# french spaces, Guillemet-right
				"/(\\302\\253) /i" => "\\1&nbsp;",
				'/<hr *>/i' => '<hr />',
				'/<br *>/i' => '<br />',
				'/<center *>/i' => '<div class="center">',
				'/<\\/center *>/i' => '</div>',
				# Clean up spare ampersands; note that we probably ought to be
				# more careful about named entities.
				'/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
			);
		} else {
			# With tidy enabled only the typographic and <center> fixes are applied here.
			$fixtags = array(
				# french spaces, last one Guillemet-left
				'/ (\\?|:|!|\\302\\273)/i' => '&nbsp;\\1',
				# french spaces, Guillemet-right
				'/(\\302\\253) /i' => '\\1&nbsp;',
				'/<center *>/i' => '<div class="center">',
				'/<\\/center *>/i' => '</div>'
			);
		}
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

		# only once and last
		$text = $this->doBlockLevels( $text, $linestart );
		$text = $this->unstripNoWiki( $text, $this->mStripState );
		if($wgUseTidy) {
			$text = $this->tidy($text);
		}
		$this->mOutput->setText( $text );
		wfProfileOut( $fname );
		return $this->mOutput;
	}

	# Returns the hex representation of two mt_rand() draws, concatenated.
	# Used to make strip markers unlikely to collide with page text.
	/* static */ function getRandomString() {
		return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
	}

	# Replaces all occurrences of <$tag>content</$tag> in the text
	# with a random marker and returns the new text. the output parameter
	# $content will be an associative array filled with data on the form
	# $unique_marker => content.
	#
	# If $content is already set, the additional entries will be appended
	#
	# If $tag is set to STRIP_COMMENTS, the function will extract
	# <!-- HTML comments -->
	#
	# An opening tag without a matching closing tag swallows the rest of the text.
	/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
		$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
		if ( !$content ) {
			$content = array( );
		}
		$n = 1;
		$stripped = '';

		# Build the opening/closing patterns once; the tag name is quoted so that
		# it is always matched literally.
		if ( $tag == STRIP_COMMENTS ) {
			$openPattern = '/<!--/i';
			$closePattern = '/-->/i';
		} else {
			$quotedTag = preg_quote( $tag, '/' );
			$openPattern = "/<\\s*{$quotedTag}\\s*>/i";
			$closePattern = "/<\\/\\s*{$quotedTag}\\s*>/i";
		}

		while ( '' != $text ) {
			$p = preg_split( $openPattern, $text, 2 );
			$stripped .= $p[0];
			if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
				# No (further) opening tag, or nothing after it: done.
				$text = '';
			} else {
				$q = preg_split( $closePattern, $p[1], 2 );
				$marker = $rnd . sprintf('%08X', $n++);
				$content[$marker] = $q[0];
				$stripped .= $marker;
				# $q[1] is absent when the closing tag is missing.
				$text = isset( $q[1] ) ? $q[1] : '';
			}
		}
		return $stripped;
	}

	# Strips and renders <nowiki>, <pre>, <math>, <hiero>
	# If $render is set, performs necessary rendering operations on plugins
	# Returns the text, and fills an array with data needed in unstrip()
	# If the $state is already a valid strip state, it adds to the state
	#
	# When $stripcomments is set, HTML comments <!-- like this one -->
	# will be stripped in addition to other tags. This is important
	# for section editing, where these comments cause confusion when
	# counting the sections in the wikisource
	function strip( $text, &$state, $stripcomments = false ) {
		$render = ($this->mOutputType == OT_HTML);
		$nowiki_content = array();
		$math_content = array();
		$pre_content = array();
		$comment_content = array();
		$ext_content = array();

		# Replace any instances of the placeholders
		$uniq_prefix = UNIQ_PREFIX;
		#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

		# nowiki
		$text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
		foreach( $nowiki_content as $marker => $content ){
			if( $render ){
				$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
			} else {
				$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
			}
		}

		# math
		$text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
		foreach( $math_content as $marker => $content ){
			if( $render && $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# Not rendering, or TeX disabled: put the element back with a
				# proper closing tag.
				$math_content[$marker] = "<math>$content</math>";
			}
		}

		# pre
		$text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
		foreach( $pre_content as $marker => $content ){
			if( $render ){
				$pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
			} else {
				$pre_content[$marker] = "<pre>$content</pre>";
			}
		}

		# Comments
		if($stripcomments) {
			$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
			foreach( $comment_content as $marker => $content ){
				$comment_content[$marker] = "";
			}
		}

		# Extensions
		foreach ( $this->mTagHooks as $tag => $callback ) {
			$ext_content[$tag] = array();
			$text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
			foreach( $ext_content[$tag] as $marker => $content ) {
				if ( $render ) {
					$ext_content[$tag][$marker] = $callback( $content );
				} else {
					$ext_content[$tag][$marker] = "<$tag>$content</$tag>";
				}
			}
		}

		$stripped = array(
			'nowiki' => $nowiki_content,
			'math' => $math_content,
			'pre' => $pre_content,
			'comment' => $comment_content,
		) + $ext_content;

		# Merge state with the pre-existing state, if there is one
		if ( $state ) {
			foreach ( $stripped as $type => $entries ) {
				if ( isset( $state[$type] ) ) {
					$state[$type] = $state[$type] + $entries;
				} else {
					# The state may have been created by insertStripItem() or
					# before a tag hook was registered; add the missing key
					# instead of failing or dropping the content.
					$state[$type] = $entries;
				}
			}
			# unstrip() expands the state back to front, and items added through
			# insertStripItem() hold text that may contain the other markers,
			# so 'item' has to stay the last key.
			if ( isset( $state['item'] ) ) {
				$items = $state['item'];
				unset( $state['item'] );
				$state['item'] = $items;
			}
		} else {
			$state = $stripped;
		}
		return $text;
	}

	# Puts back everything strip() and insertStripItem() took out, except
	# the <nowiki> sections.
	# always call unstripNoWiki() after this one
	function unstrip( $text, &$state ) {
		if ( !is_array( $state ) ) {
			return $text;
		}
		# Must expand in reverse order, otherwise nested tags will be corrupted
		foreach ( array_reverse( $state, true ) as $type => $contentDict ) {
			if ( $type == 'nowiki' || !is_array( $contentDict ) ) {
				continue;
			}
			foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
				$text = str_replace( $marker, $content, $text );
			}
		}

		return $text;
	}

	# Puts the <nowiki> sections back.
	# always call this after unstrip() to preserve the order
	function unstripNoWiki( $text, &$state ) {
		if ( !isset( $state['nowiki'] ) || !is_array( $state['nowiki'] ) ) {
			return $text;
		}
		# Must expand in reverse order, otherwise nested tags will be corrupted
		foreach ( array_reverse( $state['nowiki'], true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}

		return $text;
	}

	# Add an item to the strip state
	# Returns the unique tag which must be inserted into the stripped text
	# The tag will be replaced with the original text in unstrip()

	function insertStripItem( $text, &$state ) {
		$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
		if ( !$state ) {
			# Same built-in keys, in the same order, as strip() creates.
			$state = array(
				'nowiki' => array(),
				'math' => array(),
				'pre' => array(),
				'comment' => array()
			);
		}
		$state['item'][$rnd] = $text;
		return $rnd;
	}

	# categoryMagic
	# generate a list of subcategories and pages for a category
	# depending on wfMsg("usenewcategorypage") it either calls the new
	# or the old code. The new code will not work properly for some
	# languages due to sorting issues, so they might want to turn it
	# off.
# The old listing is used when the 'usenewcategorypage' message starts with '0'.
	function categoryMagic() {
		$msg = wfMsg('usenewcategorypage');
		# strncmp() is false for an empty message, so no error suppression is needed.
		if ( strncmp( $msg, '0', 1 ) == 0 ) {
			return $this->oldCategoryMagic();
		} else {
			return $this->newCategoryMagic();
		}
	}

	# This method generates the list of subcategories and pages for a category
	# as two comma-separated lists. Returns '' when category magic is disabled
	# or the current title is not in the category namespace.
	function oldCategoryMagic () {
		global $wgLang , $wgUser ;
		if ( !$this->mOptions->getUseCategoryMagic() ) return '' ; # Doesn't use categories at all

		$cns = Namespace::getCategory() ;
		if ( $this->mTitle->getNamespace() != $cns ) return '' ; # This ain't a category page

		$r = "<br style=\"clear:both;\"/>\n";

		$sk =& $wgUser->getSkin() ;

		$articles = array() ;
		$children = array() ;

		# FIXME: add limits
		$t = wfStrencode( $this->mTitle->getDBKey() );
		$sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
		$res = wfQuery ( $sql, DB_READ ) ;

		# For all pages that link to this category
		while ( $x = wfFetchObject ( $res ) ) {
			$t = $wgLang->getNsText ( $x->cur_namespace ) ;
			if ( $t != "" ) $t .= ":" ;
			$t .= $x->cur_title ;

			if ( $x->cur_namespace == $cns ) {
				array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
			} else {
				array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
			}
		}
		wfFreeResult ( $res ) ;

		# Showing subcategories
		if ( count ( $children ) > 0 ) {
			$r .= '<h2>'.wfMsg('subcategories')."</h2>\n" ;
			$r .= implode ( ', ' , $children ) ;
		}

		# Showing pages in this category
		if ( count ( $articles ) > 0 ) {
			$ti = $this->mTitle->getText() ;
			$h = wfMsg( 'category_header', $ti );
			$r .= "<h2>{$h}</h2>\n" ;
			$r .= implode ( ', ' , $articles ) ;
		}

		return $r ;
	}

	# Generates the list of subcategories and pages for a category, grouped
	# under a heading per first character; lists longer than six entries are
	# laid out in three table columns. Returns '' when category magic is
	# disabled or the current title is not in the category namespace.
	function newCategoryMagic () {
		global $wgLang , $wgUser ;
		if ( !$this->mOptions->getUseCategoryMagic() ) return '' ; # Doesn't use categories at all

		$cns = Namespace::getCategory() ;
		if ( $this->mTitle->getNamespace() != $cns ) return '' ; # This ain't a category page

		$r = "<br style=\"clear:both;\"/>\n";

		$sk =& $wgUser->getSkin() ;

		$articles = array() ;
		$articles_start_char = array();
		$children = array() ;
		$children_start_char = array();

		# FIXME: add limits
		$t = wfStrencode( $this->mTitle->getDBKey() );
		$sql = "SELECT DISTINCT cur_title,cur_namespace,cl_sortkey FROM " .
			"cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY " .
			"cl_sortkey" ;
		$res = wfQuery ( $sql, DB_READ ) ;
		while ( $x = wfFetchObject ( $res ) )
		{
			$t = $ns = $wgLang->getNsText ( $x->cur_namespace ) ;
			if ( $t != '' ) $t .= ':' ;
			$t .= $x->cur_title ;

			if ( $x->cur_namespace == $cns ) {
				$ctitle = str_replace( '_',' ',$x->cur_title );
				array_push ( $children, $sk->makeKnownLink ( $t, $ctitle ) ) ; # Subcategory

				// If there's a link from Category:A to Category:B, the sortkey of the resulting
				// entry in the categorylinks table is Category:A, not A, which it SHOULD be.
				// Workaround: If sortkey == "Category:".$title, then use $title for sorting,
				// else use sortkey...
				if ( ($ns.":".$ctitle) == $x->cl_sortkey ) {
					array_push ( $children_start_char, $wgLang->firstChar( $x->cur_title ) );
				} else {
					array_push ( $children_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
				}
			} else {
				array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
				array_push ( $articles_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
			}
		}
		wfFreeResult ( $res ) ;

		$ti = $this->mTitle->getText() ;

		# Don't show subcategories section if there are none.
		if ( count ( $children ) > 0 )
		{
			$r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n"
				. wfMsg( 'subcategorycount', count( $children ) );
			$r .= $this->formatCategoryList( $children, $children_start_char );
		}

		# The article heading and count are shown even when the list is empty.
		$r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n" .
			wfMsg( 'categoryarticlecount', count( $articles ) );
		$r .= $this->formatCategoryList( $articles, $articles_start_char );

		return $r ;
	}

	# Renders one category listing for newCategoryMagic().
	# $items        list of link HTML strings
	# $start_chars  parallel list holding the heading character for each item
	# Returns '' for an empty list, a single <ul> sequence for up to six
	# entries and a three-column table otherwise.
	/* private */ function formatCategoryList( $items, $start_chars ) {
		$total = count( $items );
		if ( $total == 0 ) {
			return '';
		}

		$r = '';
		if ( $total > 6 ) {
			// divide list into three equal chunks
			$chunk = (int) ( $total / 3 );

			// get and display header
			$r .= '<table width="100%"><tr valign="top">';

			// loop through the chunks
			$startChunk = 0;
			$endChunk = $chunk;
			for ( $chunkIndex = 0; $chunkIndex < 3; $chunkIndex++ ) {
				$r .= '<td><ul>';
				for ( $index = $startChunk; $index < $endChunk && $index < $total; $index++ ) {
					// check for beginning of chunk or change of starting letter;
					// the chunk test comes first so index -1 is never read
					if ( $index == $startChunk
						|| $start_chars[$index] != $start_chars[$index - 1] )
					{
						$r .= "</ul><h3>{$start_chars[$index]}</h3>\n<ul>";
					}
					$r .= "<li>{$items[$index]}</li>";
				}
				$r .= '</ul></td>';

				$startChunk = $endChunk;
				$endChunk += $chunk + 1;
			}
			$r .= '</tr></table>';
		} else {
			// for short lists
			$r .= "<h3>{$start_chars[0]}</h3>\n";
			$r .= '<ul><li>' . $items[0] . '</li>';
			for ( $index = 1; $index < $total; $index++ ) {
				if ( $start_chars[$index] != $start_chars[$index - 1] ) {
					$r .= "</ul><h3>{$start_chars[$index]}</h3>\n<ul>";
				}
				$r .= "<li>{$items[$index]}</li>";
			}
			$r .= '</ul>';
		}
		return $r;
	}

	# Return allowed HTML attributes
	function getHTMLattrs () {
		$htmlattrs = array( # Allowed attributes--no scripting, etc.
				'title', 'align', 'lang', 'dir', 'width', 'height',
				'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
				'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
				/* FONT */ 'type', 'start', 'value', 'compact',
				/* For various lists, mostly deprecated but safe */
				'summary', 'border', 'frame', 'rules',
				'cellspacing', 'cellpadding', 'valign', 'char',
				'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
				'headers', 'scope', 'rowspan', 'colspan', /* Tables */
				'id', 'class', 'name', 'style' /* For CSS */
				);
		return $htmlattrs ;
	}

	# Remove non approved attributes and javascript in css
	# $t is the attribute part of a tag; returns the filtered, trimmed attribute string.
	function fixTagAttributes ( $t ) {
		if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)

		# Strip non-approved attributes from the tag.
		# A callback is used so that attribute values are never evaluated as PHP code.
		$t = preg_replace_callback(
			'/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/',
			array( &$this, 'fixTagAttributesCallback' ),
			$t);

		# Strip javascript "expression" from stylesheets. Brute force approach:
		# If anything offensive is found, all attributes of the HTML tag are dropped
		if( preg_match(
			'/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
			wfMungeToUtf8( $t ) ) )
		{
			$t='';
		}

		return trim ( $t ) ;
	}

	# preg_replace_callback() helper for fixTagAttributes().
	# $m[1] is the attribute name, $m[3] the value when one was given.
	# Returns the attribute unchanged when its name is whitelisted, '' otherwise.
	/* private */ function fixTagAttributesCallback( $m ) {
		if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
			return '';
		}
		if ( isset( $m[3] ) && $m[3] !== '' ) {
			return $m[1] . '=' . $m[3];
		}
		return $m[1];
	}

	# interface with html tidy, used if $wgUseTidy = true
	# Wraps $text in a minimal XHTML document, pipes it through the configured
	# tidy binary and returns what tidy wrote to stdout. If nothing comes back
	# for non-empty input, the input is returned followed by two newlines.
	function tidy ( $text ) {
		global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
		global $wgInputEncoding, $wgOutputEncoding;
		$fname = 'Parser::tidy';
		wfProfileIn( $fname );

		# Work on a copy: appending to the global would add the encoding
		# switch again on every call.
		$opts = $wgTidyOpts;
		$sameEncoding = ( $wgInputEncoding == $wgOutputEncoding );
		switch(strtoupper($wgOutputEncoding)) {
			case 'ISO-8859-1':
				$opts .= $sameEncoding ? ' -latin1' : ' -raw';
				break;
			case 'UTF-8':
				$opts .= $sameEncoding ? ' -utf8' : ' -raw';
				break;
			default:
				$opts .= ' -raw';
		}

		$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
			' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
			'<head><title>test</title></head><body>'.$text.'</body></html>';
		$descriptorspec = array(
			0 => array('pipe', 'r'),
			1 => array('pipe', 'w'),
			2 => array('file', '/dev/null', 'a')
		);

		$cleansource = '';
		$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
		if (is_resource($process)) {
			fwrite($pipes[0], $wrappedtext);
			fclose($pipes[0]);
			while (!feof($pipes[1])) {
				$cleansource .= fgets($pipes[1], 1024);
			}
			fclose($pipes[1]);
			proc_close($process);
		}

		wfProfileOut( $fname );

		if( $cleansource == '' && $text != '') {
			wfDebug( "Tidy error detected!\n" );
			# NOTE(review): the listing this was taken from drops HTML comments
			# elsewhere, so a comment between these newlines may have been lost --
			# confirm against the repository copy.
			return $text . "\n\n";
		} else {
			return $cleansource;
		}
	}

	# parse the wiki syntax used to render tables
	# Works line by line; one stack entry per currently open (nested) table.
	function doTableStuff ( $t ) {
		$t = explode ( "\n" , $t ) ;
		$td = array () ; # Is currently a td tag open?
		$ltd = array () ; # Was it TD or TH?
		$tr = array () ; # Is currently a tr tag open?
		$ltr = array () ; # tr attributes
		foreach ( $t AS $k => $x )
		{
			$x = trim ( $x ) ;
			$fc = substr ( $x , 0 , 1 ) ;
			if ( '{|' == substr ( $x , 0 , 2 ) )
			{
				# Everything after "{|" is attribute text; fixTagAttributes() trims it,
				# so "{|border=1" and "{| border=1" give the same result.
				$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . '>' ;
				array_push ( $td , false ) ;
				array_push ( $ltd , '' ) ;
				array_push ( $tr , false ) ;
				array_push ( $ltr , '' ) ;
			}
			else if ( count ( $td ) == 0 ) { } # Don't do any of the following
			else if ( '|}' == substr ( $x , 0 , 2 ) )
			{
				$z = "</table>\n" ;
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				array_pop ( $ltr ) ;
				$t[$k] = $z ;
			}
			/*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
			{
				$z = trim ( substr ( $x , 2 ) ) ;
				$t[$k] = "<caption>{$z}</caption>\n" ;
			}*/
			else if ( '|-' == substr ( $x , 0 , 2 ) ) # Allows for |---------------
			{
				$x = substr ( $x , 1 ) ;
				while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
				$z = '' ;
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				array_pop ( $ltr ) ;
				$t[$k] = $z ;
				array_push ( $tr , false ) ;
				array_push ( $td , false ) ;
				array_push ( $ltd , '' ) ;
				# What is left of the line becomes the attributes of the next <tr>
				array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
			}
			else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) # Caption
			{
				if ( '|+' == substr ( $x , 0 , 2 ) )
				{
					$fc = '+' ;
					$x = substr ( $x , 1 ) ;
				}
				$after = substr ( $x , 1 ) ;
				if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
				$after = explode ( '||' , $after ) ;
				$t[$k] = '' ;
				foreach ( $after AS $theline )
				{
					$z = '' ;
					if ( $fc != '+' )
					{
						# Open a row first if none is open (captions live outside rows)
						$tra = array_pop ( $ltr ) ;
						if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
						array_push ( $tr , true ) ;
						array_push ( $ltr , '' ) ;
					}

					# Close the previous cell, if any, with its own tag name
					$l = array_pop ( $ltd ) ;
					if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
					if ( $fc == '|' ) $l = 'td' ;
					else if ( $fc == '!' ) $l = 'th' ;
					else if ( $fc == '+' ) $l = 'caption' ;
					else $l = '' ;
					array_push ( $ltd , $l ) ;

					# "attributes|content" or just "content"
					$y = explode ( '|' , $theline , 2 ) ;
					if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
					else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
					$t[$k] .= $y ;
					array_push ( $td , true ) ;
				}
			}
		}

		# Closing open td, tr && table
		while ( count ( $td ) > 0 )
		{
			# Close the open cell with the tag it was opened with (td, th or caption)
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
			if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
			array_pop ( $ltr ) ;
			$t[] = '</table>' ;
		}

		$t = implode ( "\n" , $t ) ;
		#               $t = $this->removeHTMLtags( $t );
		return $t ;
	}

	# Parses the text and adds the result to the strip state
	# Returns the strip tag, prefixed with $newline
	function stripParse( $text, $newline, $args )
	{
		$text = $this->strip( $text, $this->mStripState );
		$text = $this->internalParse( $text, (bool)$newline, $args, false );
		return $newline.$this->insertStripItem( $text, $this->mStripState );
	}

	# Runs the individual markup passes over already-stripped text, in a fixed order.
	# $args is handed to replaceVariables(), $isMain to formatHeadings().
	function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
		$fname = 'Parser::internalParse';
		wfProfileIn( $fname );

		$text = $this->removeHTMLtags( $text );
		$text = $this->replaceVariables( $text, $args );

		$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

		$text = $this->doHeadings( $text );
		if($this->mOptions->getUseDynamicDates()) {
			global $wgDateFormatter;
			$text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
		}
		$text = $this->doAllQuotes( $text );
		// $text = $this->doExponent( $text );
		$text = $this->replaceExternalLinks( $text );
		# NOTE(review): replaceInternalLinks() is applied twice in a row; the reason
		# is not visible from this part of the file -- confirm before removing.
		$text = $this->replaceInternalLinks ( $text );
		$text = $this->replaceInternalLinks ( $text );
		//$text = $this->doTokenizedParser ( $text );
		$text = $this->doTableStuff ( $text ) ;
		$text = $this->magicISBN( $text );
		$text = $this->magicRFC( $text );
		$text = $this->formatHeadings( $text, $isMain );
		$sk =& $this->mOptions->getSkin();
		$text = $sk->transformContent( $text );

		# NOTE(review): categoryMagicDone is set here and nothing visible in this
		# part of the file clears it again -- confirm that is intended.
		if ( !isset ( $this->categoryMagicDone ) ) {
			$text .= $this->categoryMagic () ;
			$this->categoryMagicDone = true ;
		}

		wfProfileOut( $fname );
		return $text;
	}

	# Parse ^^ tokens and return html
	/* private */ function doExponent ( $text )
	{
		$fname = 'Parser::doExponent';
		wfProfileIn( $fname);
		$text = preg_replace('/\^\^(.*)\^\^/','<small><sup>\\1</sup></small>', $text);
		wfProfileOut( $fname);
		return $text;
	}

	# Parse headers and return html
	# Longest markers are handled first so "======" is not consumed as "=".
	/* private */ function doHeadings( $text ) {
		$fname = 'Parser::doHeadings';
		wfProfileIn( $fname );
		for ( $i = 6; $i >= 1; --$i ) {
			$h = substr( '======', 0, $i );
			$text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
			  "<h{$i}>\\1</h{$i}>\\2", $text );
		}
		wfProfileOut( $fname );
		return $text;
	}

	# Applies doQuotes() to every line separately, so emphasis never spans lines.
	/* private */ function doAllQuotes( $text ) {
		$fname = 'Parser::doAllQuotes';
		wfProfileIn( $fname );
		$lines = explode( "\n", $text );
		foreach ( $lines as $i => $line ) {
			$lines[$i] = $this->doQuotes ( '', $line, '' );
		}
		$outtext = implode( "\n", $lines );
		wfProfileOut( $fname );
		return $outtext;
	}

	# Converts '' and ''' quote runs in one line to <em> / <strong>.
	# $pre   text already collected for the currently open outer element
	# $text  remaining text to scan
	# $mode  '', 'em', 'strong', 'emstrong', 'strongem' or 'both': which
	#        elements are open at this point
	/* private */ function doQuotes( $pre, $text, $mode ) {
		if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
			$m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
			$m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
			if ( substr ($m[2], 0, 1) == '\'' ) {
				# Three quotes: toggles strong
				$m[2] = substr ($m[2], 1);
				if ($mode == 'em') {
					return $this->doQuotes ( $m[1], $m[2], ($m[1] == '') ? 'both' : 'emstrong' );
				} else if ($mode == 'strong') {
					return $m1_strong . $this->doQuotes ( '', $m[2], '' );
				} else if (($mode == 'emstrong') || ($mode == 'both')) {
					return $this->doQuotes ( '', $pre.$m1_strong.$m[2], 'em' );
				} else if ($mode == 'strongem') {
					return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( '', $m[2], 'em' );
				} else {
					return $m[1] . $this->doQuotes ( '', $m[2], 'strong' );
				}
			} else {
				# Two quotes: toggles em
				if ($mode == 'strong') {
					return $this->doQuotes ( $m[1], $m[2], ($m[1] == '') ? 'both' : 'strongem' );
				} else if ($mode == 'em') {
					return $m1_em . $this->doQuotes ( '', $m[2], '' );
				} else if ($mode == 'emstrong') {
					return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( '', $m[2], 'strong' );
				} else if (($mode == 'strongem') || ($mode == 'both')) {
					return $this->doQuotes ( '', $pre.$m1_em.$m[2], 'strong' );
				} else {
					return $m[1] . $this->doQuotes ( '', $m[2], 'em' );
				}
			}
		} else {
			# No further quote run: close whatever is still open
			$text_strong = ($text == '') ? '' : "<strong>{$text}</strong>";
			$text_em = ($text == '') ? '' : "<em>{$text}</em>";
			if ($mode == '') {
				return $pre . $text;
			} else if ($mode == 'em') {
				return $pre . $text_em;
			} else if ($mode == 'strong') {
				return $pre . $text_strong;
			} else if ($mode == 'strongem') {
				return (($pre == '') && ($text == '')) ? '' : "<strong>{$pre}{$text_em}</strong>";
			} else {
				return (($pre == '') && ($text == '')) ? '' : "<em>{$pre}{$text_strong}</em>";
			}
		}
	}

	# Note: we have to do external links before the internal ones,
	# and otherwise take great care in the order of things here, so
	# that we don't end up interpreting some URLs twice.

	/* private */ function replaceExternalLinks( $text ) {
		$fname = 'Parser::replaceExternalLinks';
		wfProfileIn( $fname );
		# protocol => whether unlabelled bracketed links are auto-numbered
		# (the same flag also gates inline images in subReplaceExternalLinks())
		$protocols = array(
			'http' => true,
			'https' => true,
			'ftp' => false,
			'irc' => false,
			'gopher' => false,
			'news' => false,
			'mailto' => false
		);
		foreach ( $protocols as $protocol => $autonumber ) {
			$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
		}
		wfProfileOut( $fname );
		return $text;
	}

	# Links the URLs of one protocol in $s: first free URLs (optionally image
	# URLs as inline images), then bracketed [url] and [url label] forms.
	/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber ) {
		# Stand-in for the protocol while free URLs are rewritten, so that the
		# generated markup is not matched again; swapped back afterwards.
		$unique = '4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3';
		$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

		# this is  the list of separators that should be ignored if they
		# are the last character of an URL but that should be included
		# if they occur within the URL, e.g. "go to www.foo.com, where .."
		# in this case, the last comma should not become part of the URL,
		# but in "www.foo.com/123,2342,32.htm" it should.
		$sep = ",;\.:";
		$fnc = 'A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF';
		$images = 'gif|png|jpg|jpeg';

		# PLEASE NOTE: The curly braces { } are not part of the regex,
		# they are interpreted as part of the string (used to tell PHP
		# that the content of the string should be inserted there).
		$e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		  "((?i){$images})([^{$uc}]|$)/";

		$e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
		$sk =& $this->mOptions->getSkin();

		if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
			$s = preg_replace( $e1, '\\1' . $sk->makeImage( "{$unique}:\\3" .
			  '/\\4.\\5', '\\4.\\5' ) . '\\6', $s );
		}
		$s = preg_replace( $e2, '\\1' . "<a href=\"{$unique}:\\3\"" .
		  $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
		  "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
		  '</a>\\5', $s );
		$s = str_replace( $unique, $protocol, $s );

		# Bracketed links: split on "[protocol:"; the leading blank guarantees a
		# first element even when $s starts with a link.
		$a = explode( "[{$protocol}:", " " . $s );
		$s = array_shift( $a );
		$s = substr( $s, 1 );

		$e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
		$e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

		foreach ( $a as $line ) {
			if ( preg_match( $e1, $line, $m ) ) {
				# [url]
				$link = "{$protocol}:{$m[1]}";
				$trail = $m[2];
				if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
				else { $text = wfEscapeHTML( $link ); }
			} else if ( preg_match( $e2, $line, $m ) ) {
				# [url label]
				$link = "{$protocol}:{$m[1]}";
				$text = $m[2];
				$trail = $m[3];
			} else {
				# Not a valid link; put the text back untouched
				$s .= "[{$protocol}:" . $line;
				continue;
			}
			if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
				$paren = '';
			} else {
				# Expand the URL for printable version
				$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
			}
			$la = $sk->getExternalLinkAttributes( $link, $text );
			$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";

		}
		return $s;
	}


	/* private */ function replaceInternalLinks( $s ) {
		global $wgLang, $wgLinkCache;
		global $wgNamespacesWithSubpages, $wgLanguageCode;
		static $fname = 'Parser::replaceInternalLinks' ;
		wfProfileIn( $fname );

		wfProfileIn( $fname.'-setup' );
		static $tc = FALSE;
		# the % is needed to support urlencoded titles as well
		if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
		$sk =& $this->mOptions->getSkin();

		$a = explode( '[[', ' ' . $s );
		$s = array_shift( $a );
		$s = substr( $s, 1 );

		# Match a link having the form [[namespace:link|alternate]]trail
		static $e1 = FALSE;
		if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g.
in the case of 'The Arab al[[Razi]]', 'al' will be matched 00999 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD'; 01000 01001 $useLinkPrefixExtension = $wgLang->linkPrefixExtension(); 01002 # Special and Media are pseudo-namespaces; no pages actually exist in them 01003 static $image = FALSE; 01004 static $special = FALSE; 01005 static $media = FALSE; 01006 static $category = FALSE; 01007 if ( !$image ) { $image = Namespace::getImage(); } 01008 if ( !$special ) { $special = Namespace::getSpecial(); } 01009 if ( !$media ) { $media = Namespace::getMedia(); } 01010 if ( !$category ) { $category = Namespace::getCategory(); } 01011 01012 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() ); 01013 01014 if ( $useLinkPrefixExtension ) { 01015 if ( preg_match( $e2, $s, $m ) ) { 01016 $first_prefix = $m[2]; 01017 $s = $m[1]; 01018 } else { 01019 $first_prefix = false; 01020 } 01021 } else { 01022 $prefix = ''; 01023 } 01024 01025 wfProfileOut( $fname.'-setup' ); 01026 01027 foreach ( $a as $line ) { 01028 wfProfileIn( $fname.'-prefixhandling' ); 01029 if ( $useLinkPrefixExtension ) { 01030 if ( preg_match( $e2, $s, $m ) ) { 01031 $prefix = $m[2]; 01032 $s = $m[1]; 01033 } else { 01034 $prefix=''; 01035 } 01036 # first link 01037 if($first_prefix) { 01038 $prefix = $first_prefix; 01039 $first_prefix = false; 01040 } 01041 } 01042 wfProfileOut( $fname.'-prefixhandling' ); 01043 01044 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt 01045 $text = $m[2]; 01046 # fix up urlencoded title texts 01047 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]); 01048 $trail = $m[3]; 01049 } else { # Invalid form; output directly 01050 $s .= $prefix . '[[' . 
$line ; 01051 continue; 01052 } 01053 01054 /* Valid link forms: 01055 Foobar -- normal 01056 :Foobar -- override special treatment of prefix (images, language links) 01057 /Foobar -- convert to CurrentPage/Foobar 01058 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text 01059 */ 01060 $c = substr($m[1],0,1); 01061 $noforce = ($c != ':'); 01062 if( $c == '/' ) { # subpage 01063 if(substr($m[1],-1,1)=='/') { # / at end means we don't want the slash to be shown 01064 $m[1]=substr($m[1],1,strlen($m[1])-2); 01065 $noslash=$m[1]; 01066 } else { 01067 $noslash=substr($m[1],1); 01068 } 01069 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here 01070 $link = $this->mTitle->getPrefixedText(). '/' . trim($noslash); 01071 if( '' == $text ) { 01072 $text= $m[1]; 01073 } # this might be changed for ugliness reasons 01074 } else { 01075 $link = $noslash; # no subpage allowed, use standard link 01076 } 01077 } elseif( $noforce ) { # no subpage 01078 $link = $m[1]; 01079 } else { 01080 $link = substr( $m[1], 1 ); 01081 } 01082 $wasblank = ( '' == $text ); 01083 if( $wasblank ) 01084 $text = $link; 01085 01086 $nt = Title::newFromText( $link ); 01087 if( !$nt ) { 01088 $s .= $prefix . '[[' . $line; 01089 continue; 01090 } 01091 $ns = $nt->getNamespace(); 01092 $iw = $nt->getInterWiki(); 01093 if( $noforce ) { 01094 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) { 01095 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() ); 01096 $tmp = $prefix . $trail ; 01097 $s .= (trim($tmp) == '')? '': $tmp; 01098 continue; 01099 } 01100 if ( $ns == $image ) { 01101 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . 
$trail;
                    $wgLinkCache->addImageLinkObj( $nt );
                    continue;
                }
                if ( $ns == $category ) {
                    $t = $nt->getText() ;
                    $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

                    $wgLinkCache->suspend(); # Don't save in links/brokenlinks
                    $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
                    $wgLinkCache->resume();

                    # A category link without alternate text sorts under the current page's own title
                    $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
                    $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
                    $this->mOutput->mCategoryLinks[] = $t ;
                    $s .= $prefix . $trail ;
                    continue;
                }
            }
            # strpos() returns the integer offset 0 when '#' is the very first
            # character and boolean false when it is absent. Only a strict
            # comparison tells the two apart; the previous loose "== FALSE"
            # treated a leading '#' as "no fragment present".
            if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
              ( strpos( $link, '#' ) === false ) ) {
                # Self-links are handled specially; generally de-link and change to bold.
                $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
                continue;
            }

            if( $ns == $media ) {
                $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
                $wgLinkCache->addImageLinkObj( $nt );
                continue;
            } elseif( $ns == $special ) {
                $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
                continue;
            }
            $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
        }
        wfProfileOut( $fname );
        return $s;
    }

    # Some functions here used by doBlockLevels()
    #
    # closeParagraph() returns the closing tag (followed by a newline) for
    # the section named in $this->mLastSection, or '' when no section is
    # open. In both cases it clears $this->mInPre and $this->mLastSection.
    /* private */ function closeParagraph() {
        $result = '';
        if ( '' != $this->mLastSection ) {
            $result = '</' . $this->mLastSection . ">\n";
        }
        $this->mInPre = false;
        $this->mLastSection = '';
        return $result;
    }
    # getCommon() returns the length of the longest common substring
    # of both arguments, starting at the beginning of both.
#
    /* private */ function getCommon( $st1, $st2 ) {
        # Only positions that exist in both strings can match
        $limit = min( strlen( $st1 ), strlen( $st2 ) );

        $matched = 0;
        while ( $matched < $limit && $st1{$matched} == $st2{$matched} ) {
            ++$matched;
        }
        return $matched;
    }
    # The next three functions emit the HTML that opens, continues and
    # closes a list for one prefix character ('*', '#', ':' or ';').
    #
    # openList() first closes any open paragraph, then returns that closing
    # markup followed by the opening tags of the list and its first item.
    # For any other character it returns '' (the paragraph has still been closed).
    /* private */ function openList( $char )
    {
        $html = $this->closeParagraph();

        switch ( $char ) {
            case '*':
                return $html . '<ul><li>';
            case '#':
                return $html . '<ol><li>';
            case ':':
                return $html . '<dl><dd>';
            case ';':
                # remember that a <dt> is open so the matching close tag can be chosen later
                $this->mDTopen = true;
                return $html . '<dl><dt>';
        }
        return '';
    }

    # nextItem() returns the markup that ends the current item and starts
    # the next one in a list of the same kind. For definition lists,
    # $this->mDTopen selects the closing tag and is then updated to say
    # whether the newly opened item is a <dt>.
    /* private */ function nextItem( $char ) {
        if ( '*' == $char || '#' == $char ) {
            return '</li><li>';
        }
        if ( ':' != $char && ';' != $char ) {
            return '';
        }

        $close = $this->mDTopen ? '</dt>' : "</dd>";
        $opensTerm = ( ';' == $char );
        $this->mDTopen = $opensTerm;
        return $close . ( $opensTerm ? '<dt>' : '<dd>' );
    }

    # closeList() returns the closing tags for the innermost list of the
    # given kind, followed by a newline, or '' for any other character
    # (';' is not handled here).
    /* private */function closeList( $char ) {
        switch ( $char ) {
            case '*':
                $html = '</li></ul>';
                break;
            case '#':
                $html = '</li></ol>';
                break;
            case ':':
                if ( !$this->mDTopen ) {
                    $html = '</dd></dl>';
                } else {
                    $this->mDTopen = false;
                    $html = '</dt></dl>';
                }
                break;
            default:
                return '';
        }
        return $html."\n";
    }

    /* private */ function doBlockLevels( $text, $linestart ) {
        $fname = 'Parser::doBlockLevels';
        wfProfileIn( $fname );

        # Parsing through the text line by line.
The main thing 01220 # happening here is handling of block-level elements p, pre, 01221 # and making lists from lines starting with * # : etc. 01222 # 01223 $textLines = explode( "\n", $text ); 01224 01225 $lastPrefix = $output = $lastLine = ''; 01226 $this->mDTopen = $inBlockElem = false; 01227 $prefixLength = 0; 01228 $paragraphStack = false; 01229 01230 if ( !$linestart ) { 01231 $output .= array_shift( $textLines ); 01232 } 01233 foreach ( $textLines as $oLine ) { 01234 $lastPrefixLength = strlen( $lastPrefix ); 01235 $preCloseMatch = preg_match("/<\\/pre/i", $oLine ); 01236 $preOpenMatch = preg_match("/<pre/i", $oLine ); 01237 if (!$this->mInPre) { 01238 $this->mInPre = !empty($preOpenMatch); 01239 } 01240 if ( !$this->mInPre ) { 01241 # Multiple prefixes may abut each other for nested lists. 01242 $prefixLength = strspn( $oLine, '*#:;' ); 01243 $pref = substr( $oLine, 0, $prefixLength ); 01244 01245 # eh? 01246 $pref2 = str_replace( ';', ':', $pref ); 01247 $t = substr( $oLine, $prefixLength ); 01248 } else { 01249 # Don't interpret any other prefixes in preformatted text 01250 $prefixLength = 0; 01251 $pref = $pref2 = ''; 01252 $t = $oLine; 01253 } 01254 01255 # List generation 01256 if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) { 01257 # Same as the last item, so no need to deal with nesting or opening stuff 01258 $output .= $this->nextItem( substr( $pref, -1 ) ); 01259 $paragraphStack = false; 01260 01261 if ( ";" == substr( $pref, -1 ) ) { 01262 # The one nasty exception: definition lists work like this: 01263 # ; title : definition text 01264 # So we check for : in the remainder text to split up the 01265 # title and definition, without b0rking links. 01266 # FIXME: This is not foolproof. Something better in Tokenizer might help. 01267 if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) { 01268 $term = $match[1]; 01269 $output .= $term . 
$this->nextItem( ':' ); 01270 $t = $match[2]; 01271 } 01272 } 01273 } elseif( $prefixLength || $lastPrefixLength ) { 01274 # Either open or close a level... 01275 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix ); 01276 $paragraphStack = false; 01277 01278 while( $commonPrefixLength < $lastPrefixLength ) { 01279 $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} ); 01280 --$lastPrefixLength; 01281 } 01282 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) { 01283 $output .= $this->nextItem( $pref{$commonPrefixLength-1} ); 01284 } 01285 while ( $prefixLength > $commonPrefixLength ) { 01286 $char = substr( $pref, $commonPrefixLength, 1 ); 01287 $output .= $this->openList( $char ); 01288 01289 if ( ';' == $char ) { 01290 # FIXME: This is dupe of code above 01291 if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) { 01292 $term = $match[1]; 01293 $output .= $term . $this->nextItem( ":" ); 01294 $t = $match[2]; 01295 } 01296 } 01297 ++$commonPrefixLength; 01298 } 01299 $lastPrefix = $pref2; 01300 } 01301 if( 0 == $prefixLength ) { 01302 # No prefix (not in list)--go to paragraph mode 01303 $uniq_prefix = UNIQ_PREFIX; 01304 // XXX: use a stack for nestable elements like span, table and div 01305 $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t ); 01306 $closematch = preg_match( 01307 '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'. 
01308 '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t ); 01309 if ( $openmatch or $closematch ) { 01310 $paragraphStack = false; 01311 $output .= $this->closeParagraph(); 01312 if($preOpenMatch and !$preCloseMatch) { 01313 $this->mInPre = true; 01314 } 01315 if ( $closematch ) { 01316 $inBlockElem = false; 01317 } else { 01318 $inBlockElem = true; 01319 } 01320 } else if ( !$inBlockElem && !$this->mInPre ) { 01321 if ( " " == $t{0} and trim($t) != '' ) { 01322 // pre 01323 if ($this->mLastSection != 'pre') { 01324 $paragraphStack = false; 01325 $output .= $this->closeParagraph().'<pre>'; 01326 $this->mLastSection = 'pre'; 01327 } 01328 } else { 01329 // paragraph 01330 if ( '' == trim($t) ) { 01331 if ( $paragraphStack ) { 01332 $output .= $paragraphStack.'<br />'; 01333 $paragraphStack = false; 01334 $this->mLastSection = 'p'; 01335 } else { 01336 if ($this->mLastSection != 'p' ) { 01337 $output .= $this->closeParagraph(); 01338 $this->mLastSection = ''; 01339 $paragraphStack = '<p>'; 01340 } else { 01341 $paragraphStack = '</p><p>'; 01342 } 01343 } 01344 } else { 01345 if ( $paragraphStack ) { 01346 $output .= $paragraphStack; 01347 $paragraphStack = false; 01348 $this->mLastSection = 'p'; 01349 } else if ($this->mLastSection != 'p') { 01350 $output .= $this->closeParagraph().'<p>'; 01351 $this->mLastSection = 'p'; 01352 } 01353 } 01354 } 01355 } 01356 } 01357 if ($paragraphStack === false) { 01358 $output .= $t."\n"; 01359 } 01360 } 01361 while ( $prefixLength ) { 01362 $output .= $this->closeList( $pref2{$prefixLength-1} ); 01363 --$prefixLength; 01364 } 01365 if ( '' != $this->mLastSection ) { 01366 $output .= '</' . $this->mLastSection . 
'>';
            $this->mLastSection = '';
        }

        wfProfileOut( $fname );
        return $output;
    }

    # Looks up the current value of one magic variable (PAGENAME,
    # CURRENTYEAR, ...) by its MAG_* id. Ids that are not handled here yield NULL.
    function getVariableValue( $index ) {
        global $wgLang, $wgSitename, $wgServer;

        $value = NULL;
        switch ( $index ) {
            case MAG_CURRENTMONTH:
                $value = date( 'm' );
                break;
            case MAG_CURRENTMONTHNAME:
                $value = $wgLang->getMonthName( date('n') );
                break;
            case MAG_CURRENTMONTHNAMEGEN:
                $value = $wgLang->getMonthNameGen( date('n') );
                break;
            case MAG_CURRENTDAY:
                $value = date('j');
                break;
            case MAG_PAGENAME:
                $value = $this->mTitle->getText();
                break;
            case MAG_NAMESPACE:
                # Uses the localised namespace text from $wgLang rather than
                # Namespace::getCanonicalName() (patch by Dori)
                $value = $wgLang->getNsText($this->mTitle->getNamespace());
                break;
            case MAG_CURRENTDAYNAME:
                # date('w') counts from 0; getWeekdayName() is handed the value plus one
                $value = $wgLang->getWeekdayName( date('w')+1 );
                break;
            case MAG_CURRENTYEAR:
                $value = date( 'Y' );
                break;
            case MAG_CURRENTTIME:
                $value = $wgLang->time( wfTimestampNow(), false );
                break;
            case MAG_NUMBEROFARTICLES:
                $value = wfNumberOfArticles();
                break;
            case MAG_SITENAME:
                $value = $wgSitename;
                break;
            case MAG_SERVER:
                $value = $wgServer;
                break;
        }
        return $value;
    }

    # Rebuilds $this->mVariables from scratch: every id listed in
    # $wgVariableIDs is registered through its MagicWord together with
    # the value getVariableValue() reports for it.
    function initialiseVariables() {
        global $wgVariableIDs;
        $this->mVariables = array();
        foreach ( $wgVariableIDs as $id ) {
            $word =& MagicWord::get( $id );
            $current = $this->getVariableValue( $id );
            $word->addToArray( $this->mVariables, $current );
        }
    }

    /* private */ function replaceVariables( $text, $args = array() ) {
        global $wgLang, $wgScript, $wgArticlePath;

        $fname = 'Parser::replaceVariables';
        wfProfileIn( $fname );

        $bail = false;
        if ( !$this->mVariables ) {
            $this->initialiseVariables();
        }
        $titleChars = Title::legalChars();
        $nonBraceChars = str_replace( array( '{',
'}' ), array( '', '' ), $titleChars );

        # This function is called recursively. To keep track of arguments we need a stack:
        array_push( $this->mArgStack, $args );

        # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
        $GLOBALS['wgCurParser'] =& $this;


        if ( $this->mOutputType == OT_HTML ) {
            # Variable substitution
            $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", 'wfVariableSubstitution', $text );

            # Argument substitution
            $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
        }
        # Template substitution
        $regex = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
        $text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text );

        array_pop( $this->mArgStack );

        wfProfileOut( $fname );
        return $text;
    }

    # Replacement for one {{NAME}} match. $matches[1] is the text between
    # the braces and $matches[0] the whole match. A name present in
    # $this->mVariables is replaced by its stored value and the output is
    # flagged as containing old-style magic; anything else is returned untouched.
    function variableSubstitution( $matches ) {
        $name = $matches[1];
        if ( !array_key_exists( $name, $this->mVariables ) ) {
            return $matches[0];
        }
        $this->mOutput->mContainsOldMagic = true;
        return $this->mVariables[$name];
    }

    function braceSubstitution( $matches ) {
        global $wgLinkCache, $wgLang;
        $fname = 'Parser::braceSubstitution';
        $found = false;
        $nowiki = false;
        $noparse = false;

        $title = NULL;

        # $newline is an optional newline character before the braces
        # $part1 is the bit before the first |, and must contain only title characters
        # $args is a list of arguments, starting from index 0, not including $part1

        $newline = $matches[1];
        $part1 = $matches[2];
        # If the third subpattern matched anything, it will start with |
        if ( $matches[3] !== '' ) {
            $args = explode( '|', substr( $matches[3], 1 ) );
        } else {
            $args = array();
        }
        $argc = count( $args );

        #
{{{}}} 01490 if ( strpos( $matches[0], '{{{' ) !== false ) { 01491 $text = $matches[0]; 01492 $found = true; 01493 $noparse = true; 01494 } 01495 01496 # SUBST 01497 if ( !$found ) { 01498 $mwSubst =& MagicWord::get( MAG_SUBST ); 01499 if ( $mwSubst->matchStartAndRemove( $part1 ) ) { 01500 if ( $this->mOutputType != OT_WIKI ) { 01501 # Invalid SUBST not replaced at PST time 01502 # Return without further processing 01503 $text = $matches[0]; 01504 $found = true; 01505 $noparse= true; 01506 } 01507 } elseif ( $this->mOutputType == OT_WIKI ) { 01508 # SUBST not found in PST pass, do nothing 01509 $text = $matches[0]; 01510 $found = true; 01511 } 01512 } 01513 01514 # MSG, MSGNW and INT 01515 if ( !$found ) { 01516 # Check for MSGNW: 01517 $mwMsgnw =& MagicWord::get( MAG_MSGNW ); 01518 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) { 01519 $nowiki = true; 01520 } else { 01521 # Remove obsolete MSG: 01522 $mwMsg =& MagicWord::get( MAG_MSG ); 01523 $mwMsg->matchStartAndRemove( $part1 ); 01524 } 01525 01526 # Check if it is an internal message 01527 $mwInt =& MagicWord::get( MAG_INT ); 01528 if ( $mwInt->matchStartAndRemove( $part1 ) ) { 01529 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) { 01530 $text = wfMsgReal( $part1, $args, true ); 01531 $found = true; 01532 } 01533 } 01534 } 01535 01536 # NS 01537 if ( !$found ) { 01538 # Check for NS: (namespace expansion) 01539 $mwNs = MagicWord::get( MAG_NS ); 01540 if ( $mwNs->matchStartAndRemove( $part1 ) ) { 01541 if ( intval( $part1 ) ) { 01542 $text = $wgLang->getNsText( intval( $part1 ) ); 01543 $found = true; 01544 } else { 01545 $index = Namespace::getCanonicalIndex( strtolower( $part1 ) ); 01546 if ( !is_null( $index ) ) { 01547 $text = $wgLang->getNsText( $index ); 01548 $found = true; 01549 } 01550 } 01551 } 01552 } 01553 01554 # LOCALURL and LOCALURLE 01555 if ( !$found ) { 01556 $mwLocal = MagicWord::get( MAG_LOCALURL ); 01557 $mwLocalE = MagicWord::get( MAG_LOCALURLE ); 01558 01559 if ( 
$mwLocal->matchStartAndRemove( $part1 ) ) { 01560 $func = 'getLocalURL'; 01561 } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) { 01562 $func = 'escapeLocalURL'; 01563 } else { 01564 $func = ''; 01565 } 01566 01567 if ( $func !== '' ) { 01568 $title = Title::newFromText( $part1 ); 01569 if ( !is_null( $title ) ) { 01570 if ( $argc > 0 ) { 01571 $text = $title->$func( $args[0] ); 01572 } else { 01573 $text = $title->$func(); 01574 } 01575 $found = true; 01576 } 01577 } 01578 } 01579 01580 # Internal variables 01581 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) { 01582 $text = $this->mVariables[$part1]; 01583 $found = true; 01584 $this->mOutput->mContainsOldMagic = true; 01585 } 01586 /* 01587 # Arguments input from the caller 01588 $inputArgs = end( $this->mArgStack ); 01589 if ( !$found && array_key_exists( $part1, $inputArgs ) ) { 01590 $text = $inputArgs[$part1]; 01591 $found = true; 01592 } 01593 */ 01594 # Load from database 01595 if ( !$found ) { 01596 $title = Title::newFromText( $part1, NS_TEMPLATE ); 01597 if ( !is_null( $title ) && !$title->isExternal() ) { 01598 # Check for excessive inclusion 01599 $dbk = $title->getPrefixedDBkey(); 01600 if ( $this->incrementIncludeCount( $dbk ) ) { 01601 $article = new Article( $title ); 01602 $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals(); 01603 if ( $articleContent !== false ) { 01604 $found = true; 01605 $text = $articleContent; 01606 01607 } 01608 } 01609 01610 # If the title is valid but undisplayable, make a link to it 01611 if ( $this->mOutputType == OT_HTML && !$found ) { 01612 $text = '[[' . $title->getPrefixedText() . 
']]';
                    $found = true;
                }
            }
        }

        # Recursive parsing, escaping and link table handling
        # Only for HTML output
        if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
            $text = wfEscapeWikiText( $text );
        } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
            # Clean up argument array
            $assocArgs = array();
            $index = 1;
            foreach( $args as $arg ) {
                $eqpos = strpos( $arg, '=' );
                if ( $eqpos === false ) {
                    $assocArgs[$index++] = $arg;
                } else {
                    $name = trim( substr( $arg, 0, $eqpos ) );
                    $value = trim( substr( $arg, $eqpos+1 ) );
                    if ( $value === false ) {
                        $value = '';
                    }
                    if ( $name !== false ) {
                        $assocArgs[$name] = $value;
                    }
                }
            }

            # Do not enter included links in link table
            if ( !is_null( $title ) ) {
                $wgLinkCache->suspend();
            }

            # Run full parser on the included text
            $text = $this->stripParse( $text, $newline, $assocArgs );

            # Resume the link cache and register the inclusion as a link
            if ( !is_null( $title ) ) {
                $wgLinkCache->resume();
                $wgLinkCache->addLinkObj( $title );
            }
        }

        if ( !$found ) {
            return $matches[0];
        } else {
            return $text;
        }
    }

    # Replacement for one {{{name}}} match (template argument).
    # $matches[1] is the optional newline before the braces, $matches[2]
    # the argument name. When the argument set on top of $this->mArgStack
    # has that name, its value is run through stripParse() and returned;
    # otherwise the match is handed back unchanged.
    function argSubstitution( $matches ) {
        $name = trim( $matches[2] );
        $currentArgs = end( $this->mArgStack );

        if ( !array_key_exists( $name, $currentArgs ) ) {
            return $matches[0];
        }
        return $this->stripParse( $currentArgs[$name], $matches[1], array() );
    }

    # Returns true if the function is allowed to include this entity
    function incrementIncludeCount( $dbk ) {
        if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
            $this->mIncludeCount[$dbk] = 0;
} 01683 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) { 01684 return true; 01685 } else { 01686 return false; 01687 } 01688 } 01689 01690 01691 # Cleans up HTML, removes dangerous tags and attributes 01692 /* private */ function removeHTMLtags( $text ) { 01693 global $wgUseTidy, $wgUserHtml; 01694 $fname = 'Parser::removeHTMLtags'; 01695 wfProfileIn( $fname ); 01696 01697 if( $wgUserHtml ) { 01698 $htmlpairs = array( # Tags that must be closed 01699 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', 01700 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', 01701 'strike', 'strong', 'tt', 'var', 'div', 'center', 01702 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', 01703 'ruby', 'rt' , 'rb' , 'rp', 'p' 01704 ); 01705 $htmlsingle = array( 01706 'br', 'hr', 'li', 'dt', 'dd' 01707 ); 01708 $htmlnest = array( # Tags that can be nested--?? 01709 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 01710 'dl', 'font', 'big', 'small', 'sub', 'sup' 01711 ); 01712 $tabletags = array( # Can only appear inside table 01713 'td', 'th', 'tr' 01714 ); 01715 } else { 01716 $htmlpairs = array(); 01717 $htmlsingle = array(); 01718 $htmlnest = array(); 01719 $tabletags = array(); 01720 } 01721 01722 $htmlsingle = array_merge( $tabletags, $htmlsingle ); 01723 $htmlelements = array_merge( $htmlsingle, $htmlpairs ); 01724 01725 $htmlattrs = $this->getHTMLattrs () ; 01726 01727 # Remove HTML comments 01728 $text = preg_replace( '/(\\n * *(?=\\n)|)/sU', '$2', $text ); 01729 01730 $bits = explode( '<', $text ); 01731 $text = array_shift( $bits ); 01732 if(!$wgUseTidy) { 01733 $tagstack = array(); $tablestack = array(); 01734 foreach ( $bits as $x ) { 01735 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) ); 01736 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/', 01737 $x, $regs ); 01738 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; 01739 error_reporting( $prev ); 01740 01741 $badtag = 0 ; 01742 if ( 
in_array( $t = strtolower( $t ), $htmlelements ) ) { 01743 # Check our stack 01744 if ( $slash ) { 01745 # Closing a tag... 01746 if ( ! in_array( $t, $htmlsingle ) && 01747 ( $ot = @array_pop( $tagstack ) ) != $t ) { 01748 @array_push( $tagstack, $ot ); 01749 $badtag = 1; 01750 } else { 01751 if ( $t == 'table' ) { 01752 $tagstack = array_pop( $tablestack ); 01753 } 01754 $newparams = ''; 01755 } 01756 } else { 01757 # Keep track for later 01758 if ( in_array( $t, $tabletags ) && 01759 ! in_array( 'table', $tagstack ) ) { 01760 $badtag = 1; 01761 } else if ( in_array( $t, $tagstack ) && 01762 ! in_array ( $t , $htmlnest ) ) { 01763 $badtag = 1 ; 01764 } else if ( ! in_array( $t, $htmlsingle ) ) { 01765 if ( $t == 'table' ) { 01766 array_push( $tablestack, $tagstack ); 01767 $tagstack = array(); 01768 } 01769 array_push( $tagstack, $t ); 01770 } 01771 # Strip non-approved attributes from the tag 01772 $newparams = $this->fixTagAttributes($params); 01773 01774 } 01775 if ( ! $badtag ) { 01776 $rest = str_replace( '>', '>', $rest ); 01777 $text .= "<$slash$t $newparams$brace$rest"; 01778 continue; 01779 } 01780 } 01781 $text .= '<' . str_replace( '>', '>', $x); 01782 } 01783 # Close off any remaining tags 01784 while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) { 01785 $text .= "</$t>\n"; 01786 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); } 01787 } 01788 } else { 01789 # this might be possible using tidy itself 01790 foreach ( $bits as $x ) { 01791 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/', 01792 $x, $regs ); 01793 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; 01794 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) { 01795 $newparams = $this->fixTagAttributes($params); 01796 $rest = str_replace( '>', '>', $rest ); 01797 $text .= "<$slash$t $newparams$brace$rest"; 01798 } else { 01799 $text .= '<' . 
str_replace( '>', '>', $x); 01800 } 01801 } 01802 } 01803 wfProfileOut( $fname ); 01804 return $text; 01805 } 01806 01807 01808 /* 01809 * 01810 * This function accomplishes several tasks: 01811 * 1) Auto-number headings if that option is enabled 01812 * 2) Add an [edit] link to sections for logged in users who have enabled the option 01813 * 3) Add a Table of contents on the top for users who have enabled the option 01814 * 4) Auto-anchor headings 01815 * 01816 * It loops through all headlines, collects the necessary data, then splits up the 01817 * string and re-inserts the newly formatted headlines. 01818 * 01819 */ 01820 01821 /* private */ function formatHeadings( $text, $isMain=true ) { 01822 global $wgInputEncoding; 01823 01824 $doNumberHeadings = $this->mOptions->getNumberHeadings(); 01825 $doShowToc = $this->mOptions->getShowToc(); 01826 if( !$this->mTitle->userCanEdit() ) { 01827 $showEditLink = 0; 01828 $rightClickHack = 0; 01829 } else { 01830 $showEditLink = $this->mOptions->getEditSection(); 01831 $rightClickHack = $this->mOptions->getEditSectionOnRightClick(); 01832 } 01833 01834 # Inhibit editsection links if requested in the page 01835 $esw =& MagicWord::get( MAG_NOEDITSECTION ); 01836 if( $esw->matchAndRemove( $text ) ) { 01837 $showEditLink = 0; 01838 } 01839 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML, 01840 # do not add TOC 01841 $mw =& MagicWord::get( MAG_NOTOC ); 01842 if( $mw->matchAndRemove( $text ) ) { 01843 $doShowToc = 0; 01844 } 01845 01846 # never add the TOC to the Main Page. This is an entry page that should not 01847 # be more than 1-2 screens large anyway 01848 if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) { 01849 $doShowToc = 0; 01850 } 01851 01852 # Get all headlines for numbering them and adding funky stuff like [edit] 01853 # links - this is for later, but we need the number of headlines right now 01854 $numMatches = preg_match_all( '/<H([1-6])(.*?' . 
'>)(.*?)<\/H[1-6]>/i', $text, $matches ); 01855 01856 # if there are fewer than 4 headlines in the article, do not show TOC 01857 if( $numMatches < 4 ) { 01858 $doShowToc = 0; 01859 } 01860 01861 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML, 01862 # override above conditions and always show TOC 01863 $mw =& MagicWord::get( MAG_FORCETOC ); 01864 if ($mw->matchAndRemove( $text ) ) { 01865 $doShowToc = 1; 01866 } 01867 01868 01869 # We need this to perform operations on the HTML 01870 $sk =& $this->mOptions->getSkin(); 01871 01872 # headline counter 01873 $headlineCount = 0; 01874 01875 # Ugh .. the TOC should have neat indentation levels which can be 01876 # passed to the skin functions. These are determined here 01877 $toclevel = 0; 01878 $toc = ''; 01879 $full = ''; 01880 $head = array(); 01881 $sublevelCount = array(); 01882 $level = 0; 01883 $prevlevel = 0; 01884 foreach( $matches[3] as $headline ) { 01885 $numbering = ''; 01886 if( $level ) { 01887 $prevlevel = $level; 01888 } 01889 $level = $matches[1][$headlineCount]; 01890 if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) { 01891 # reset when we enter a new level 01892 $sublevelCount[$level] = 0; 01893 $toc .= $sk->tocIndent( $level - $prevlevel ); 01894 $toclevel += $level - $prevlevel; 01895 } 01896 if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) { 01897 # reset when we step back a level 01898 $sublevelCount[$level+1]=0; 01899 $toc .= $sk->tocUnindent( $prevlevel - $level ); 01900 $toclevel -= $prevlevel - $level; 01901 } 01902 # count number of headlines for each level 01903 @$sublevelCount[$level]++; 01904 if( $doNumberHeadings || $doShowToc ) { 01905 $dot = 0; 01906 for( $i = 1; $i <= $level; $i++ ) { 01907 if( !empty( $sublevelCount[$i] ) ) { 01908 if( $dot ) { 01909 $numbering .= '.'; 01910 } 01911 $numbering .= $sublevelCount[$i]; 01912 $dot = 1; 01913 } 01914 } 01915 } 01916 01917 # The canonized header is a version of the header 
# text safe to use for links
            # Avoid insertion of weird stuff like <math> by expanding the relevant sections.
            # The second call must work on the result of the first one: it used to
            # be handed the raw $headline again, which threw the unstrip() result away.
            $canonized_headline = $this->unstrip( $headline, $this->mStripState );
            $canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

            # strip out HTML
            $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
            $tocline = trim( $canonized_headline );
            $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
            # Post-process the url-encoded text: an encoded colon is restored,
            # every remaining '%' becomes '.'
            $replacearray = array(
                '%3A' => ':',
                '%' => '.'
            );
            $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
            $refer[$headlineCount] = $canonized_headline;

            # count how many in assoc. array so we can track dupes in anchors
            @$refers[$canonized_headline]++;
            $refcount[$headlineCount]=$refers[$canonized_headline];

            # Prepend the number to the heading text

            if( $doNumberHeadings || $doShowToc ) {
                $tocline = $numbering . ' ' . $tocline;

                # Don't number the heading if it is the only one (looks silly)
                if( $doNumberHeadings && count( $matches[3] ) > 1) {
                    # the two are different if the line contains a link
                    $headline=$numbering . ' ' . $headline;
                }
            }

            # Create the anchor for linking from the TOC to the section
            $anchor = $canonized_headline;
            if($refcount[$headlineCount] > 1 ) {
                $anchor .= '_' .
$refcount[$headlineCount];
			}
			# NOTE: the statements down to the first "return $full;" are the tail of a
			# method that begins before this point; they are reproduced unchanged.
			if( $doShowToc ) {
				$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
			}
			if( $showEditLink ) {
				if ( empty( $head[$headlineCount] ) ) {
					$head[$headlineCount] = '';
				}
				$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
			}

			# Add the edit section span
			if( $rightClickHack ) {
				$headline = $sk->editSectionScript($headlineCount+1,$headline);
			}

			# give headline the correct <h#> tag
			@$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

			$headlineCount++;
		}

		if( $doShowToc ) {
			$toclines = $headlineCount;
			$toc .= $sk->tocUnindent( $toclevel );
			$toc = $sk->tocTable( $toc );
		}

		# split up and insert constructed headlines

		$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
		$i = 0;

		foreach( $blocks as $block ) {
			if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
				# This is the [edit] link that appears for the top block of text when
				# section editing is enabled

				# Disabled because it broke block formatting
				# For example, a bullet point in the top line
				# $full .= $sk->editSectionLink(0);
			}
			$full .= $block;
			if( $doShowToc && !$i && $isMain) {
				# Top anchor now in skin
				$full = $full.$toc;
			}

			if( !empty( $head[$i] ) ) {
				$full .= $head[$i];
			}
			$i++;
		}

		return $full;
	}

	# Return an HTML link for the "ISBN 123456" text
	#
	# Every occurrence of "ISBN " followed by a run of digits, hyphens or
	# upper-case letters is turned into a link to Special:Booksources with the
	# hyphen-less number passed as the "isbn" query parameter. Occurrences that
	# carry no usable number are copied through unchanged.
	#
	# $text: text to scan
	# Returns the text with the links inserted.
	/* private */ function magicISBN( $text ) {
		# The text is padded with one leading space before splitting; the pad is
		# removed again from the first chunk below.
		$a = explode( 'ISBN ', " $text" );
		if ( count( $a ) < 2 ) {
			return $text;
		}
		$text = (string)substr( array_shift( $a ), 1 );
		$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

		foreach ( $a as $x ) {
			# Leading blanks, then the longest run of valid ISBN characters
			$blankLen = strspn( $x, ' ' );
			$blank = (string)substr( $x, 0, $blankLen );
			$x = (string)substr( $x, $blankLen );

			$isbnLen = strspn( $x, $valid );
			$isbn = (string)substr( $x, 0, $isbnLen );
			$x = (string)substr( $x, $isbnLen );

			$num = str_replace( '-', '', $isbn );

			if ( $num === '' ) {
				# Nothing to link. $isbn can only hold hyphens here; emit it so
				# that no character of the input is lost.
				$text .= "ISBN $blank$isbn$x";
			} else {
				$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
				$text .= '<a href="' .
					$titleObj->escapeLocalUrl( "isbn={$num}" ) .
					"\" class=\"internal\">ISBN $isbn</a>";
				$text .= $x;
			}
		}
		return $text;
	}

	# Return an HTML link for the "RFC 1234" text
	#
	# Every occurrence of "RFC " followed by digits becomes an external link
	# whose URL is the 'rfcurl' message with $1 replaced by the number.
	# Occurrences without a number are copied through unchanged.
	#
	# $text: text to scan
	# Returns the text with the links inserted.
	/* private */ function magicRFC( $text ) {
		# Same padding trick as in magicISBN()
		$a = explode( 'RFC ', ' '.$text );
		if ( count( $a ) < 2 ) {
			return $text;
		}
		$text = (string)substr( array_shift( $a ), 1 );
		$valid = '0123456789';

		foreach ( $a as $x ) {
			$blankLen = strspn( $x, ' ' );
			$blank = (string)substr( $x, 0, $blankLen );
			$x = (string)substr( $x, $blankLen );

			$rfcLen = strspn( $x, $valid );
			$rfc = (string)substr( $x, 0, $rfcLen );
			$x = (string)substr( $x, $rfcLen );

			if ( $rfc === '' ) {
				$text .= "RFC $blank$x";
			} else {
				$url = wfmsg( 'rfcurl' );
				$url = str_replace( '$1', $rfc, $url);
				$sk =& $this->mOptions->getSkin();
				$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
				$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
			}
		}
		return $text;
	}

	# Transform wiki markup before it is saved (output type OT_WIKI).
	#
	# Normalises CRLF line endings to LF, strips protected sections, runs
	# pstPass2() on the remainder and puts the stripped sections back.
	#
	# $text:       wikitext to transform
	# $title:      title stored in $this->mTitle for the duration of the call
	# $user:       user object handed to pstPass2() for signatures
	# $options:    stored in $this->mOptions
	# $clearState: when true, clearState() is called first
	# Returns the altered wikitext.
	function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
		$this->mOptions = $options;
		$this->mTitle =& $title;
		$this->mOutputType = OT_WIKI;

		if ( $clearState ) {
			$this->clearState();
		}

		$stripState = false;
		$pairs = array(
			"\r\n" => "\n",
		);
		$text = str_replace(array_keys($pairs), array_values($pairs), $text);
		// now with regexes
		/*
		$pairs = array(
			"/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
			"/<br *?>/i" => "<br />",
		);
		$text = preg_replace(array_keys($pairs), array_values($pairs), $text);
		*/
		$text = $this->strip( $text, $stripState, false );
		$text = $this->pstPass2( $text, $user );
		$text = $this->unstrip( $text, $stripState );
		$text = $this->unstripNoWiki( $text, $stripState );
		return $text;
	}

	# Second pass of the pre-save transform: variable substitution, signature
	# expansion (~~~, ~~~~, ~~~~~), context ("pipe trick") links and trimming
	# of trailing whitespace.
	#
	# $text: wikitext with protected sections already stripped
	# $user: user whose name and 'nickname' option are used for signatures
	# Returns the transformed wikitext.
	/* private */ function pstPass2( $text, &$user ) {
		global $wgLang, $wgLocaltimezone, $wgCurParser;

		# Variable replacement
		# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
		$text = $this->replaceVariables( $text );

		# Signatures
		#
		$n = $user->getName();
		$k = $user->getOption( 'nickname' );
		if ( '' == $k ) { $k = $n; }
		if(isset($wgLocaltimezone)) {
			$oldtz = getenv('TZ'); putenv('TZ='.$wgLocaltimezone);
		}
		/* Note: this is an ugly timezone hack for the European wikis */
		$d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
			' (' . date( 'T' ) . ')';
		# Restore the TZ value saved above (was a misspelt, undefined variable)
		if(isset($wgLocaltimezone)) putenv('TZ='.$oldtz);

		# Longest tilde run first, so that ~~~~~ is not eaten by the ~~~~ rule
		$text = preg_replace( '/~~~~~/', $d, $text );
		$text = preg_replace( '/~~~~/', '[[' . $wgLang->getNsText(
			Namespace::getUser() ) . ":$n|$k]] $d", $text );
		$text = preg_replace( '/~~~/', '[[' . $wgLang->getNsText(
			Namespace::getUser() ) . ":$n|$k]]", $text );

		# Context links: [[|name]] and [[name (context)|]]
		#
		# Parentheses must be escaped so that they match literally; $tc allows
		# them inside a title, $np does not.
		$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
		$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
		$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
		$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

		$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
		$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
		$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
		$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
			# [[ns:page (cont)|]]
		$context = "";
		$t = $this->mTitle->getText();
		if ( preg_match( $conpat, $t, $m ) ) {
			$context = $m[2];
		}
		# Most specific pattern first
		$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
		$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
		$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

		if ( '' == $context ) {
			$text = preg_replace( $p2, '[[\\1]]', $text );
		} else {
			$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
		}

		/*
		$mw =& MagicWord::get( MAG_SUBST );
		$wgCurParser = $this->fork();
		$text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
		$this->merge( $wgCurParser );
		*/

		# Trim trailing whitespace
		# MAG_END (__END__) tag allows for trailing
		# whitespace to be deliberately included
		$text = rtrim( $text );
		$mw =& MagicWord::get( MAG_END );
		$mw->matchAndRemove( $text );

		return $text;
	}

	# Set up some variables which are usually set up in parse()
	# so that an external function can call some class members with confidence
	#
	# $title:      stored by reference in $this->mTitle
	# $options:    stored in $this->mOptions
	# $outputType: stored in $this->mOutputType
	# $clearState: when true, clearState() is called afterwards
	function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
		$this->mTitle =& $title;
		$this->mOptions = $options;
		$this->mOutputType = $outputType;
		if ( $clearState ) {
			$this->clearState();
		}
	}

	# Run variable replacement over a message text (output type OT_MSG),
	# using the global $wgTitle as the title.
	#
	# $text:    message text
	# $options: stored in $this->mOptions
	# Returns the transformed text, or $text unchanged when called re-entrantly.
	function transformMsg( $text, $options ) {
		global $wgTitle;
		static $executing = false;

		# Guard against infinite recursion
		if ( $executing ) {
			return $text;
		}
		$executing = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
		return $text;
	}

	# Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
	# Callback will be called with the text within
	# Transform and return the text within
	#
	# Returns the callback previously registered for $tag, or NULL if none.
	function setHook( $tag, $callback ) {
		$oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : NULL;
		$this->mTagHooks[$tag] = $callback;
		return $oldVal;
	}
}

# Result of a parse: the output text plus the language links, category links
# and flags collected along the way.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Fold the links and the old-magic flag of another ParserOutput into this
	# one. The text and the cache time are left alone.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Category links come from $other's category links (previously this
		# appended our own language links by mistake)
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}

# Settings that control a parse; filled from globals and user preferences
# by initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	function getUseTeX() { return $this->mUseTeX; }
	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function getSkin() { return $this->mSkin; }
	function getDateFormat() { return $this->mDateFormat; }
	function getEditSection() { return $this->mEditSection; }
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function getNumberHeadings() { return $this->mNumberHeadings; }
	function getShowToc() { return $this->mShowToc; }

	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	function setSkin( &$x ) { $this->mSkin =& $x; }

	# Build a ParserOptions object initialised from the given user
	/* static */ function newFromUser( &$user ) {
		$popts = new ParserOptions;
		$popts->initialiseFromUser( $user );
		return $popts;
	}

	# Copy the site-wide settings from globals and the per-user settings from
	# $userInput. When $userInput is empty a fresh User object marked as
	# loaded is used instead.
	function initialiseFromUser( &$userInput ) {
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( !$userInput ) {
			$user = new User;
			$user->setLoaded( true );
		} else {
			$user =& $userInput;
		}

		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( 'date' );
		$this->mEditSection = $user->getOption( 'editsection' );
		$this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
		$this->mNumberHeadings = $user->getOption( 'numberheadings' );
		$this->mShowToc = $user->getOption( 'showtoc' );
	}


}

# Regex callbacks, used in Parser::replaceVariables
# Each one forwards the match array to the corresponding method of the
# parser currently held in the global $wgCurParser.
function wfBraceSubstitution( $matches )
{
	global $wgCurParser;
	return $wgCurParser->braceSubstitution( $matches );
}

function wfArgSubstitution( $matches )
{
	global $wgCurParser;
	return $wgCurParser->argSubstitution( $matches );
}

function wfVariableSubstitution( $matches )
{
	global $wgCurParser;
	return $wgCurParser->variableSubstitution( $matches );
}

?>