00001 <?php
00002
00003
00004
00005
# PHP Parser
00006
#
00007
# Processes wiki markup
00008
#
00009
# There are two main entry points into the Parser class: parse() and preSaveTransform().
00010
# The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
00011
#
00012
# Globals used:
00013
# objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
00014
#
00015
# NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
00016
#
00017
# settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
00018
# $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
00019
# $wgLocaltimezone
00020
#
00021
# * only within ParserOptions
00022
#
00023
#
00024
#----------------------------------------
00025
# Variable substitution O(N^2) attack
00026
#-----------------------------------------
00027
# Without countermeasures, it would be possible to attack the parser by saving a page
00028
# filled with a large number of inclusions of large pages. The size of the generated
00029
# page would be proportional to the square of the input size. Hence, we limit the number
00030
# of inclusions of any given page, thus bringing any attack back to O(N).
00031
#
00032
# Cap used by the inclusion countermeasure described in the header comment.
# NOTE(review): presumably the maximum number of times one page may be
# included per parse -- the code that reads it is not in this part of the file.
define( 'MAX_INCLUDE_REPEAT', 5 );

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Special $tag value for extractTags(): makes it extract <!-- HTML comments -->
# rather than <XML>-style tags. It must never be something that could
# legitimately appear as a tag name in wiki syntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix of the unique placeholder markers built by extractTags() and
# insertStripItem()
define( 'UNIQ_PREFIX', 'NaodW29' );
00048
00049
class Parser
00050 {
00051
# Persistent:
00052
var $mTagHooks;
00053
00054
# Cleared with clearState():
00055
var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
00056 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
00057
00058
# Temporary:
00059
var $mOptions, $mTitle, $mOutputType;
00060
# Constructor (PHP 4 style: method named after the class).
# Starts with no tag hooks registered, then initialises all per-parse
# members through clearState().
function Parser() {
	$this->mTagHooks = array();
	$this->clearState();
}
00065
# Reset every member listed under "Cleared with clearState()" to its
# initial value. mTagHooks and the "temporary" members are left alone.
function clearState() {
	# Fresh output container for the next parse
	$this->mOutput = new ParserOutput;

	# Scalar state
	$this->mAutonumber = 0;
	$this->mLastSection = '';
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mVariables = false;

	# Collections
	$this->mIncludeCount = array();
	$this->mStripState = array();
	$this->mArgStack = array();
}
00077
00078
# First pass--just handle <nowiki> sections, pass the rest off
00079
# to internalParse() which does all the real work.
00080
#
00081
# Returns a ParserOutput
00082
#
00083
# Convert wiki markup to HTML.
#
# $text       wiki markup to convert
# $title      title object of the page being parsed (stored by reference)
# $options    parser options object, stored in $this->mOptions
# $linestart  passed through to internalParse() and doBlockLevels()
# $clearState when true, clearState() is called before parsing
#
# Returns $this->mOutput with its text set to the generated HTML.
function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
	global $wgUseTidy;
	$fname = 'Parser::parse';
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	# Protected sections are swapped for markers before the real parsing
	# and swapped back afterwards (nowiki content is restored last, below).
	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if ( !$wgUseTidy ) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space.
			# The replacement must be a non-breaking space entity; replacing
			# the space with a plain space did nothing.
			'/(.) (\\?|:|!|\\302\\273)/i' => '\\1&nbsp;\\2',
			# french spaces, Guillemet-right
			'/(\\302\\253) /i' => '\\1&nbsp;',
			'/<hr *>/i' => '<hr />',
			'/<br *>/i' => '<br />',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# Anything that is not already an entity reference is escaped.
			'/&(?!#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
	} else {
		# Tidy repairs the markup itself, so only the typographic fixes and
		# the <center> translation are needed here.
		$fixtags = array(
			# french spaces, last one Guillemet-left
			'/ (\\?|:|!|\\302\\273)/i' => '&nbsp;\\1',
			# french spaces, Guillemet-right
			'/(\\302\\253) /i' => '\\1&nbsp;',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>'
		);
	}
	$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	$text = $this->unstripNoWiki( $text, $this->mStripState );
	if ( $wgUseTidy ) {
		$text = $this->tidy( $text );
	}
	$this->mOutput->setText( $text );

	wfProfileOut( $fname );
	return $this->mOutput;
}
00138
# Build a pseudo-random lowercase hex string from two mt_rand() draws,
# each in the range 0 .. 0x7fffffff. Used to make strip markers unique.
function getRandomString() {
	$upper = 0x7fffffff;
	$head = dechex( mt_rand( 0, $upper ) );
	$tail = dechex( mt_rand( 0, $upper ) );
	return $head . $tail;
}
00142
00143
# Replaces all occurrences of <$tag>content</$tag> in the text
00144
# with a random marker and returns the new text. the output parameter
00145
# $content will be an associative array filled with data on the form
00146
# $unique_marker => content.
00147
00148
# If $content is already set, the additional entries will be appended
00149
00150
# If $tag is set to STRIP_COMMENTS, the function will extract
00151
# <!-- HTML comments -->
00152
# Replace every <$tag>content</$tag> in $text with a unique marker.
#
# $tag          tag name, or STRIP_COMMENTS to extract <!-- HTML comments -->
# $text         text to scan
# $content      in/out: array of marker => extracted content; existing
#               entries are kept and new ones appended
# $uniq_prefix  string put at the start of every generated marker
#
# Returns the text with the elements replaced by their markers.
#
# An opening tag with no closing tag swallows the rest of the text as its
# content. An opening tag at the very end of the text is dropped without
# creating a marker.
function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}

	# The delimiters do not change while scanning, so build them once.
	if ( $tag === STRIP_COMMENTS ) {
		$openRegex = '/<!--/i';
		$closeRegex = '/-->/i';
	} else {
		# Quote the name so a tag containing regex metacharacters is
		# matched literally.
		$quotedTag = preg_quote( $tag, '/' );
		$openRegex = "/<\\s*{$quotedTag}\\s*>/i";
		$closeRegex = "/<\\/\\s*{$quotedTag}\\s*>/i";
	}

	$n = 1;
	$stripped = '';

	while ( '' != $text ) {
		$p = preg_split( $openRegex, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
			# No (further) opening tag, or nothing follows it
			$text = '';
		} else {
			$q = preg_split( $closeRegex, $p[1], 2 );
			$marker = $rnd . sprintf( '%08X', $n++ );
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# $q[1] is missing when the element is never closed
			$text = isset( $q[1] ) ? $q[1] : '';
		}
	}
	return $stripped;
}
00184
00185
# Strips and renders <nowiki>, <pre>, <math>, <hiero>
00186
# If $render is set, performs necessary rendering operations on plugins
00187
# Returns the text, and fills an array with data needed in unstrip()
00188
# If the $state is already a valid strip state, it adds to the state
00189
00190
# When $stripcomments is set, HTML comments <!-- like this -->
00191
# will be stripped in addition to other tags. This is important
00192
# for section editing, where these comments cause confusion when
00193
# counting the sections in the wikisource
00194
# Replace <nowiki>, <math>, <pre>, optionally HTML comments, and every
# registered extension tag with unique markers.
#
# $text           text to process
# $state          in/out strip state: array of kind => ( marker => content ).
#                 A non-empty state is merged into, an empty one is created.
# $stripcomments  when true, <!-- comments --> are extracted as well
#
# When $this->mOutputType is OT_HTML the stored content is the rendered
# form; otherwise it is the original element re-wrapped in its tags, so that
# unstrip() restores the source unchanged.
#
# Returns the text with markers in place of the extracted elements.
function strip( $text, &$state, $stripcomments = false ) {
	$render = ($this->mOutputType == OT_HTML);
	$nowiki_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();
	$ext_content = array();

	# Replace any instances of the placeholders
	# (currently disabled, so marker text already present in the input is
	# not escaped)
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	# nowiki
	$text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ){
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	# math
	$text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render && $this->mOptions->getUseTeX() ) {
			$math_content[$marker] = renderMath( $content );
		} else {
			# Not rendering, or TeX disabled: put the element back as it was.
			# The TeX-disabled case used to emit "<math>...<math>".
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	# pre
	$text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}

	# Comments
	if( $stripcomments ) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	# Extensions
	foreach ( $this->mTagHooks as $tag => $callback ) {
		$ext_content[$tag] = array();
		$text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
		foreach( $ext_content[$tag] as $marker => $content ) {
			if ( $render ) {
				$ext_content[$tag][$marker] = $callback( $content );
			} else {
				$ext_content[$tag][$marker] = "<$tag>$content</$tag>";
			}
		}
	}

	# Everything extracted by this call, in the canonical bucket order
	$stripped = array(
		'nowiki' => $nowiki_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content,
	) + $ext_content;

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		# Start from the canonical order and fold the old state in. Buckets
		# known only to the old state (e.g. 'item') stay at the end, because
		# unstrip() expands buckets last-to-first. Buckets the old state
		# lacks are created instead of being dropped or causing an error.
		$merged = $stripped;
		foreach ( $state as $kind => $items ) {
			if ( isset( $merged[$kind] ) && is_array( $items ) ) {
				# Old entries first, as before
				$merged[$kind] = $items + $merged[$kind];
			} else {
				$merged[$kind] = $items;
			}
		}
		$state = $merged;
	} else {
		$state = $stripped;
	}
	return $text;
}
00284
00285
# always call unstripNoWiki() after this one
00286
# Put back everything recorded in the strip state except the 'nowiki'
# bucket, which unstripNoWiki() restores later.
#
# $text   text containing markers
# $state  strip state: array of kind => ( marker => content )
#
# Returns the text with the markers replaced by their stored content.
function unstrip( $text, &$state ) {
	if ( !is_array( $state ) ) {
		return $text;
	}
	# Must expand in reverse order, otherwise nested tags will be corrupted.
	# foreach over a reversed copy visits every entry; the previous
	# end()/prev() loop stopped at the first stored value that was false.
	foreach ( array_reverse( $state, true ) as $kind => $contentDict ) {
		if ( $kind === 'nowiki' || !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}
	return $text;
}
00299
# always call this after unstrip() to preserve the order
00300
# Put back the content of the 'nowiki' bucket of the strip state.
#
# $text   text containing nowiki markers
# $state  strip state; only $state['nowiki'] is read
#
# Returns the text unchanged when there is no nowiki bucket.
function unstripNoWiki( $text, &$state ) {
	if ( !isset( $state['nowiki'] ) || !is_array( $state['nowiki'] ) ) {
		return $text;
	}
	# Must expand in reverse order, otherwise nested tags will be corrupted.
	# Iterating a reversed copy also avoids stopping early on a stored
	# value of false, as the end()/prev() loop did.
	foreach ( array_reverse( $state['nowiki'], true ) as $marker => $content ) {
		$text = str_replace( $marker, $content, $text );
	}
	return $text;
}
00308
00309
# Add an item to the strip state
00310
# Returns the unique tag which must be inserted into the stripped text
00311
# The tag will be replaced with the original text in unstrip()
00312
# Store $text in the 'item' bucket of the strip state.
#
# $text   content to store
# $state  in/out strip state; created with empty standard buckets if empty
#
# Returns the unique marker to insert into the stripped text; unstrip()
# later replaces it with $text.
function insertStripItem( $text, &$state ) {
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Same standard buckets that strip() creates, including 'comment',
		# so a state created here can safely be merged into by strip().
		$state = array(
			'nowiki' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
00325
00326
# categoryMagic
00327
# generate a list of subcategories and pages for a category
00328
# depending on wfMsg("usenewcategorypage") it either calls the new
00329
# or the old code. The new code will not work properly for some
00330
# languages due to sorting issues, so they might want to turn it
00331
# off.
00332
# Choose the category listing implementation.
# The old code is used when the 'usenewcategorypage' message starts with
# the character '0'; in every other case the new code is used.
function categoryMagic() {
	$msg = wfMsg( 'usenewcategorypage' );
	# @ silences the notice raised when the message is an empty string
	$useOldCode = ( '0' == @$msg[0] );
	if ( $useOldCode ) {
		return $this->oldCategoryMagic();
	}
	return $this->newCategoryMagic();
}
00341
00342
# This method generates the list of subcategories and pages for a category
00343
# Old-style category listing: comma-separated links to the subcategories
# and to the pages of the category being parsed.
#
# Returns nothing when category magic is disabled in the options, an empty
# string when the current title is not in the category namespace, and
# otherwise the HTML of the listing.
function oldCategoryMagic () {
	global $wgLang, $wgUser;

	if ( !$this->mOptions->getUseCategoryMagic() ) {
		return; # Doesn't use categories at all
	}

	$cns = Namespace::getCategory();
	if ( $this->mTitle->getNamespace() != $cns ) {
		return ""; # This ain't a category page
	}

	$r = "<br style=\"clear:both;\"/>\n";

	$sk =& $wgUser->getSkin();

	$articles = array();
	$children = array();
	$data = array();
	# NOTE(review): $id is never read in this method
	$id = $this->mTitle->getArticleID();

	# FIXME: add limits
	$dbKey = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='{$dbKey}' AND cl_from=cur_id ORDER BY cl_sortkey";
	$res = wfQuery( $sql, DB_READ );
	while ( $row = wfFetchObject( $res ) ) {
		$data[] = $row;
	}

	# For all pages that link to this category
	foreach ( $data as $row ) {
		# Prefixed name: "<namespace text>:<title>", or the bare title
		# when the namespace text is empty
		$name = $wgLang->getNsText( $row->cur_namespace );
		if ( $name != "" ) {
			$name .= ":";
		}
		$name .= $row->cur_title;

		if ( $row->cur_namespace == $cns ) {
			$children[] = $sk->makeLink( $name ); # Subcategory
		} else {
			$articles[] = $sk->makeLink( $name ); # Page in this category
		}
	}
	wfFreeResult( $res );

	# Showing subcategories
	if ( count( $children ) > 0 ) {
		$r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n";
		$r .= implode( ', ', $children );
	}

	# Showing pages in this category
	if ( count( $articles ) > 0 ) {
		$ti = $this->mTitle->getText();
		$h = wfMsg( 'category_header', $ti );
		$r .= "<h2>{$h}</h2>\n";
		$r .= implode( ', ', $articles );
	}

	return $r;
}
00398
00399
00400
# New-style category listing: subcategories and pages of the category being
# parsed, grouped under a heading per first character and laid out in three
# columns when a list has more than six entries.
#
# Returns an empty string when category magic is disabled in the options or
# the current title is not in the category namespace, otherwise the HTML of
# the listing.
function newCategoryMagic () {
	global $wgLang, $wgUser;

	if ( !$this->mOptions->getUseCategoryMagic() ) {
		return ''; # Doesn't use categories at all
	}

	$cns = Namespace::getCategory();
	if ( $this->mTitle->getNamespace() != $cns ) {
		return ''; # This ain't a category page
	}

	$r = "<br style=\"clear:both;\"/>\n";

	$sk =& $wgUser->getSkin();

	$articles = array();
	$articles_start_char = array();
	$children = array();
	$children_start_char = array();
	# NOTE(review): the result is unused here; the call is kept in case
	# getArticleID() has caching side effects -- confirm, then remove.
	$this->mTitle->getArticleID();

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace,cl_sortkey FROM cur,categorylinks " .
		"WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey";
	$res = wfQuery( $sql, DB_READ );
	while ( $x = wfFetchObject( $res ) ) {
		# Prefixed name: "<namespace text>:<title>", or the bare title when
		# the namespace text is empty
		$t = $ns = $wgLang->getNsText( $x->cur_namespace );
		if ( $t != '' ) {
			$t .= ':';
		}
		$t .= $x->cur_title;

		if ( $x->cur_namespace == $cns ) {
			# Subcategory, shown without the namespace prefix
			$ctitle = str_replace( '_', ' ', $x->cur_title );
			$children[] = $sk->makeKnownLink( $t, $ctitle );

			# When the sort key is just the prefixed page name, group by
			# the first character of the title instead of the sort key.
			if ( ( $ns . ":" . $ctitle ) == $x->cl_sortkey ) {
				$children_start_char[] = $wgLang->firstChar( $x->cur_title );
			} else {
				$children_start_char[] = $wgLang->firstChar( $x->cl_sortkey );
			}
		} else {
			# Page in this category
			$articles[] = $sk->makeLink( $t );
			$articles_start_char[] = $wgLang->firstChar( $x->cl_sortkey );
		}
	}
	wfFreeResult( $res );

	$ti = $this->mTitle->getText();

	# Don't show subcategories section if there are none.
	if ( count( $children ) > 0 ) {
		$r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n"
			. wfMsg( 'subcategorycount', count( $children ) );
		$r .= $this->formatCategoryColumns( $children, $children_start_char );
	}

	# The article section header is always shown, even for an empty list
	$r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n"
		. wfMsg( 'categoryarticlecount', count( $articles ) );
	$r .= $this->formatCategoryColumns( $articles, $articles_start_char );

	return $r;
}

# Helper for newCategoryMagic(): render one list of links grouped by start
# character.
#
# $items       list of link HTML strings, indexed from 0
# $startChars  parallel list holding the grouping character of each item
#
# Returns '' for an empty list, a three-column table for more than six
# items, and a single list otherwise.
#
# NOTE(review): as in the code this was extracted from, each column opens a
# <ul> that is closed again before its first heading, so an empty <ul></ul>
# is emitted. The output is kept identical on purpose.
function formatCategoryColumns( $items, $startChars ) {
	$count = count( $items );
	if ( $count == 0 ) {
		return '';
	}

	$r = '';
	if ( $count > 6 ) {
		# Column sizes are chunk, chunk+1, chunk+1, which always covers
		# the whole list
		$chunk = (int)( $count / 3 );
		$startChunk = 0;
		$endChunk = $chunk;

		$r .= '<table width="100%"><tr valign="top">';
		for ( $chunkIndex = 0; $chunkIndex < 3; $chunkIndex++ ) {
			$r .= '<td><ul>';
			for ( $index = $startChunk; $index < $endChunk && $index < $count; $index++ ) {
				# Test for the column start first, so index -1 is never read
				if ( $index == $startChunk
					|| $startChars[$index] != $startChars[$index - 1] )
				{
					$r .= "</ul><h3>{$startChars[$index]}</h3>\n<ul>";
				}
				$r .= "<li>{$items[$index]}</li>";
			}
			$r .= '</ul></td>';

			$startChunk = $endChunk;
			$endChunk += $chunk + 1;
		}
		$r .= '</tr></table>';
	} else {
		$r .= "<h3>{$startChars[0]}</h3>\n";
		$r .= '<ul><li>' . $items[0] . '</li>';
		for ( $index = 1; $index < $count; $index++ ) {
			if ( $startChars[$index] != $startChars[$index - 1] ) {
				$r .= "</ul><h3>{$startChars[$index]}</h3>\n<ul>";
			}
			$r .= "<li>{$items[$index]}</li>";
		}
		$r .= '</ul>';
	}
	return $r;
}
00576
00577
# Return allowed HTML attributes
00578
# Return the list of HTML attribute names that are allowed in wiki text.
# Scripting attributes are deliberately absent. Each name appears once
# ('width' used to be listed twice).
function getHTMLattrs () {
	$htmlattrs = array( # Allowed attributes--no scripting, etc.
		'title', 'align', 'lang', 'dir', 'width', 'height',
		'bgcolor', 'clear', 'noshade',
		'cite', 'size', 'face', 'color',
		'type', 'start', 'value', 'compact',

		'summary', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan',
		'id', 'class', 'name', 'style'
	);
	return $htmlattrs ;
}
00593
00594
# Remove non approved attributes and javascript in css
00595
# Remove non approved attributes and javascript in css.
#
# $t  attribute part of an HTML tag, e.g. ' class="x" onclick="y"'
#
# Returns the trimmed attribute text containing only attributes named in
# getHTMLattrs(), or '' when the result has a style attribute that looks
# dangerous.
function fixTagAttributes ( $t ) {
	if ( trim ( $t ) == '' ) {
		return '' ; # Saves runtime ;-)
	}

	# Strip non-approved attributes from the tag.
	# A callback is used rather than the /e (eval) modifier: /e evaluated
	# the attribute text as PHP string code, so a "$" in a value was
	# interpolated and single quotes gained a backslash.
	$t = preg_replace_callback(
		'/(\\w+)(\\s*=\\s*([^\\s">]+|"[^">]*"))?/',
		array( $this, 'fixTagAttributesCallback' ),
		$t );

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if ( preg_match(
		'/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
		wfMungeToUtf8( $t ) ) )
	{
		$t = '';
	}

	return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes().
#
# $m  match array: $m[1] is the attribute name, $m[3] (when present) the
#     value, including its double quotes if it was quoted
#
# Returns 'name', or 'name=value' when a value was given, for an allowed
# attribute; '' for anything else.
function fixTagAttributesCallback( $m ) {
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
		return '';
	}
	# Group 3 is absent when the attribute has no "=value" part
	$value = isset( $m[3] ) ? $m[3] : '';
	if ( $value !== '' ) {
		return $m[1] . '=' . $value;
	}
	return $m[1];
}
00616
# Interface with HTML Tidy; used when $wgUseTidy is true
# Run $text through the external HTML Tidy program.
#
# $text  HTML fragment; it is wrapped in a minimal XHTML document before
#        being piped to tidy
#
# Returns tidy's output. When tidy yields nothing for a non-empty input,
# the original text is returned followed by an HTML comment saying so.
#
# NOTE(review): the command line is assembled from $wgTidyBin, $wgTidyConf
# and $wgTidyOpts without shell escaping; presumably these are trusted site
# configuration -- confirm.
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = 'Parser::tidy';
	wfProfileIn( $fname );

	$cleansource = '';

	# Work on a copy: appending to the global itself made the encoding flag
	# pile up on every call within one request.
	$opts = $wgTidyOpts;
	switch ( strtoupper( $wgOutputEncoding ) ) {
		case 'ISO-8859-1':
			$opts .= ( $wgInputEncoding == $wgOutputEncoding ) ? ' -latin1' : ' -raw';
			break;
		case 'UTF-8':
			$opts .= ( $wgInputEncoding == $wgOutputEncoding ) ? ' -utf8' : ' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';

	$descriptorspec = array(
		0 => array( 'pipe', 'r' ),              # stdin: we write the document
		1 => array( 'pipe', 'w' ),              # stdout: we read the result
		2 => array( 'file', '/dev/null', 'a' )  # stderr: discarded
	);
	$process = proc_open( "$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes );
	if ( is_resource( $process ) ) {
		fwrite( $pipes[0], $wrappedtext );
		fclose( $pipes[0] );
		while ( !feof( $pipes[1] ) ) {
			$cleansource .= fgets( $pipes[1], 1024 );
		}
		fclose( $pipes[1] );
		proc_close( $process );
	}

	wfProfileOut( $fname );

	if ( $cleansource == '' && $text != '' ) {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	}
	return $cleansource;
}
00664
00665 # parse the wiki syntax used to render tables
# Parse the wiki syntax used to render tables.
#
# The text is handled line by line (each line trimmed before inspection):
#   {|   opens a table; the rest of the line is its attributes
#   |}   closes the current table
#   |-   starts a new row; any further dashes are ignored, the rest is
#        the row's attributes
#   |+   caption
#   | or !   data / header cells; "||" (and "!!" on header lines)
#        separates several cells on one line
# Lines outside any table, and lines inside one that match none of these,
# are left untouched. Tables still open at the end of the text are closed.
#
# $t  text to process
# Returns the text with table markup replaced by HTML.
function doTableStuff ( $t ) {
	$t = explode ( "\n" , $t ) ;
	# One stack entry per currently open (possibly nested) table:
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t as $k => $x ) {
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		$fc2 = substr ( $x , 0 , 2 ) ;

		if ( '{|' == $fc2 ) {
			# Attributes start right after "{|". Offset 3 was used before,
			# which lost the first character of "{|attr=..." written
			# without a space.
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . '>' ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , '' ) ;
		}
		else if ( count ( $td ) == 0 ) {
			# Not inside a table: don't do any of the following
		}
		else if ( '|}' == $fc2 ) {
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( '|-' == $fc2 ) { # Allows for |---------------
			$x = substr ( $x , 1 ) ;
			while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) {
				$x = substr ( $x , 1 ) ;
			}
			# Close the open cell and row; the new row is opened lazily
			# by its first cell
			$z = '' ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( '|' == $fc || '!' == $fc ) {
			if ( '|+' == $fc2 ) { # Caption
				$fc = '+' ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
			$after = explode ( '||' , $after ) ;
			$t[$k] = '' ;
			foreach ( $after as $theline ) {
				$z = '' ;
				if ( $fc != '+' ) {
					# Cells need an open row; captions do not
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , '' ) ;
				}

				# Close the previous cell, if any
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;

				if ( $fc == '|' ) $l = 'td' ;
				else if ( $fc == '!' ) $l = 'th' ;
				else if ( $fc == '+' ) $l = 'caption' ;
				else $l = '' ;
				array_push ( $ltd , $l ) ;

				# "attributes|content" or just "content"
				$y = explode ( '|' , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) {
					$y = "{$z}<{$l}>{$y[0]}" ;
				} else {
					$y = "{$z}<{$l} " . $this->fixTagAttributes ( $y[0] ) . ">{$y[1]}" ;
				}
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 ) {
		# Close the cell with the tag that opened it (td, th or caption)
		# rather than always with </td>
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
		$t[] = '</table>' ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
00764
00765
# Parses the text and adds the result to the strip state
00766
# Returns the strip tag
00767
# Strip and parse $text as a non-main fragment, store the result in the
# strip state and return $newline followed by the marker standing for it.
#
# $text     wiki markup to parse
# $newline  string put in front of the marker; its boolean value is passed
#           to internalParse() as $linestart
# $args     passed through to internalParse()
function stripParse( $text, $newline, $args )
{
	$stripped = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $stripped, (bool)$newline, $args, false );
	$marker = $this->insertStripItem( $parsed, $this->mStripState );
	return $newline . $marker;
}
00773
# Run the wiki-to-HTML transformations on already stripped text.
#
# $text       text with protected sections already replaced by markers
# $linestart  accepted but not read anywhere in this method
# $args       passed to replaceVariables()
# $isMain     passed to formatHeadings()
#
# Returns the transformed text. The order of the steps matters.
function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
	$fname = 'Parser::internalParse';
	wfProfileIn( $fname );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# Five or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doHeadings( $text );

	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}

	$text = $this->doAllQuotes( $text );

	# External links first, then two passes over internal links
	$text = $this->replaceExternalLinks( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->replaceInternalLinks( $text );

	$text = $this->doTableStuff( $text );
	$text = $this->magicISBN( $text );
	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text, $isMain );

	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );

	# Append the category listing on the first call only.
	# NOTE(review): categoryMagicDone is not reset by clearState(), so a
	# reused Parser object would append the listing just once -- confirm
	# that this is intended.
	if ( !isset( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic();
		$this->categoryMagicDone = true;
	}

	wfProfileOut( $fname );
	return $text;
}
00809
00810
# Parse ^^ tokens and return html
00811
# Parse ^^ tokens and return html.
# Each ^^text^^ pair becomes <small><sup>text</sup></small>. The match is
# non-greedy so that several pairs on one line are converted separately
# instead of being merged into one span.
function doExponent ( $text )
{
	$fname = 'Parser::doExponent';
	wfProfileIn( $fname );
	$text = preg_replace( '/\^\^(.*?)\^\^/', '<small><sup>\\1</sup></small>', $text );
	wfProfileOut( $fname );
	return $text;
}
00819
00820
# Parse headers and return html
00821
# Parse headers and return html.
# A line that starts with N equals signs and contains N more, followed by
# whitespace or the end of the line, becomes <hN>...</hN>. Levels are tried
# from 6 down to 1 so that the longest run of equals signs wins.
function doHeadings( $text ) {
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );
	$level = 6;
	while ( $level >= 1 ) {
		$h = str_repeat( '=', $level );
		$pattern = "/^{$h}(.+){$h}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}
	wfProfileOut( $fname );
	return $text;
}
00832
# Apply doQuotes() to every line of $text separately and join the results
# with newlines again.
function doAllQuotes( $text ) {
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );

	$result = '';
	$pieces = explode( "\n", $text );
	$total = count( $pieces );
	for ( $idx = 0; $idx < $total; $idx++ ) {
		$result .= $this->doQuotes( '', $pieces[$idx], '' ) . "\n";
	}
	# Drop the newline added after the last line
	$result = substr( $result, 0, -1 );

	wfProfileOut( $fname );
	return $result;
}
00845
# Convert runs of apostrophes in one line to <em> / <strong> markup.
#
# $pre   text collected so far that still has to be placed inside the
#        element being built
# $text  remaining text to scan
# $mode  what is open at this point: '', 'em', 'strong', 'emstrong',
#        'strongem' or 'both'
#
# The method recurses on the text after each '' or ''' it finds. When no
# apostrophe run is left, whatever is still open is closed around the rest.
function doQuotes( $pre, $text, $mode ) {
	if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
		# No more quote runs: wrap the remainder according to $mode
		$boldText = ($text == '') ? '' : "<strong>{$text}</strong>";
		$italicText = ($text == '') ? '' : "<em>{$text}</em>";
		switch ( $mode ) {
			case '':
				return $pre . $text;
			case 'em':
				return $pre . $italicText;
			case 'strong':
				return $pre . $boldText;
			case 'strongem':
				return (($pre == '') && ($text == '')) ? '' : "<strong>{$pre}{$italicText}</strong>";
			default:
				return (($pre == '') && ($text == '')) ? '' : "<em>{$pre}{$boldText}</em>";
		}
	}

	# $m[1] is the text before the quote run, $m[2] what follows the first
	# two apostrophes
	$before = $m[1];
	$beforeBold = ($before == "") ? "" : "<strong>{$before}</strong>";
	$beforeItalic = ($before == "") ? "" : "<em>{$before}</em>";

	if ( substr( $m[2], 0, 1 ) == '\'' ) {
		# Three apostrophes: bold toggle
		$rest = substr( $m[2], 1 );
		switch ( $mode ) {
			case 'em':
				return $this->doQuotes( $before, $rest, ($before == '') ? 'both' : 'emstrong' );
			case 'strong':
				return $beforeBold . $this->doQuotes( '', $rest, '' );
			case 'emstrong':
			case 'both':
				return $this->doQuotes( '', $pre . $beforeBold . $rest, 'em' );
			case 'strongem':
				return "<strong>{$pre}{$beforeItalic}</strong>" . $this->doQuotes( '', $rest, 'em' );
			default:
				return $before . $this->doQuotes( '', $rest, 'strong' );
		}
	}

	# Two apostrophes: italic toggle
	$rest = $m[2];
	switch ( $mode ) {
		case 'strong':
			return $this->doQuotes( $before, $rest, ($before == '') ? 'both' : 'strongem' );
		case 'em':
			return $beforeItalic . $this->doQuotes( '', $rest, '' );
		case 'emstrong':
			return "<em>{$pre}{$beforeBold}</em>" . $this->doQuotes( '', $rest, 'strong' );
		case 'strongem':
		case 'both':
			return $this->doQuotes( '', $pre . $beforeItalic . $rest, 'strong' );
		default:
			return $before . $this->doQuotes( '', $rest, 'em' );
	}
}
00892
00893
# Note: we have to do external links before the internal ones,
00894
# and otherwise take great care in the order of things here, so
00895
# that we don't end up interpreting some URLs twice.
00896
# Convert external links for every supported protocol, one protocol after
# the other, by calling subReplaceExternalLinks().
function replaceExternalLinks( $text ) {
	$fname = 'Parser::replaceExternalLinks';
	wfProfileIn( $fname );

	# protocol => value passed as $autonumber. The order is significant and
	# is the order in which the protocols are processed.
	$protocols = array(
		'http' => true,
		'https' => true,
		'ftp' => false,
		'irc' => false,
		'gopher' => false,
		'news' => false,
		'mailto' => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
00910
# Convert the external links of one protocol in $s to HTML.
#
# $s           text to process
# $protocol    protocol name without the colon, e.g. 'http'
# $autonumber  when true, a bracketed link without a description is shown
#              as a running number; image URLs are turned into images only
#              when this is true and the options allow external images
#
# Bare URLs are handled first, with a temporary placeholder standing in for
# the protocol so that a generated link is not matched a second time.
# Bracketed links "[url]" and "[url description]" are handled afterwards.
function subReplaceExternalLinks( $s, $protocol, $autonumber ) {
	$unique = '4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3';
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = 'A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF';
	$images = 'gif|png|jpg|jpeg';

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$imageRegex = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";
	$bareRegex = "/(^|[^\\[])({$protocol}:)(([{$uc}]|[{$sep}][{$uc}])+)([^{$uc}{$sep}]|[{$sep}]|$)/";

	$sk =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$imageHtml = $sk->makeImage( "{$unique}:\\3" . '/\\4.\\5', '\\4.\\5' );
		$s = preg_replace( $imageRegex, '\\1' . $imageHtml . '\\6', $s );
	}

	# Bare URLs; "\3" in the replacement is the part after "<protocol>:"
	$placeholderUrl = "{$unique}:\\3";
	$escapedForAttrs = wfEscapeHTML( $placeholderUrl );
	$attrs = $sk->getExternalLinkAttributes( $placeholderUrl, $escapedForAttrs );
	$label = wfEscapeHTML( $placeholderUrl );
	$s = preg_replace( $bareRegex,
		'\\1' . "<a href=\"{$unique}:\\3\"" . $attrs . ">" . $label . '</a>\\5',
		$s );
	$s = str_replace( $unique, $protocol, $s );

	# Bracketed links: split on "[<protocol>:". The leading space makes sure
	# the first piece is the text before the first bracketed link.
	$pieces = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $pieces );
	$s = substr( $s, 1 );

	$plainRegex = "/^([{$uc}{$sep}]+)](.*)\$/sD";
	$describedRegex = "/^([{$uc}{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $pieces as $line ) {
		if ( preg_match( $plainRegex, $line, $m ) ) {
			# [url]
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) {
				$text = "[" . ++$this->mAutonumber . "]";
			} else {
				$text = wfEscapeHTML( $link );
			}
		} else if ( preg_match( $describedRegex, $line, $m ) ) {
			# [url description]
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a link after all: restore what explode() removed
			$s .= "[{$protocol}:" . $line;
			continue;
		}

		$textIsUrl = ( $link == $text )
			|| preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link );
		if ( $textIsUrl ) {
			$paren = '';
		} else {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars( $link ) . "</i>)</span>";
		}

		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
	}
	return $s;
}
00976
00977
# Replace [[internal links]] in $s with the HTML produced by the skin.
#
# The text is split on '[['; every piece after the first is expected to look
# like "target|alternate text]]trail". Pieces that do not match, or whose
# target is not a valid title, are written back unchanged (with their '[[').
#
# Depending on the target the link is handled as:
#   - an interlanguage link (collected in $this->mOutput->mLanguageLinks),
#   - an image, category, media or special-page link,
#   - a self-link (handed to makeSelfLinkObj()),
#   - or an ordinary link (makeLinkObj()).
# A leading ':' on the target disables the interlanguage/image/category
# special cases. A leading '/' denotes a subpage of the current title when
# the current namespace is listed in $wgNamespacesWithSubpages.
#
# @param string $s wikitext to process
# @return string text with the links replaced
function replaceInternalLinks( $s ) {
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages;
	static $fname = 'Parser::replaceInternalLinks';
	wfProfileIn( $fname );

	wfProfileIn( $fname.'-setup' );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
	$sk =& $this->mOptions->getSkin();

	# The padding space guarantees a non-empty first element even when the
	# text starts with '[['; it is stripped again right away.
	$a = explode( '[[', ' ' . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';

	$useLinkPrefixExtension = $wgLang->linkPrefixExtension();
	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = Namespace::getCategory(); }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	if ( $useLinkPrefixExtension ) {
		# Peel the word directly in front of the first link off the leading text
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
			$s = $m[1];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	wfProfileOut( $fname.'-setup' );

	foreach ( $a as $line ) {
		wfProfileIn( $fname.'-prefixhandling' );
		if ( $useLinkPrefixExtension ) {
			# Peel the word directly in front of this link off the output so far
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}
		wfProfileOut( $fname.'-prefixhandling' );

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if ( preg_match( '/%/', $m[1] ) ) $m[1] = urldecode( $m[1] );
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$c = substr( $m[1], 0, 1 );
		$noforce = ( $c != ':' );
		if ( $c == '/' ) { # subpage
			if ( substr( $m[1], -1, 1 ) == '/' ) { # / at end means we don't want the slash to be shown
				$m[1] = substr( $m[1], 1, strlen( $m[1] ) - 2 );
				$noslash = $m[1];
			} else {
				$noslash = substr( $m[1], 1 );
			}
			if ( !empty( $wgNamespacesWithSubpages[$this->mTitle->getNamespace()] ) ) { # subpages allowed here
				$link = $this->mTitle->getPrefixedText() . '/' . trim( $noslash );
				if ( '' == $text ) {
					$text = $m[1];
				} # this might be changed for ugliness reasons
			} else {
				$link = $noslash; # no subpage allowed, use standard link
			}
		} elseif ( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			# drop the leading ':'
			$link = substr( $m[1], 1 );
		}
		$wasblank = ( '' == $text );
		if ( $wasblank )
			$text = $link;

		$nt = Title::newFromText( $link );
		if ( !$nt ) {
			# Not a valid title; leave the markup as it was
			$s .= $prefix . '[[' . $line;
			continue;
		}
		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		if ( $noforce ) {
			if ( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				# Interlanguage link: collected separately, nothing is rendered inline
				array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
				$tmp = $prefix . $trail;
				$s .= ( trim( $tmp ) == '' ) ? '' : $tmp;
				continue;
			}
			if ( $ns == $image ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}
			if ( $ns == $category ) {
				$t = $nt->getText();
				$nnt = Title::newFromText( Namespace::getCanonicalName( $category ) . ":" . $t );

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$t = $sk->makeLinkObj( $nnt, $t, '', '', $prefix );
				$wgLinkCache->resume();

				# The text after the pipe, if any, is used as the sort key
				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t;
				$s .= $prefix . $trail;
				continue;
			}
		}

		# strpos() returns 0 for a '#' in the first position, so the result has
		# to be compared with === to tell "at offset 0" from "not found".
		if ( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		     ( strpos( $link, '#' ) === false ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		if ( $ns == $media ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif ( $ns == $special ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
01140
01141
# Some functions here used by doBlockLevels()
01142
#
# Emit the closing tag for the section named in $this->mLastSection, if any,
# then clear the "inside <pre>" flag and the remembered section.
#
# @return string closing tag followed by a newline, or '' when no section is open
function closeParagraph() {
	$closing = ( '' != $this->mLastSection )
		? '</' . $this->mLastSection . ">\n"
		: '';
	$this->mInPre = false;
	$this->mLastSection = '';
	return $closing;
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading characters (bytes) they share.
#
# @param string $st1
# @param string $st2
# @return int 0 when either string is empty or the first characters differ
function getCommon( $st1, $st2 ) {
	$limit = min( strlen( $st1 ), strlen( $st2 ) );

	# Square-bracket offsets are used because the curly-brace form
	# is not accepted by current PHP versions.
	for ( $i = 0; $i < $limit; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) {
			break;
		}
	}
	return $i;
}
01165
# These next three functions open, continue, and close the list
01166
# element appropriate to the prefix character passed into them.
01167
#
# Close any open paragraph and open the list element that belongs to the
# given prefix character: '*' => ul, '#' => ol, ':' => dl/dd, ';' => dl/dt.
# Opening a ';' item also sets $this->mDTopen.
#
# @param string $char one list prefix character
# @return string HTML to append; for an unknown character only an error
#                comment is returned (the paragraph-closing text is dropped)
function openList( $char ) {
	$html = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			$html .= '<ul><li>';
			break;
		case '#':
			$html .= '<ol><li>';
			break;
		case ':':
			$html .= '<dl><dd>';
			break;
		case ';':
			$html .= '<dl><dt>';
			$this->mDTopen = true;
			break;
		default:
			$html = '<!-- ERR 1 -->';
	}

	return $html;
}
01183
# Close the current list item and open the next one at the same level.
#
# For '*' and '#' this is simply </li><li>. For definition lists the element
# being closed depends on $this->mDTopen (</dt> if a term is open, otherwise
# </dd>), and the flag is updated to reflect the element being opened.
#
# @param string $char one list prefix character
# @return string HTML to append, or an error comment for an unknown character
function nextItem( $char ) {
	switch ( $char ) {
		case '*':
		case '#':
			return '</li><li>';

		case ':':
		case ';':
			$closing = $this->mDTopen ? '</dt>' : "</dd>";
			$opensTerm = ( ';' == $char );
			$this->mDTopen = $opensTerm;
			return $closing . ( $opensTerm ? '<dt>' : '<dd>' );
	}
	return '<!-- ERR 2 -->';
}
01199
# Close the list that belongs to the given prefix character.
#
# For ':' the element closed inside the <dl> depends on $this->mDTopen, which
# is reset when a term was still open. Any other character (including ';')
# yields an error comment without a trailing newline.
#
# @param string $char one list prefix character
# @return string closing HTML followed by a newline, or an error comment
function closeList( $char ) {
	switch ( $char ) {
		case '*':
			$closing = '</li></ul>';
			break;
		case '#':
			$closing = '</li></ol>';
			break;
		case ':':
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				$closing = '</dt></dl>';
			} else {
				$closing = '</dd></dl>';
			}
			break;
		default:
			return '<!-- ERR 3 -->';
	}
	return $closing . "\n";
}
01214
# Build block-level structure (lists, paragraphs, indent-preformatted text)
# from line-oriented wiki text.
#
# The text is processed line by line:
#   - a run of leading '*', '#', ':' and ';' characters is the list prefix;
#     comparing it with the previous line's prefix decides whether to continue
#     an item, open deeper levels or close levels;
#   - lines without a prefix go through paragraph handling: lines containing
#     one of the recognised block-level tags close the current paragraph,
#     lines starting with a space open a <pre>, blank lines separate <p>s;
#   - no prefixes are interpreted while $this->mInPre is set.
#
# @param string $text      text to process
# @param bool   $linestart false if $text does not begin at the start of a
#                          line; the first line is then copied through as-is
# @return string text with block-level HTML inserted
function doBlockLevels( $text, $linestart ) {
	$fname = 'Parser::doBlockLevels';
	wfProfileIn( $fname );

	# Parsing through the text line by line.  The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	$paragraphStack = false;

	# Splits "term : definition" on a ':' that follows whitespace or a
	# literal &nbsp; entity, so that colons inside the term are left alone.
	# FIXME: This is not foolproof.  Something better in Tokenizer might help.
	$definitionSplit = '/^(.*?(?:\s|&nbsp;)):(.*)$/';

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match( "/<\\/pre/i", $oLine );
		$preOpenMatch = preg_match( "/<pre/i", $oLine );
		if ( !$this->mInPre ) {
			$this->mInPre = !empty( $preOpenMatch );
		}
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, '*#:;' );
			$pref = substr( $oLine, 0, $prefixLength );

			# ';' and ':' belong to the same kind of list (<dl>), so they are
			# treated as equal when prefixes are compared and remembered.
			$pref2 = str_replace( ';', ':', $pref );
			$t = substr( $oLine, $prefixLength );
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if ( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( ";" == substr( $pref, -1 ) ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				if ( preg_match( $definitionSplit, $t, $match ) ) {
					$term = $match[1];
					$output .= $term . $this->nextItem( ':' );
					$t = $match[2];
				}
			}
		} elseif ( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			# Close the levels the previous line had beyond the common part
			while ( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength - 1] );
			}
			# Open the levels this line has beyond the common part
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ';' == $char ) {
					# Same term/definition split as for a continued item
					if ( preg_match( $definitionSplit, $t, $match ) ) {
						$term = $match[1];
						$output .= $term . $this->nextItem( ":" );
						$t = $match[2];
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if ( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;

			$openmatch = preg_match( '/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
			$closematch = preg_match(
				'/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
				'<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				if ( $preOpenMatch and !$preCloseMatch ) {
					# closeParagraph() cleared the flag; the <pre> is still open
					$this->mInPre = true;
				}
				# A "close" match wins when both kinds are present on the line
				$inBlockElem = !$closematch;
			} else if ( !$inBlockElem && !$this->mInPre ) {
				# substr() instead of an offset read: $t may be empty here
				if ( " " == substr( $t, 0, 1 ) and trim( $t ) != '' ) {
					# Indented line: preformatted text
					if ( $this->mLastSection != 'pre' ) {
						$paragraphStack = false;
						$output .= $this->closeParagraph() . '<pre>';
						$this->mLastSection = 'pre';
					}
				} else {
					if ( '' == trim( $t ) ) {
						# Blank line. The tag to emit is parked in $paragraphStack
						# and only written once the following line is seen.
						if ( $paragraphStack ) {
							$output .= $paragraphStack . '<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ( $this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = '<p>';
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ( $this->mLastSection != 'p' ) {
							$output .= $this->closeParagraph() . '<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# The line itself is written unless a paragraph tag was just parked
		if ( $paragraphStack === false ) {
			$output .= $t . "\n";
		}
	}
	# Close whatever list levels the last line left open
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength - 1] );
		--$prefixLength;
	}
	if ( '' != $this->mLastSection ) {
		$output .= '</' . $this->mLastSection . '>';
		$this->mLastSection = '';
	}

	wfProfileOut( $fname );
	return $output;
}
01373
# Return the current value of a magic variable (like PAGENAME).
#
# @param mixed $index one of the MAG_* variable identifiers
# @return mixed the value for that variable, or NULL for an unknown identifier
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	# Date and time values
	if ( $index == MAG_CURRENTMONTH ) {
		return date( 'm' );
	}
	if ( $index == MAG_CURRENTMONTHNAME ) {
		return $wgLang->getMonthName( date( 'n' ) );
	}
	if ( $index == MAG_CURRENTMONTHNAMEGEN ) {
		return $wgLang->getMonthNameGen( date( 'n' ) );
	}
	if ( $index == MAG_CURRENTDAY ) {
		return date( 'j' );
	}

	# Values taken from the title being parsed
	if ( $index == MAG_PAGENAME ) {
		return $this->mTitle->getText();
	}
	if ( $index == MAG_NAMESPACE ) {
		# return Namespace::getCanonicalName($this->mTitle->getNamespace());
		return $wgLang->getNsText( $this->mTitle->getNamespace() );
	}

	if ( $index == MAG_CURRENTDAYNAME ) {
		# date('w') counts from 0; getWeekdayName() is passed a 1-based number
		return $wgLang->getWeekdayName( date( 'w' ) + 1 );
	}
	if ( $index == MAG_CURRENTYEAR ) {
		return date( 'Y' );
	}
	if ( $index == MAG_CURRENTTIME ) {
		return $wgLang->time( wfTimestampNow(), false );
	}

	# Site-wide values
	if ( $index == MAG_NUMBEROFARTICLES ) {
		return wfNumberOfArticles();
	}
	if ( $index == MAG_SITENAME ) {
		return $wgSitename;
	}
	if ( $index == MAG_SERVER ) {
		return $wgServer;
	}

	return NULL;
}
01408
# initialise the magic variables (like CURRENTMONTHNAME)
#
# Rebuilds $this->mVariables from scratch: for every identifier listed in
# $wgVariableIDs the corresponding MagicWord adds itself to the array
# together with the value returned by getVariableValue().
function initialiseVariables() {
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$magicWord->addToArray(
			$this->mVariables,
			$this->getVariableValue( $variableId )
		);
	}
}
01418
# Expand {{variables}}, {{{arguments}}} and {{templates}} in $text.
#
# Variable and argument substitution are only performed for HTML output;
# the template/brace pass always runs. The actual work happens in the
# callback functions handed to preg_replace_callback().
#
# @param string $text text to process
# @param array  $args template arguments in effect for $text
# @return string processed text
function replaceVariables( $text, $args = array() ) {
	$fname = 'Parser::replaceVariables';
	wfProfileIn( $fname );

	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	$legalChars = Title::legalChars();
	$legalCharsNoBraces = str_replace( array( '{', '}' ), array( '', '' ), $legalChars );

	# This function is called recursively. To keep track of arguments we need a stack:
	array_push( $this->mArgStack, $args );

	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;

	if ( $this->mOutputType == OT_HTML ) {
		# Variable substitution
		$variablePattern = '/{{([' . $legalCharsNoBraces . ']*?)}}/';
		$text = preg_replace_callback( $variablePattern, 'wfVariableSubstitution', $text );

		# Argument substitution
		$argumentPattern = '/(\\n?){{{([' . $legalChars . ']*?)}}}/';
		$text = preg_replace_callback( $argumentPattern, 'wfArgSubstitution', $text );
	}

	# Template substitution
	$templatePattern = '/(\\n?){{([' . $legalCharsNoBraces . ']*)(\\|.*?|)}}/s';
	$text = preg_replace_callback( $templatePattern, 'wfBraceSubstitution', $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
01455
# Replace one {{...}} match by the value of the variable it names.
# Flags the output as containing old-style magic when a substitution is
# made; matches that do not name a known variable are returned untouched.
#
# @param array $matches regex match: [0] whole match, [1] text between the braces
# @return string replacement text
function variableSubstitution( $matches ) {
	$name = $matches[1];
	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}
	$value = $this->mVariables[$name];
	$this->mOutput->mContainsOldMagic = true;
	return $value;
}
01465
# Expand one {{...}} construct.
#
# The stages below run in a fixed order; each magic-word check may strip its
# keyword from the front of $part1, so later stages see the shortened text.
# The first stage that produces a result sets $found; if nothing does, the
# original markup is returned unchanged.
#
# @param array $matches regex match: [0] whole match, [1] optional newline
#                       before the braces, [2] text before the first '|',
#                       [3] the rest, starting with '|' when not empty
# @return string replacement text
function braceSubstitution( $matches ) {
	global $wgLinkCache, $wgLang;

	$found = false;
	$nowiki = false;
	$noparse = false;
	$title = NULL;

	# $newline is an optional newline character before the braces
	# $part1 is the bit before the first |, and must contain only title characters
	# $args is a list of arguments, starting from index 0, not including $part1
	$original = $matches[0];
	$newline = $matches[1];
	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |
	$args = ( $matches[3] !== '' )
		? explode( '|', substr( $matches[3], 1 ) )
		: array();
	$argc = count( $args );

	# {{{}}}: triple braces are left alone
	if ( strpos( $original, '{{{' ) !== false ) {
		$text = $original;
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		$hasSubst = $mwSubst->matchStartAndRemove( $part1 );
		$isPreSave = ( $this->mOutputType == OT_WIKI );
		if ( $hasSubst && !$isPreSave ) {
			# Invalid SUBST not replaced at PST time
			# Return without further processing
			$text = $original;
			$found = true;
			$noparse = true;
		}
		if ( !$hasSubst && $isPreSave ) {
			# SUBST not found in PST pass, do nothing
			$text = $original;
			$found = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 )
			&& $this->incrementIncludeCount( 'int:'.$part1 ) ) {
			$text = wfMsgReal( $part1, $args, true );
			$found = true;
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			$nsNumber = intval( $part1 );
			if ( $nsNumber ) {
				$text = $wgLang->getNsText( $nsNumber );
				$found = true;
			} else {
				# Not a non-zero number: try it as a canonical namespace name
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $wgLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		$func = '';
		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				# The first argument, when given, is passed on to the URL method
				$text = ( $argc > 0 ) ? $title->$func( $args[0] ) : $title->$func();
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
		$text = $this->mVariables[$part1];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}

	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && $this->mOutputType == OT_HTML ) {
				$text = '[[' . $title->getPrefixedText() . ']]';
				$found = true;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	$forHtml = ( $this->mOutputType == OT_HTML );
	if ( $nowiki && $found && $forHtml ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( $forHtml && $found && !$noparse ) {
		# Clean up argument array: "name=value" becomes a named argument,
		# everything else is numbered starting from 1
		$assocArgs = array();
		$position = 1;
		foreach ( $args as $rawArg ) {
			$eqpos = strpos( $rawArg, '=' );
			if ( $eqpos === false ) {
				$assocArgs[$position++] = $rawArg;
			} else {
				$argName = trim( substr( $rawArg, 0, $eqpos ) );
				$assocArgs[$argName] = trim( substr( $rawArg, $eqpos + 1 ) );
			}
		}

		# Do not enter included links in link table
		$haveTitle = !is_null( $title );
		if ( $haveTitle ) {
			$wgLinkCache->suspend();
		}

		# Run full parser on the included text
		$text = $this->stripParse( $text, $newline, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( $haveTitle ) {
			$wgLinkCache->resume();
			$wgLinkCache->addLinkObj( $title );
		}
	}

	return $found ? $text : $original;
}
01663
# Triple brace replacement -- used for template arguments
#
# Looks the (trimmed) argument name up in the argument set on top of
# $this->mArgStack. A known argument is replaced by its parsed value;
# anything else is returned exactly as it was matched.
#
# @param array $matches regex match: [0] whole match, [1] optional leading
#                       newline, [2] argument name
# @return string replacement text
function argSubstitution( $matches ) {
	$argName = trim( $matches[2] );
	$currentArgs = end( $this->mArgStack );

	if ( !array_key_exists( $argName, $currentArgs ) ) {
		return $matches[0];
	}
	return $this->stripParse( $currentArgs[$argName], $matches[1], array() );
}
01677
# Count one more inclusion of the given entity and report whether it is
# still within the MAX_INCLUDE_REPEAT limit.
#
# @param string $dbk key identifying the included entity
# @return bool true if the function is allowed to include this entity
function incrementIncludeCount( $dbk ) {
	if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk] = 0;
	}
	$this->mIncludeCount[$dbk]++;
	return ( $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT );
}
01689
01690
# Cleans up HTML, removes dangerous tags and attributes
#
# HTML comments are removed first. The text is then split on '<' and every
# piece is examined:
#   - a piece that starts with a whitelisted tag keeps the tag, with its
#     attributes filtered through fixTagAttributes();
#   - every other piece is copied through as text.
# In both cases a '>' that is not the end of an accepted tag is escaped as
# &gt;. The leading '<' of a rejected piece is written back as-is.
#
# Without $wgUseTidy, nesting is also checked: closing tags must match the
# innermost open tag, table cells/rows are only accepted inside a table,
# non-nestable tags may not be opened twice, and tags still open at the end
# are closed. With $wgUseTidy none of that checking is done here.
#
# The whitelist is empty unless $wgUserHtml is set.
#
# @param string $text text to clean
# @return string cleaned text
function removeHTMLtags( $text ) {
	global $wgUseTidy, $wgUserHtml;
	$fname = 'Parser::removeHTMLtags';
	wfProfileIn( $fname );

	if ( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
			'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
			'strike', 'strong', 'tt', 'var', 'div', 'center',
			'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
			'ruby', 'rt' , 'rb' , 'rp', 'p'
		);
		$htmlsingle = array(
			'br', 'hr', 'li', 'dt', 'dd'
		);
		$htmlnest = array( # Tags that can be nested--??
			'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
			'dl', 'font', 'big', 'small', 'sub', 'sup'
		);
		$tabletags = array( # Can only appear inside table
			'td', 'th', 'tr'
		);
	} else {
		$htmlpairs = array();
		$htmlsingle = array();
		$htmlnest = array();
		$tabletags = array();
	}

	# Table tags are treated like single tags: they are not tracked on the stack
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): the returned list is not used anywhere in this function
	$htmlattrs = $this->getHTMLattrs();

	# Remove HTML comments. A comment that sits alone on a line is removed
	# together with the newline in front of it.
	$text = preg_replace( '/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU', '', $text );

	# Captures: 1 = '/' of a closing tag, 2 = tag name, 3 = attributes,
	# 4 = end of the tag, 5 = text following the tag
	$tagPattern = '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/';

	$bits = explode( '<', $text );
	$text = array_shift( $bits );
	if ( !$wgUseTidy ) {
		$tagstack = array();
		$tablestack = array();
		foreach ( $bits as $x ) {
			$isTag = preg_match( $tagPattern, $x, $regs );
			$t = $isTag ? strtolower( $regs[2] ) : '';

			if ( $isTag && in_array( $t, $htmlelements ) ) {
				$slash = $regs[1];
				$params = $regs[3];
				$brace = $regs[4];
				$rest = $regs[5];
				$badtag = 0;
				$newparams = '';

				# Check our stack
				if ( $slash ) {
					# Closing a tag...
					if ( !in_array( $t, $htmlsingle ) ) {
						$ot = count( $tagstack ) ? array_pop( $tagstack ) : null;
						if ( $ot != $t ) {
							# Does not close the innermost open tag: undo the pop
							if ( !is_null( $ot ) ) {
								array_push( $tagstack, $ot );
							}
							$badtag = 1;
						}
					}
					if ( !$badtag && $t == 'table' ) {
						# Back to the tags that were open outside this table
						$tagstack = array_pop( $tablestack );
					}
				} else {
					# Keep track for later
					if ( in_array( $t, $tabletags ) &&
					     !in_array( 'table', $tagstack ) ) {
						$badtag = 1;
					} else if ( in_array( $t, $tagstack ) &&
					            !in_array( $t, $htmlnest ) ) {
						$badtag = 1;
					} else if ( !in_array( $t, $htmlsingle ) ) {
						if ( $t == 'table' ) {
							# Each table gets a fresh stack of its own
							array_push( $tablestack, $tagstack );
							$tagstack = array();
						}
						array_push( $tagstack, $t );
					}
					# Strip non-approved attributes from the tag
					$newparams = $this->fixTagAttributes( $params );
				}
				if ( !$badtag ) {
					$rest = str_replace( '>', '&gt;', $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
			# Not an accepted tag: copy through, escaping any '>'
			$text .= '<' . str_replace( '>', '&gt;', $x );
		}
		# Close off any remaining tags
		while ( is_array( $tagstack ) && ( $t = array_pop( $tagstack ) ) ) {
			$text .= "</$t>\n";
			if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
		}
	} else {
		# this might be possible using tidy itself
		foreach ( $bits as $x ) {
			$isTag = preg_match( $tagPattern, $x, $regs );
			$t = $isTag ? strtolower( $regs[2] ) : '';

			if ( $isTag && in_array( $t, $htmlelements ) ) {
				$slash = $regs[1];
				$brace = $regs[4];
				$newparams = $this->fixTagAttributes( $regs[3] );
				$rest = str_replace( '>', '&gt;', $regs[5] );
				$text .= "<$slash$t $newparams$brace$rest";
			} else {
				$text .= '<' . str_replace( '>', '&gt;', $x );
			}
		}
	}
	wfProfileOut( $fname );
	return $text;
}
01806
01807
01808
01809
01810
01811
01812
01813
01814
01815
01816
01817
01818
01819
01820
# Rebuild the <h1>..<h6> headings of already rendered text.
#
# Every heading gets a named anchor in front of it and, depending on the
# parser options and the magic words found in the text, automatic numbering,
# a section edit link and/or the right-click edit script. When a table of
# contents is wanted it is inserted after the text that precedes the first
# heading.
#
# The table of contents is suppressed by the NOTOC magic word, on the page
# named by the 'mainpage' message, and when there are fewer than four
# headings; the FORCETOC magic word overrides all of these. Section edit
# links are suppressed by the NOEDITSECTION magic word or when the title
# cannot be edited by the user.
#
# @param string $text   rendered text
# @param bool   $isMain if false, no table of contents is inserted
# @return string text with the rebuilt headings
function formatHeadings( $text, $isMain=true ) {
	global $wgInputEncoding;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	if ( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if ( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if ( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if ( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	$numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if ( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC
	$mw =& MagicWord::get( MAG_FORCETOC );
	if ( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 1;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# Numbers are needed both for numbered headings and for the TOC lines
	$needNumbering = ( $doNumberHeadings || $doShowToc );

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = '';
	$full = '';
	$head = array();
	$sublevelCount = array();
	# how often each anchor name has been used so far
	$refers = array();
	$level = 0;
	$prevlevel = 0;
	foreach ( $matches[3] as $headline ) {
		$numbering = '';
		if ( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if ( $needNumbering && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if ( $needNumbering && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1] = 0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if ( !isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level] = 0;
		}
		$sublevelCount[$level]++;
		if ( $needNumbering ) {
			# Join the counters of all levels up to this one with dots,
			# skipping levels that have no count
			$dot = 0;
			for ( $i = 1; $i <= $level; $i++ ) {
				if ( !empty( $sublevelCount[$i] ) ) {
					if ( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections.
		# The second call works on the result of the first so that neither
		# expansion is thrown away.
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( '/<.*?' . '>/', '', $canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = urlencode( do_html_entity_decode( str_replace( ' ', '_', $tocline ), ENT_COMPAT, $wgInputEncoding ) );
		# Keep colons readable and turn the remaining %XX escapes into .XX
		$canonized_headline = str_replace( array( '%3A', '%' ), array( ':', '.' ), $canonized_headline );

		# count how many times this anchor occurred so we can track dupes
		if ( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$occurrence = $refers[$canonized_headline];

		# Prepend the number to the heading text
		if ( $needNumbering ) {
			$tocline = $numbering . ' ' . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if ( $doNumberHeadings && count( $matches[3] ) > 1 ) {
				# the two are different if the line contains a link
				$headline = $numbering . ' ' . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if ( $occurrence > 1 ) {
			$anchor .= '_' . $occurrence;
		}
		if ( $doShowToc ) {
			$toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
		}

		$head[$headlineCount] = '';
		if ( $showEditLink ) {
			$head[$headlineCount] .= $sk->editSectionLink( $headlineCount+1 );
		}

		# Add the edit section span
		if ( $rightClickHack ) {
			$headline = $sk->editSectionScript( $headlineCount+1, $headline );
		}

		# give headline the correct <h#> tag
		$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

		$headlineCount++;
	}

	if ( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
	$i = 0;

	foreach ( $blocks as $block ) {
		# An [edit] link for the top block of text used to be added here.
		# Disabled because it broke block formatting
		# For example, a bullet point in the top line
		$full .= $block;
		if ( $doShowToc && !$i && $isMain ) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		if ( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
02009
02010
# Return an HTML link for the "ISBN 123456" text
#
# Every "ISBN " marker in $text that is followed by a run of digits, hyphens
# or upper-case letters is replaced by a link to Special:Booksources, with the
# hyphen-free number passed as the "isbn" query parameter.
#
# $text - text to scan
# Returns the text with the links inserted. A marker that is not followed by
# a usable number is copied through unchanged.
function magicISBN( $text ) {
	# The leading space guarantees a non-empty first chunk even when the text
	# itself starts with "ISBN "; it is removed again right below.
	$chunks = explode( 'ISBN ', " $text" );
	if ( count( $chunks ) < 2 ) {
		return $text;
	}
	# substr() may return false for an empty remainder, hence the casts
	$text = (string)substr( array_shift( $chunks ), 1 );
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $chunks as $x ) {
		# Blanks between the marker and the number
		$blankLen = strspn( $x, ' ' );
		$blank = (string)substr( $x, 0, $blankLen );
		$x = (string)substr( $x, $blankLen );

		# Longest prefix made of allowed characters. strspn() stops cleanly
		# at the end of the string, so no offset is read past the end.
		$isbnLen = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLen );
		$x = (string)substr( $x, $isbnLen );

		$num = str_replace( array( '-', ' ' ), '', $isbn );

		if ( $num === '' ) {
			# Nothing to link to. Put back whatever was consumed (possibly a
			# run of hyphens) so the text is not altered.
			$text .= "ISBN $blank$isbn$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( "isbn={$num}" ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	return $text;
}
02044
02045
# Return an HTML link for the "RFC 1234" text
#
# Every "RFC " marker in $text that is followed by decimal digits is replaced
# by an external link. The link target is the 'rfcurl' message with "$1"
# replaced by the number; the link attributes come from the skin held in
# $this->mOptions.
#
# $text - text to scan
# Returns the text with the links inserted. A marker that is not followed by
# digits is copied through unchanged.
function magicRFC( $text ) {
	# The leading space guarantees a non-empty first chunk even when the text
	# itself starts with "RFC "; it is removed again right below.
	$chunks = explode( 'RFC ', ' ' . $text );
	if ( count( $chunks ) < 2 ) {
		return $text;
	}
	# substr() may return false for an empty remainder, hence the casts
	$text = (string)substr( array_shift( $chunks ), 1 );
	$valid = '0123456789';

	foreach ( $chunks as $x ) {
		# Blanks between the marker and the number
		$blankLen = strspn( $x, ' ' );
		$blank = (string)substr( $x, 0, $blankLen );
		$x = (string)substr( $x, $blankLen );

		# Leading digits. strspn() stops cleanly at the end of the string,
		# so no offset is read past the end.
		$rfcLen = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLen );
		$x = (string)substr( $x, $rfcLen );

		if ( $rfc === '' ) {
			$text .= "RFC $blank$x";
		} else {
			$url = wfmsg( 'rfcurl' );
			$url = str_replace( '$1', $rfc, $url );
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}
	}
	return $text;
}
02077
# Transform wiki markup before it is saved.
#
# $text       - wikitext to transform
# $title      - title object, stored by reference in $this->mTitle
# $user       - user object, handed on to pstPass2()
# $options    - stored in $this->mOptions
# $clearState - when true (the default) clearState() is called first
#
# Returns the transformed wikitext. The output type is set to OT_WIKI.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
	$this->mOutputType = OT_WIKI;
	$this->mTitle =& $title;
	$this->mOptions = $options;

	if ( $clearState ) {
		$this->clearState();
	}

	# Normalise line endings
	$text = str_replace( "\r\n", "\n", $text );

	# Strip, run the pre-save pass on what is left, then put the stripped
	# parts back
	$state = false;
	$text = $this->strip( $text, $state, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $state );

	return $this->unstripNoWiki( $text, $state );
}
02106
# Second pass of the pre-save transform.
#
# Performs, in this order:
#  - variable replacement via replaceVariables()
#  - signature expansion: "~~~~~" becomes the timestamp, "~~~~" the user
#    link followed by the timestamp, "~~~" the user link alone
#  - context ("pipe trick") link expansion for [[|name]], [[name (context)|]],
#    [[ns:name|]] and [[ns:name (context)|]]
#  - trimming of trailing whitespace and removal of the MAG_END magic word
#
# $text - wikitext to transform
# $user - user object supplying the name and the 'nickname' option
#
# Returns the transformed wikitext.
function pstPass2( $text, &$user ) {
	global $wgLang, $wgLocaltimezone;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) {
		$k = $n;
	}

	# Temporarily switch TZ so that date() reports the configured local
	# timezone, then put the previous value back.
	$overrideTz = isset( $wgLocaltimezone );
	if ( $overrideTz ) {
		$oldtz = getenv( 'TZ' );
		putenv( 'TZ=' . $wgLocaltimezone );
	}

	$d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
		' (' . date( 'T' ) . ')';

	if ( $overrideTz ) {
		putenv( 'TZ=' . $oldtz );
	}

	# The markers are literal strings, so plain string replacement is used:
	# a '$' or '\' in the user name or nickname must not be treated as a
	# regex backreference. Longest marker first.
	$userLink = '[[' . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( '~~~~~', $d, $text );
	$text = str_replace( '~~~~', "$userLink $d", $text );
	$text = str_replace( '~~~', $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
	# [[ns:page (cont)|]]

	# The context is the parenthesised suffix of the current title, if any
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
02176
02177
# Prime the members that parse() would normally set, so that external code
# can call individual class members afterwards.
#
# $title      - title object, stored by reference in $this->mTitle
# $options    - stored in $this->mOptions
# $outputType - stored in $this->mOutputType
# $clearState - when true (the default) clearState() is called as well
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
02187
# Run variable replacement over a message text with output type OT_MSG.
#
# $text    - message text
# $options - stored in $this->mOptions
#
# Returns the text after replaceVariables(). A call made while another
# transformMsg() call is still in progress returns $text unchanged.
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $busy = false;

	# Recursion guard: only the outermost call does any work
	if ( !$busy ) {
		$busy = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$busy = false;
	}

	return $text;
}
02207
02208
# Register a callback for an HTML-style tag, e.g. <yourtag>special text</yourtag>.
# The callback will be called with the text within the tag and is expected
# to transform and return it.
#
# $tag      - tag name used as the key in $this->mTagHooks
# $callback - callback to store for that tag
#
# Returns the value previously stored for $tag, or null if there was none.
function setHook( $tag, $callback ) {
	$previous = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;
	return $previous;
}
02216 }
02217
02218
# Holds the result of a parse: the output text, the language and category
# links collected along the way, an "old magic" flag and a cache time.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	# Constructor. The cache time always starts out as an empty string.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	# Accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators; each returns whatever wfSetVar() returns
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Fold the links and the old-magic flag of another ParserOutput into this
	# one. The text and the cache time are left untouched.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Category links come from $other's category links, not from the
		# language links merged on the line above
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
02251
02252
# Bundle of settings that control a parse. Values are read through the
# get*() methods and changed through the set*() methods.
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# --- Getters ---

	function getUseTeX() {
		return $this->mUseTeX;
	}

	function getUseCategoryMagic() {
		return $this->mUseCategoryMagic;
	}

	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}

	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}

	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}

	function getSkin() {
		return $this->mSkin;
	}

	function getDateFormat() {
		return $this->mDateFormat;
	}

	function getEditSection() {
		return $this->mEditSection;
	}

	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}

	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}

	function getShowToc() {
		return $this->mShowToc;
	}

	# --- Setters; each returns whatever wfSetVar() returns ---

	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}

	function setUseCategoryMagic( $x ) {
		return wfSetVar( $this->mUseCategoryMagic, $x );
	}

	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}

	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}

	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}

	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}

	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}

	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}

	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}

	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# The skin is stored by reference and nothing is returned
	function setSkin( &$x ) {
		$this->mSkin =& $x;
	}

	# Build a new ParserOptions object initialised from the given user
	function newFromUser( &$user ) {
		$popts = new ParserOptions;
		$popts->initialiseFromUser( $user );
		return $popts;
	}

	# Fill in every option: site-wide switches come from the $wg* globals,
	# the rest from the user's skin and options. When $userInput is empty a
	# fresh User object, marked as loaded, is used instead.
	function initialiseFromUser( &$userInput ) {
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates,
			$wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site configuration
		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# User preferences
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( 'date' );
		$this->mEditSection = $user->getOption( 'editsection' );
		$this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
		$this->mNumberHeadings = $user->getOption( 'numberheadings' );
		$this->mShowToc = $user->getOption( 'showtoc' );
	}

}
02323
02324
# Regex callbacks, used in Parser::replaceVariables

# Forwards the regex match array to braceSubstitution() on the parser held
# in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
	return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
02330
# Regex callback: forwards the match array to argSubstitution() on the
# parser held in the global $wgCurParser and returns its result.
function wfArgSubstitution( $matches )
{
	return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
02336
# Regex callback: forwards the match array to variableSubstitution() on the
# parser held in the global $wgCurParser and returns its result.
function wfVariableSubstitution( $matches )
{
	return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
02342
02343 ?>