Main Page | Namespace List | Class Hierarchy | Class List | File List | Class Members | File Members | Related Pages

LanguageUtf8.php

Go to the documentation of this file.
<?php
# Shared setup and base class for language files that work in UTF-8.
# Nothing below runs unless the MEDIAWIKI constant is defined.
if( defined( "MEDIAWIKI" ) ) {

$wgInputEncoding = "utf-8";
$wgOutputEncoding = "utf-8";

# Try to fetch the upper/lower case-mapping tables from the $wgMemc cache,
# using keys prefixed with the database name.
$wikiUpperChars = $wgMemc->get( $key1 = "$wgDBname:utf8:upper" );
$wikiLowerChars = $wgMemc->get( $key2 = "$wgDBname:utf8:lower" );

if(empty( $wikiUpperChars) || empty($wikiLowerChars )) {
	# Cache miss: includes/Utf8Case.php is presumably what defines both
	# tables (TODO confirm); whatever it leaves in them is cached again.
	require_once( "includes/Utf8Case.php" );
	$wgMemc->set( $key1, $wikiUpperChars );
	$wgMemc->set( $key2, $wikiLowerChars );
}

# Base stuff useful to all UTF-8 based language files
class LanguageUtf8 extends Language {

	# Return $string with its first character mapped through the global
	# $wikiUpperChars table.
	#
	# For most languages, this is a wrapper for ucfirst()
	# But that doesn't work right in a UTF-8 locale
	#
	# The "first character" is either one ASCII byte or a lead byte
	# (0xc0-0xff) followed by any run of continuation bytes (0x80-0xbf).
	# A string that is empty or starts with a stray continuation byte is
	# returned unchanged.
	#
	# This used to be preg_replace() with the /e modifier. That modifier
	# was deprecated in PHP 5.5 and removed in PHP 7.0, and it also
	# backslash-escaped quotes in the matched character, so it is replaced
	# by a plain match-and-concatenate.
	function ucfirst( $string ) {
		global $wikiUpperChars;
		if( !preg_match( '/^([\x00-\x7f]|[\xc0-\xff][\x80-\xbf]*)/', $string, $matches ) ) {
			return $string;
		}
		$first = $matches[1];
		return strtr( $first, $wikiUpperChars ) . substr( $string, strlen( $first ) );
	}

	# Return $string with its first character mapped through the global
	# $wikiLowerChars table. Same matching rules as ucfirst().
	function lcfirst( $string ) {
		global $wikiLowerChars;
		if( !preg_match( '/^([\x00-\x7f]|[\xc0-\xff][\x80-\xbf]*)/', $string, $matches ) ) {
			return $string;
		}
		$first = $matches[1];
		return strtr( $first, $wikiLowerChars ) . substr( $string, strlen( $first ) );
	}

	# Rewrite every non-ASCII character of $string as "U8" followed by
	# the hex dump of its case-folded bytes; ASCII bytes pass through
	# untouched.
	#
	# MySQL fulltext index doesn't grok utf-8, so we
	# need to fold cases and convert to hex
	function stripForSearch( $string ) {
		global $wikiLowerChars;
		# With PREG_SPLIT_DELIM_CAPTURE and a single capture group the
		# result alternates: even indexes are the text between matches,
		# odd indexes are the matched multibyte characters.
		$pieces = preg_split( '/([\xc0-\xff][\x80-\xbf]*)/', $string, -1,
			PREG_SPLIT_DELIM_CAPTURE );
		$out = '';
		foreach( $pieces as $i => $piece ) {
			if( $i % 2 ) {
				$out .= 'U8' . bin2hex( strtr( $piece, $wikiLowerChars ) );
			} else {
				$out .= $piece;
			}
		}
		return $out;
	}

	# Name of the encoding that checkTitleEncoding() converts from when
	# its input is not valid UTF-8.
	function fallback8bitEncoding() {
		# Windows codepage 1252 is a superset of iso 8859-1
		# override this to use a different source encoding to
		# translate incoming 8-bit URLs.
		return "windows-1252";
	}

	# Return $s unchanged when it is pure ASCII or already well-formed
	# UTF-8 (sequences of 1 to 4 bytes); otherwise return it converted
	# from fallback8bitEncoding() to UTF-8 via $this->iconv().
	function checkTitleEncoding( $s ) {
		# Check for non-UTF-8 URLs
		$ishigh = preg_match( '/[\x80-\xff]/', $s);
		if(!$ishigh) return $s;

		$isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
		if( $isutf8 ) return $s;

		return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
	}

	# Return the first character of $s (an ASCII byte or a 2- to 4-byte
	# UTF-8 sequence), or "" when $s does not start with one.
	function firstChar( $s ) {
		preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/', $s, $matches);

		return isset( $matches[1] ) ? $matches[1] : "";
	}
}

} # ifdef MEDIAWIKI

?>

Generated on Tue Jun 29 23:40:05 2004 for Mediawiki by doxygen 1.3.7