Merge branch 'utf8refactor' into psr2
* utf8refactor: replaced deprecated utf8 functions; formatting cleanup; mark old utf8 functions deprecated; some cleanup for the UTF-8 stuff; moved all utf8 methods to their own namespaced classes; create separate table files for UTF-8 handling
This commit is contained in:
commit
b47790f975
|
@ -88,6 +88,7 @@
|
|||
<exclude-pattern>*/inc/Mailer.class.php</exclude-pattern>
|
||||
<exclude-pattern>*/doku.php</exclude-pattern>
|
||||
<exclude-pattern>*/install.php</exclude-pattern>
|
||||
<exclude-pattern>*/inc/utf8.php</exclude-pattern>
|
||||
<exclude-pattern>*/feed.php</exclude-pattern>
|
||||
<exclude-pattern>*/inc/load.php</exclude-pattern>
|
||||
<exclude-pattern>*/bin/*.php</exclude-pattern>
|
||||
|
|
|
@ -124,7 +124,7 @@ class media_searchlist_test extends DokuWikiTest {
|
|||
$info = array();
|
||||
$info['id'] = $this->upload_ns . ':' . $rel_id;
|
||||
$info['perm'] = auth_quickaclcheck(getNS($info['id']).':*');
|
||||
$info['file'] = utf8_basename($file);
|
||||
$info['file'] = \dokuwiki\Utf8\PhpString::basename($file);
|
||||
$info['size'] = filesize($file);
|
||||
$info['mtime'] = filemtime($file);
|
||||
$info['writable'] = is_writable($file);
|
||||
|
|
|
@ -84,8 +84,8 @@ class utf8_basename_test extends DokuWikiTest {
|
|||
);
|
||||
|
||||
foreach($data as $test){
|
||||
$this->assertEquals($test[2], utf8_basename($test[0], $test[1]), "input: ('".$test[0]."', '".$test[1]."')");
|
||||
$this->assertEquals($test[2], \dokuwiki\Utf8\PhpString::basename($test[0], $test[1]), "input: ('".$test[0]."', '".$test[1]."')");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,7 +15,7 @@ class utf8_correctidx_test extends DokuWikiTest {
|
|||
$tests[] = array('aaживπά우리をあöä',1,true,1);
|
||||
|
||||
foreach($tests as $test){
|
||||
$this->assertEquals(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Clean::correctIdx($test[0],$test[1],$test[2]),$test[3]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -33,7 +33,7 @@ class utf8_correctidx_test extends DokuWikiTest {
|
|||
$tests[] = array('aaживπά우리をあöä',4,true,4);
|
||||
|
||||
foreach($tests as $test){
|
||||
$this->assertEquals(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Clean::correctIdx($test[0],$test[1],$test[2]),$test[3]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -53,7 +53,7 @@ class utf8_correctidx_test extends DokuWikiTest {
|
|||
$tests[] = array('aaживπά우리をあöä',13,true,13);
|
||||
|
||||
foreach($tests as $test){
|
||||
$this->assertEquals(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Clean::correctIdx($test[0],$test[1],$test[2]),$test[3]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -69,7 +69,7 @@ class utf8_correctidx_test extends DokuWikiTest {
|
|||
$tests[] = array('aaживπά우리をあöä',128,true,29);
|
||||
|
||||
foreach($tests as $test){
|
||||
$this->assertEquals(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Clean::correctIdx($test[0],$test[1],$test[2]),$test[3]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -8,61 +8,61 @@ class utf8_html_test extends DokuWikiTest {
|
|||
function test_from_1byte(){
|
||||
$in = 'a';
|
||||
$out = 'a';
|
||||
$this->assertEquals(utf8_tohtml($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Conversion::toHtml($in),$out);
|
||||
}
|
||||
|
||||
function test_from_2byte(){
|
||||
$in = "\xc3\xbc";
|
||||
$out = 'ü';
|
||||
$this->assertEquals(utf8_tohtml($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Conversion::toHtml($in),$out);
|
||||
}
|
||||
|
||||
function test_from_3byte(){
|
||||
$in = "\xe2\x99\x8a";
|
||||
$out = '♊';
|
||||
$this->assertEquals(utf8_tohtml($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Conversion::toHtml($in),$out);
|
||||
}
|
||||
|
||||
function test_from_4byte(){
|
||||
$in = "\xf4\x80\x80\x81";
|
||||
$out = '􀀁';
|
||||
$this->assertEquals(utf8_tohtml($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Conversion::toHtml($in),$out);
|
||||
}
|
||||
|
||||
function test_to_1byte(){
|
||||
$out = 'a';
|
||||
$in = 'a';
|
||||
$this->assertEquals(utf8_unhtml($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Conversion::fromHtml($in),$out);
|
||||
}
|
||||
|
||||
function test_to_2byte(){
|
||||
$out = "\xc3\xbc";
|
||||
$in = 'ü';
|
||||
$this->assertEquals(utf8_unhtml($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Conversion::fromHtml($in),$out);
|
||||
}
|
||||
|
||||
function test_to_3byte(){
|
||||
$out = "\xe2\x99\x8a";
|
||||
$in = '♊';
|
||||
$this->assertEquals(utf8_unhtml($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Conversion::fromHtml($in),$out);
|
||||
}
|
||||
|
||||
function test_to_4byte(){
|
||||
$out = "\xf4\x80\x80\x81";
|
||||
$in = '􀀁';
|
||||
$this->assertEquals(utf8_unhtml($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Conversion::fromHtml($in),$out);
|
||||
}
|
||||
|
||||
function test_without_entities(){
|
||||
$out = '&#38;&#38;';
|
||||
$in = '&#38;&amp;#38;';
|
||||
$this->assertEquals(utf8_unhtml($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Conversion::fromHtml($in),$out);
|
||||
}
|
||||
|
||||
function test_with_entities(){
|
||||
$out = '&&#38;';
|
||||
$in = '&#38;&amp;#38;';
|
||||
$this->assertEquals(utf8_unhtml($in,HTML_ENTITIES),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Conversion::fromHtml($in,HTML_ENTITIES),$out);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@ class utf8_romanize_test extends DokuWikiTest {
|
|||
foreach($tests as $test){
|
||||
list($jap,$rom) = explode(';',trim($test));
|
||||
|
||||
$chk = utf8_romanize($jap);
|
||||
$chk = \dokuwiki\Utf8\Clean::romanize($jap);
|
||||
$this->assertEquals($rom,$chk,"$jap\t->\t$chk\t!=\t$rom\t($line)");
|
||||
$line++;
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ class utf8_romanize_test extends DokuWikiTest {
|
|||
* @author Andreas Gohr <andi@splitbrain.org>
|
||||
*/
|
||||
function test_deaccented(){
|
||||
$this->assertEquals("a A a A a o O",utf8_romanize("å Å ä Ä ä ö Ö"));
|
||||
$this->assertEquals("a A a A a o O",\dokuwiki\Utf8\Clean::romanize("å Å ä Ä ä ö Ö"));
|
||||
}
|
||||
}
|
||||
//Setup VIM: ex: et ts=4 :
|
||||
|
|
|
@ -19,7 +19,7 @@ class utf8_stripspecials extends DokuWikiTest {
|
|||
$tests[] = array('string with nbsps','_','\*','string_with_nbsps');
|
||||
|
||||
foreach($tests as $test){
|
||||
$this->assertEquals(utf8_stripspecials($test[0],$test[1],$test[2]),$test[3]);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Clean::stripspecials($test[0],$test[1],$test[2]),$test[3]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ class utf8_strtolower_test extends DokuWikiTest {
|
|||
);
|
||||
|
||||
foreach($data as $input => $expected) {
|
||||
$this->assertEquals($expected, utf8_strtolower($input));
|
||||
$this->assertEquals($expected, \dokuwiki\Utf8\PhpString::strtolower($input));
|
||||
}
|
||||
|
||||
// just make sure our data was correct
|
||||
|
@ -20,4 +20,4 @@ class utf8_strtolower_test extends DokuWikiTest {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@ class utf8_substr_test extends DokuWikiTest {
|
|||
$tests[] = array('живπά우리をあöä',-6,-2,'우리をあ');
|
||||
|
||||
foreach($tests as $test){
|
||||
$this->assertEquals(utf8_substr($test[0],$test[1],$test[2]),$test[3]);
|
||||
$this->assertEquals(\dokuwiki\Utf8\PhpString::substr($test[0],$test[1],$test[2]),$test[3]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,7 @@ class utf8_substr_test extends DokuWikiTest {
|
|||
$tests[] = array($str,0,66002,$str);
|
||||
|
||||
foreach($tests as $test){
|
||||
$this->assertEquals(utf8_substr($test[0],$test[1],$test[2]),$test[3]);
|
||||
$this->assertEquals(\dokuwiki\Utf8\PhpString::substr($test[0],$test[1],$test[2]),$test[3]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -8,49 +8,49 @@ class utf8_unicode_test extends DokuWikiTest {
|
|||
function test_from_1byte(){
|
||||
$in = 'a';
|
||||
$out = array(97);
|
||||
$this->assertEquals(utf8_to_unicode($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Unicode::fromUtf8($in),$out);
|
||||
}
|
||||
|
||||
function test_from_2byte(){
|
||||
$in = "\xc3\xbc";
|
||||
$out = array(252);
|
||||
$this->assertEquals(utf8_to_unicode($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Unicode::fromUtf8($in),$out);
|
||||
}
|
||||
|
||||
function test_from_3byte(){
|
||||
$in = "\xe2\x99\x8a";
|
||||
$out = array(9802);
|
||||
$this->assertEquals(utf8_to_unicode($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Unicode::fromUtf8($in),$out);
|
||||
}
|
||||
|
||||
function test_from_4byte(){
|
||||
$in = "\xf4\x80\x80\x81";
|
||||
$out = array(1048577);
|
||||
$this->assertEquals(utf8_to_unicode($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Unicode::fromUtf8($in),$out);
|
||||
}
|
||||
|
||||
function test_to_1byte(){
|
||||
$out = 'a';
|
||||
$in = array(97);
|
||||
$this->assertEquals(unicode_to_utf8($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Unicode::toUtf8($in),$out);
|
||||
}
|
||||
|
||||
function test_to_2byte(){
|
||||
$out = "\xc3\xbc";
|
||||
$in = array(252);
|
||||
$this->assertEquals(unicode_to_utf8($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Unicode::toUtf8($in),$out);
|
||||
}
|
||||
|
||||
function test_to_3byte(){
|
||||
$out = "\xe2\x99\x8a";
|
||||
$in = array(9802);
|
||||
$this->assertEquals(unicode_to_utf8($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Unicode::toUtf8($in),$out);
|
||||
}
|
||||
|
||||
function test_to_4byte(){
|
||||
$out = "\xf4\x80\x80\x81";
|
||||
$in = array(1048577);
|
||||
$this->assertEquals(unicode_to_utf8($in),$out);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Unicode::toUtf8($in),$out);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -12,14 +12,14 @@ class utf8_utf16be_test extends DokuWikiTest {
|
|||
* Convert from UTF-8 to UTF-16BE
|
||||
*/
|
||||
function test_to16be(){
|
||||
$this->assertEquals(utf8_to_utf16be($this->utf8), $this->utf16);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Conversion::toUtf16Be($this->utf8), $this->utf16);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert from UTF-16BE to UTF-8
|
||||
*/
|
||||
function test_from16be(){
|
||||
$this->assertEquals(utf16be_to_utf8($this->utf16),$this->utf8);
|
||||
$this->assertEquals(\dokuwiki\Utf8\Conversion::fromUtf16Be($this->utf16),$this->utf8);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -182,7 +182,7 @@ class PageCLI extends CLI {
|
|||
}
|
||||
|
||||
if(empty($localfile)) {
|
||||
$localfile = getcwd() . '/' . utf8_basename($wiki_fn);
|
||||
$localfile = getcwd() . '/' . \dokuwiki\Utf8\PhpString::basename($wiki_fn);
|
||||
}
|
||||
|
||||
if(!file_exists(dirname($localfile))) {
|
||||
|
|
|
@ -47,7 +47,7 @@ class Sitemap extends AbstractAction {
|
|||
if(is_readable($sitemap)) {
|
||||
// Send headers
|
||||
header('Content-Type: ' . $mime);
|
||||
header('Content-Disposition: attachment; filename=' . utf8_basename($sitemap));
|
||||
header('Content-Disposition: attachment; filename=' . \dokuwiki\Utf8\PhpString::basename($sitemap));
|
||||
|
||||
http_conditionalRequest(filemtime($sitemap));
|
||||
|
||||
|
|
|
@ -1302,7 +1302,7 @@ class JpegMeta {
|
|||
function _parseFileInfo() {
|
||||
if (file_exists($this->_fileName) && is_file($this->_fileName)) {
|
||||
$this->_info['file'] = array();
|
||||
$this->_info['file']['Name'] = utf8_decodeFN(utf8_basename($this->_fileName));
|
||||
$this->_info['file']['Name'] = utf8_decodeFN(\dokuwiki\Utf8\PhpString::basename($this->_fileName));
|
||||
$this->_info['file']['Path'] = fullpath($this->_fileName);
|
||||
$this->_info['file']['Size'] = filesize($this->_fileName);
|
||||
if ($this->_info['file']['Size'] < 1024) {
|
||||
|
@ -1393,7 +1393,7 @@ class JpegMeta {
|
|||
}
|
||||
} else {
|
||||
$this->_info['file'] = array();
|
||||
$this->_info['file']['Name'] = utf8_basename($this->_fileName);
|
||||
$this->_info['file']['Name'] = \dokuwiki\Utf8\PhpString::basename($this->_fileName);
|
||||
$this->_info['file']['Url'] = $this->_fileName;
|
||||
}
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ class Mailer {
|
|||
*/
|
||||
public function attachFile($path, $mime, $name = '', $embed = '') {
|
||||
if(!$name) {
|
||||
$name = utf8_basename($path);
|
||||
$name = \dokuwiki\Utf8\PhpString::basename($path);
|
||||
}
|
||||
|
||||
$this->attach[] = array(
|
||||
|
@ -387,7 +387,7 @@ class Mailer {
|
|||
}
|
||||
|
||||
// FIXME: is there a way to encode the localpart of a emailaddress?
|
||||
if(!utf8_isASCII($addr)) {
|
||||
if(!\dokuwiki\Utf8\Clean::isASCII($addr)) {
|
||||
msg(hsc("E-Mail address <$addr> is not ASCII"), -1);
|
||||
continue;
|
||||
}
|
||||
|
@ -403,11 +403,11 @@ class Mailer {
|
|||
$addr = "<$addr>";
|
||||
|
||||
if(defined('MAILHEADER_ASCIIONLY')) {
|
||||
$text = utf8_deaccent($text);
|
||||
$text = utf8_strip($text);
|
||||
$text = \dokuwiki\Utf8\Clean::deaccent($text);
|
||||
$text = \dokuwiki\Utf8\Clean::strip($text);
|
||||
}
|
||||
|
||||
if(strpos($text, ',') !== false || !utf8_isASCII($text)) {
|
||||
if(strpos($text, ',') !== false || !\dokuwiki\Utf8\Clean::isASCII($text)) {
|
||||
$text = '=?UTF-8?B?'.base64_encode($text).'?=';
|
||||
}
|
||||
} else {
|
||||
|
@ -553,10 +553,10 @@ class Mailer {
|
|||
if(isset($this->headers['Subject'])) {
|
||||
// add prefix to subject
|
||||
if(empty($conf['mailprefix'])) {
|
||||
if(utf8_strlen($conf['title']) < 20) {
|
||||
if(\dokuwiki\Utf8\PhpString::strlen($conf['title']) < 20) {
|
||||
$prefix = '['.$conf['title'].']';
|
||||
} else {
|
||||
$prefix = '['.utf8_substr($conf['title'], 0, 20).'...]';
|
||||
$prefix = '['.\dokuwiki\Utf8\PhpString::substr($conf['title'], 0, 20).'...]';
|
||||
}
|
||||
} else {
|
||||
$prefix = '['.$conf['mailprefix'].']';
|
||||
|
@ -568,10 +568,10 @@ class Mailer {
|
|||
|
||||
// encode subject
|
||||
if(defined('MAILHEADER_ASCIIONLY')) {
|
||||
$this->headers['Subject'] = utf8_deaccent($this->headers['Subject']);
|
||||
$this->headers['Subject'] = utf8_strip($this->headers['Subject']);
|
||||
$this->headers['Subject'] = \dokuwiki\Utf8\Clean::deaccent($this->headers['Subject']);
|
||||
$this->headers['Subject'] = \dokuwiki\Utf8\Clean::strip($this->headers['Subject']);
|
||||
}
|
||||
if(!utf8_isASCII($this->headers['Subject'])) {
|
||||
if(!\dokuwiki\Utf8\Clean::isASCII($this->headers['Subject'])) {
|
||||
$this->headers['Subject'] = '=?UTF-8?B?'.base64_encode($this->headers['Subject']).'?=';
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@ class SafeFN {
|
|||
* @author Christopher Smith <chris@jalakai.co.uk>
|
||||
*/
|
||||
public static function encode($filename) {
|
||||
return self::unicodeToSafe(utf8_to_unicode($filename));
|
||||
return self::unicodeToSafe(\dokuwiki\Utf8\Unicode::fromUtf8($filename));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -74,7 +74,7 @@ class SafeFN {
|
|||
* @author Christopher Smith <chris@jalakai.co.uk>
|
||||
*/
|
||||
public static function decode($filename) {
|
||||
return unicode_to_utf8(self::safeToUnicode(strtolower($filename)));
|
||||
return \dokuwiki\Utf8\Unicode::toUtf8(self::safeToUnicode(strtolower($filename)));
|
||||
}
|
||||
|
||||
public static function validatePrintableUtf8($printable_utf8) {
|
||||
|
|
|
@ -500,7 +500,7 @@ class Search extends Ui
|
|||
public function createPagenameFromQuery($parsedQuery)
|
||||
{
|
||||
$cleanedQuery = cleanID($parsedQuery['query']); // already strtolowered
|
||||
if ($cleanedQuery === utf8_strtolower($parsedQuery['query'])) {
|
||||
if ($cleanedQuery === \dokuwiki\Utf8\PhpString::strtolower($parsedQuery['query'])) {
|
||||
return ':' . $cleanedQuery;
|
||||
}
|
||||
$pagename = '';
|
||||
|
|
|
@ -0,0 +1,204 @@
|
|||
<?php
|
||||
|
||||
namespace dokuwiki\Utf8;
|
||||
|
||||
/**
|
||||
* Methods to assess and clean UTF-8 strings
|
||||
*/
|
||||
class Clean
{
    /**
     * Checks if a string contains 7bit ASCII only
     *
     * @author Andreas Haerter <andreas.haerter@dev.mail-node.com>
     *
     * @param string $str
     * @return bool true when no byte outside 0x00-0x7F is found
     */
    public static function isASCII($str)
    {
        return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
    }

    /**
     * Tries to detect if a string is in Unicode encoding
     *
     * Only the structure is checked: every lead byte has to be followed by the
     * number of continuation bytes (10bbbbbb) it announces. Overlong forms and
     * the historic 5 and 6 byte sequences are accepted.
     *
     * @author <bmorel@ssi.fr>
     * @link   http://php.net/manual/en/function.utf8-encode.php
     *
     * @param string $str
     * @return bool
     */
    public static function isUtf8($str)
    {
        $len = strlen($str);
        for ($i = 0; $i < $len; $i++) {
            $b = ord($str[$i]);
            if ($b < 0x80) {
                continue; // 0bbbbbbb
            } elseif (($b & 0xE0) === 0xC0) {
                $n = 1; // 110bbbbb
            } elseif (($b & 0xF0) === 0xE0) {
                $n = 2; // 1110bbbb
            } elseif (($b & 0xF8) === 0xF0) {
                $n = 3; // 11110bbb
            } elseif (($b & 0xFC) === 0xF8) {
                $n = 4; // 111110bb
            } elseif (($b & 0xFE) === 0xFC) {
                $n = 5; // 1111110b
            } else {
                return false; // Does not match any model
            }

            // n bytes matching 10bbbbbb have to follow
            for ($j = 0; $j < $n; $j++) {
                if ((++$i === $len) || ((ord($str[$i]) & 0xC0) !== 0x80)) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Strips all high byte chars
     *
     * Returns a pure ASCII7 string
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string $str
     * @return string
     */
    public static function strip($str)
    {
        $ascii = '';
        $len = strlen($str);
        for ($i = 0; $i < $len; $i++) {
            // square brackets: the curly brace offset syntax ($str{$i}) is a
            // parse error as of PHP 8
            if (ord($str[$i]) < 128) {
                $ascii .= $str[$i];
            }
        }
        return $ascii;
    }

    /**
     * Removes special characters (nonalphanumeric) from a UTF-8 string
     *
     * This function adds the controlchars 0x00 to 0x19 to the array of
     * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param  string $string The UTF8 string to strip of special chars
     * @param  string $repl Replace special with this string
     * @param  string $additional Additional chars to strip (used in regexp char class,
     *                            inserted unquoted - the caller has to escape it)
     * @return string
     */
    public static function stripspecials($string, $repl = '', $additional = '')
    {
        // quoting the special char table is only done once per request
        static $specials = null;
        if ($specials === null) {
            $specials = preg_quote(Table::specialChars(), '/');
        }

        return preg_replace('/[' . $additional . '\x00-\x19' . $specials . ']/u', $repl, $string);
    }

    /**
     * Replace bad bytes with an alternative character
     *
     * ASCII character is recommended for replacement char
     *
     * PCRE Pattern to locate bad bytes in a UTF-8 string
     * Comes from W3 FAQ: Multilingual Forms
     * Note: modified to include full ASCII range including control chars
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see http://www.w3.org/International/questions/qa-forms-utf-8
     *
     * @param string $str to search
     * @param string $replace to replace bad bytes with (defaults to '', i.e. bad
     *                        bytes are dropped) - use ASCII
     * @return string
     */
    public static function replaceBadBytes($str, $replace = '')
    {
        $UTF8_BAD =
            '([\x00-\x7F]' .                       # ASCII (including control chars)
            '|[\xC2-\xDF][\x80-\xBF]' .            # non-overlong 2-byte
            '|\xE0[\xA0-\xBF][\x80-\xBF]' .        # excluding overlongs
            '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' . # straight 3-byte
            '|\xED[\x80-\x9F][\x80-\xBF]' .        # excluding surrogates
            '|\xF0[\x90-\xBF][\x80-\xBF]{2}' .     # planes 1-3
            '|[\xF1-\xF3][\x80-\xBF]{3}' .         # planes 4-15
            '|\xF4[\x80-\x8F][\x80-\xBF]{2}' .     # plane 16
            '|(.{1}))';                            # invalid byte

        // Every byte of the input matches one of the alternatives, so the
        // matches tile the whole string. Group 2 only takes part in a match
        // when the last alternative (a single invalid byte) was used.
        return preg_replace_callback(
            '/' . $UTF8_BAD . '/',
            static function ($matches) use ($replace) {
                return isset($matches[2]) ? $replace : $matches[0];
            },
            $str
        );
    }

    /**
     * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
     *
     * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
     * letters. Default is to deaccent both cases ($case = 0)
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string $string
     * @param int $case
     * @return string
     */
    public static function deaccent($string, $case = 0)
    {
        if ($case <= 0) {
            $string = strtr($string, Table::lowerAccents());
        }
        if ($case >= 0) {
            $string = strtr($string, Table::upperAccents());
        }
        return $string;
    }

    /**
     * Romanize a non-latin string
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string $string
     * @return string
     */
    public static function romanize($string)
    {
        if (self::isASCII($string)) {
            return $string; //nothing to do
        }

        return strtr($string, Table::romanization());
    }

    /**
     * adjust a byte index into a utf8 string to a utf8 character boundary
     *
     * @author       chris smith <chris@jalakai.co.uk>
     *
     * @param string $str utf8 character string
     * @param int $i byte index into $str
     * @param bool $next direction to search for boundary, false = up (current character) true = down (next character)
     * @return int byte index into $str now pointing to a utf8 character boundary
     */
    public static function correctIdx($str, $i, $next = false)
    {
        if ($i <= 0) {
            return 0;
        }

        $limit = strlen($str);
        if ($i >= $limit) {
            return $limit;
        }

        // skip over continuation bytes (10bbbbbb) in the requested direction
        if ($next) {
            while (($i < $limit) && ((ord($str[$i]) & 0xC0) === 0x80)) {
                $i++;
            }
        } else {
            while ($i && ((ord($str[$i]) & 0xC0) === 0x80)) {
                $i--;
            }
        }

        return $i;
    }
}
|
|
@ -0,0 +1,161 @@
|
|||
<?php
|
||||
|
||||
namespace dokuwiki\Utf8;
|
||||
|
||||
/**
|
||||
* Methods to convert from and to UTF-8 strings
|
||||
*/
|
||||
class Conversion
{

    /**
     * Encodes UTF-8 characters to HTML entities
     *
     * ASCII is passed through, code points below 0x100 become decimal
     * entities, everything else becomes a hexadecimal entity.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author <vpribish at shopping dot com>
     * @link   http://php.net/manual/en/function.utf8-decode.php
     *
     * @param string $str
     * @return string
     */
    public static function toHtml($str)
    {
        $ret = '';
        foreach (Unicode::fromUtf8($str) as $cp) {
            if ($cp < 0x80) {
                $ret .= chr($cp);
            } elseif ($cp < 0x100) {
                $ret .= "&#$cp;";
            } else {
                $ret .= '&#x' . dechex($cp) . ';';
            }
        }
        return $ret;
    }

    /**
     * Decodes HTML entities to UTF-8 characters
     *
     * Convert any &#..; entity to a codepoint.
     * The entities flag defaults to only decoding numeric entities.
     * Pass HTML_ENTITIES and named entities, including &amp; &lt; etc.
     * are handled as well. Everything is decoded in a single pass, which
     * avoids double decoding an input like "&#38;&amp;#38;":
     *
     *     numeric entities only       -> "&&amp;#38;"
     *     numeric and named entities  -> "&&#38;"
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     *
     * @param  string $str UTF-8 encoded string
     * @param  boolean $entities decode named entities in addition to numeric ones
     * @return string  UTF-8 encoded string with numeric (and named) entities replaced.
     */
    public static function fromHtml($str, $entities = false)
    {
        // both patterns expose the hex marker as group 2 and the value as
        // group 3, which is what decodeNumericEntity() relies on
        if (!$entities) {
            return preg_replace_callback(
                '/(&#([Xx])?([0-9A-Za-z]+);)/m',
                [__CLASS__, 'decodeNumericEntity'],
                $str
            );
        }

        return preg_replace_callback(
            '/&(#)?([Xx])?([0-9A-Za-z]+);/m',
            [__CLASS__, 'decodeAnyEntity'],
            $str
        );
    }

    /**
     * Decodes any HTML entity to its correct UTF-8 char equivalent
     *
     * Unknown named entities are returned unchanged.
     *
     * @param array $ent match data for one entity as passed by preg_replace_callback()
     * @return string
     */
    protected static function decodeAnyEntity($ent)
    {
        // create the named entity lookup table (entity => UTF-8 character)
        static $table = null;
        if ($table === null) {
            // The table is requested in UTF-8 explicitly, so its characters can
            // be used as they are. They are multi-byte strings - running them
            // through ord() would only look at their first byte.
            $table = array_flip(
                get_html_translation_table(HTML_ENTITIES, ENT_QUOTES | ENT_HTML401, 'UTF-8')
            );
        }

        if ($ent[1] === '#') {
            return self::decodeNumericEntity($ent);
        }

        if (array_key_exists($ent[0], $table)) {
            return $table[$ent[0]];
        }

        return $ent[0];
    }

    /**
     * Decodes numeric HTML entities to their correct UTF-8 characters
     *
     * @param array $ent match data for one entity: [2] is 'x'/'X' for hexadecimal
     *                   entities, [3] holds the digits
     * @return string|false
     */
    protected static function decodeNumericEntity($ent)
    {
        switch ($ent[2]) {
            case 'X':
            case 'x':
                $cp = hexdec($ent[3]);
                break;
            default:
                $cp = intval($ent[3]);
                break;
        }
        return Unicode::toUtf8(array($cp));
    }

    /**
     * UTF-8 to UTF-16BE conversion.
     *
     * Uses mbstring when available (UTF8_MBSTRING). The fallback encodes
     * code points above U+FFFF as surrogate pairs.
     *
     * @param string $str
     * @param bool $bom prepend the big endian byte order mark
     * @return string
     */
    public static function toUtf16be($str, $bom = false)
    {
        $out = $bom ? "\xFE\xFF" : '';
        if (UTF8_MBSTRING) {
            return $out . mb_convert_encoding($str, 'UTF-16BE', 'UTF-8');
        }

        $uni = Unicode::fromUtf8($str);
        foreach ($uni as $cp) {
            if ($cp > 0xFFFF) {
                // does not fit into one 16 bit unit: high + low surrogate
                $cp -= 0x10000;
                $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
            } else {
                $out .= pack('n', $cp);
            }
        }
        return $out;
    }

    /**
     * UTF-16BE to UTF-8 conversion.
     *
     * Surrogate pairs are combined into a single code point before the
     * conversion. Unpaired surrogates are handed on unchanged.
     *
     * @param string $str
     * @return false|string
     */
    public static function fromUtf16be($str)
    {
        $units = unpack('n*', $str);
        if (!$units) {
            // nothing was unpacked, hand the result on as is
            return Unicode::toUtf8($units);
        }

        $units = array_values($units); // unpack() numbers its results from 1
        $count = count($units);
        $uni = array();
        for ($i = 0; $i < $count; $i++) {
            $cp = $units[$i];
            $isHigh = ($cp >= 0xD800 && $cp <= 0xDBFF);
            if ($isHigh && $i + 1 < $count && $units[$i + 1] >= 0xDC00 && $units[$i + 1] <= 0xDFFF) {
                $cp = 0x10000 + (($cp - 0xD800) << 10) + ($units[$i + 1] - 0xDC00);
                $i++; // the low surrogate has been consumed
            }
            $uni[] = $cp;
        }
        return Unicode::toUtf8($uni);
    }

}
|
|
@ -0,0 +1,381 @@
|
|||
<?php
|
||||
|
||||
namespace dokuwiki\Utf8;
|
||||
|
||||
/**
|
||||
* UTF-8 aware equivalents to PHP's string functions
|
||||
*/
|
||||
class PhpString
|
||||
{
|
||||
|
||||
/**
 * Locale independent stand-in for PHP's basename()
 *
 * PHP's own implementation depends on the current locale and may mangle
 * multi-byte file names, see the linked bug report.
 *
 * @link   https://bugs.php.net/bug.php?id=37738
 * @see    basename()
 *
 * @param string $path A path, slashes and backslashes both count as separators
 * @param string $suffix Removed from the end of the name component when present
 * @return string the last component of the path
 */
public static function basename($path, $suffix = '')
{
    // separators at either end carry no information
    $name = trim($path, '\\/');

    // position of the last separator of either kind, false when there is none
    $lastSlash = strrpos($name, '/');
    $lastBackslash = strrpos($name, '\\');
    $cut = max($lastSlash, $lastBackslash);
    if ($cut) {
        $name = substr($name, $cut + 1);
    }

    $suffixLength = strlen($suffix);
    if ($suffixLength && (substr($name, -$suffixLength) === $suffix)) {
        $name = substr($name, 0, -$suffixLength);
    }

    return $name;
}
|
||||
|
||||
/**
 * Unicode aware replacement for strlen()
 *
 * Counts characters instead of bytes. mbstring is used when available
 * (UTF8_MBSTRING), then iconv. utf8_decode() converts characters that are
 * not in ISO-8859-1 to '?', which, for the purpose of counting, is alright -
 * but it is deprecated as of PHP 8.2 and thus only used when neither of the
 * extensions can do the job. The byte length is the last resort.
 *
 * @author <chernyshevsky at hotmail dot com>
 * @see    strlen()
 * @see    utf8_decode()
 *
 * @param string $string
 * @return int
 */
public static function strlen($string)
{
    if (UTF8_MBSTRING) {
        return mb_strlen($string, 'UTF-8');
    }

    if (function_exists('iconv_strlen')) {
        // iconv gives up on malformed input and returns false, try the
        // remaining options in that case
        $length = iconv_strlen($string, 'UTF-8');
        if ($length !== false) {
            return $length;
        }
    }

    if (function_exists('utf8_decode')) {
        return strlen(utf8_decode($string));
    }

    return strlen($string);
}
|
||||
|
||||
/**
 * UTF-8 aware alternative to substr
 *
 * Return part of a string given character offset (and optionally length).
 * Never returns false, an empty string is returned instead.
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param string $str
 * @param int $offset number of UTF-8 characters offset (from left)
 * @param int $length (optional) length in UTF-8 characters from offset
 * @return string
 */
public static function substr($str, $offset, $length = null)
{
    if (UTF8_MBSTRING) {
        // name the encoding instead of relying on mb_internal_encoding();
        // a null length means "up to the end of the string"
        return mb_substr($str, $offset, $length, 'UTF-8');
    }

    /*
     * Notes:
     *
     * no mb string support, so we'll use pcre regex's with 'u' flag
     * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
     * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
     *
     * substr documentation states false can be returned in some cases (e.g. offset > string length)
     * mb_substr never returns false, it will return an empty string instead.
     *
     * calculating the number of characters in the string is a relatively expensive operation, so
     * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
     */

    // cast parameters to appropriate types to avoid multiple notices/warnings
    $str = (string)$str;
    $offset = (int)$offset;
    if ($length !== null) {
        $length = (int)$length;
    }

    // handle trivial cases
    if ($length === 0) {
        return '';
    }
    if ($offset < 0 && $length < 0 && $length < $offset) {
        return '';
    }

    $offset_pattern = '';
    $length_pattern = '';

    // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
    if ($offset < 0) {
        $strlen = self::strlen($str); // see notes
        $offset = $strlen + $offset;
        if ($offset < 0) {
            $offset = 0;
        }
    }

    // establish a pattern for offset, a non-captured group equal in length to offset
    if ($offset > 0) {
        $Ox = (int)($offset / 65535);
        $Oy = $offset % 65535;

        if ($Ox) {
            $offset_pattern = '(?:.{65535}){' . $Ox . '}';
        }
        $offset_pattern = '^(?:' . $offset_pattern . '.{' . $Oy . '})';
    } else {
        $offset_pattern = '^'; // offset == 0; just anchor the pattern
    }

    // establish a pattern for length
    if ($length === null) {
        $length_pattern = '(.*)$'; // the rest of the string
    } else {
        if (!isset($strlen)) {
            $strlen = self::strlen($str); // see notes
        }
        if ($offset > $strlen) {
            return ''; // another trivial case
        }

        if ($length > 0) {
            // reduce any length that would go past the end of the string
            $length = min($strlen - $offset, $length);

            $Lx = (int)($length / 65535);
            $Ly = $length % 65535;

            // +ve length requires ... a captured group of length characters
            if ($Lx) {
                $length_pattern = '(?:.{65535}){' . $Lx . '}';
            }
            $length_pattern = '(' . $length_pattern . '.{' . $Ly . '})';
        } elseif ($length < 0) {
            if ($length < ($offset - $strlen)) {
                return '';
            }

            $Lx = (int)((-$length) / 65535);
            $Ly = (-$length) % 65535;

            // -ve length requires ... capture everything except a group of -length characters
            //                         anchored at the tail-end of the string
            if ($Lx) {
                $length_pattern = '(?:.{65535}){' . $Lx . '}';
            }
            $length_pattern = '(.*)(?:' . $length_pattern . '.{' . $Ly . '})$';
        }
    }

    if (!preg_match('#' . $offset_pattern . $length_pattern . '#us', $str, $match)) {
        return '';
    }
    return $match[1];
}
|
||||
|
||||
/**
 * Unicode aware replacement for substr_replace()
 *
 * Offsets and lengths are counted in characters, not in bytes. Unlike the native
 * function, $length defaults to 0, so calling this without a length inserts the
 * replacement at $start without removing anything.
 *
 * @param string $string input string
 * @param string $replacement the replacement
 * @param int $start character offset at which the replacing begins. A negative value counts
 *                   from the end of the string.
 * @param int $length If positive, the number of characters to replace. If zero, the replacement
 *                    is inserted at $start. If negative, the number of characters from the end
 *                    of the string at which to stop replacing.
 * @return string
 * @see substr_replace()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
public static function substr_replace($string, $replacement, $start, $length = 0)
{
    // negative values are relative to the end of the string, resolve them to
    // absolute character positions before cutting the string apart
    if ($start < 0 || $length < 0) {
        $strlen = self::strlen($string);
        if ($start < 0) $start = max(0, $strlen + $start);
        if ($start > $strlen) $start = $strlen;
        if ($length < 0) $length = max(0, $strlen + $length - $start);
    }

    $ret = '';
    if ($start > 0) $ret .= self::substr($string, 0, $start);
    $ret .= $replacement;
    $ret .= self::substr($string, $start + $length);
    return $ret;
}
|
||||
|
||||
/**
 * Unicode aware replacement for ltrim()
 *
 * Without a character list this simply delegates to the native ltrim(). With a
 * list, every character in it is taken literally (ranges like "a..z" are not
 * supported) and matched as a whole UTF-8 character.
 *
 * @param string $str
 * @param string $charlist characters to strip from the beginning of the string
 * @return string
 * @see ltrim()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
public static function ltrim($str, $charlist = '')
{
    if ($charlist === '') return ltrim($str);

    // preg_quote() escapes everything that is special inside a character class,
    // including a leading ^ which would otherwise negate (and break) the class,
    // as well as the / delimiter
    $class = preg_quote($charlist, '/');

    return preg_replace('/^[' . $class . ']+/u', '', $str);
}
|
||||
|
||||
/**
 * Unicode aware replacement for rtrim()
 *
 * Without a character list this simply delegates to the native rtrim(). With a
 * list, every character in it is taken literally (ranges like "a..z" are not
 * supported) and matched as a whole UTF-8 character.
 *
 * @param string $str
 * @param string $charlist characters to strip from the end of the string
 * @return string
 * @see rtrim()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
public static function rtrim($str, $charlist = '')
{
    if ($charlist === '') return rtrim($str);

    // preg_quote() escapes everything that is special inside a character class,
    // including a leading ^ which would otherwise negate (and break) the class,
    // as well as the / delimiter
    $class = preg_quote($charlist, '/');

    return preg_replace('/[' . $class . ']+$/u', '', $str);
}
|
||||
|
||||
/**
 * Unicode aware replacement for trim()
 *
 * Falls back to the native trim() when no character list is given, otherwise
 * strips the listed characters from the end and then from the beginning.
 *
 * @param string $str
 * @param string $charlist characters to strip from both ends of the string
 * @return string
 * @see trim()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
public static function trim($str, $charlist = '')
{
    if ($charlist !== '') {
        $tailStripped = self::rtrim($str, $charlist);
        return self::ltrim($tailStripped, $charlist);
    }

    return trim($str);
}
|
||||
|
||||
/**
 * This is a unicode aware replacement for strtolower()
 *
 * Uses mb_string extension if available. When the intl Normalizer class is
 * loaded as well, the lower cased string is additionally passed through
 * Normalizer::normalize(). Without mb_string the bundled case table is used.
 *
 * @param string $string
 * @return string
 * @see \dokuwiki\Utf8\PhpString::strtoupper()
 *
 * @author Leo Feyer <leo@typolight.org>
 * @see strtolower()
 */
public static function strtolower($string)
{
    if (!UTF8_MBSTRING) {
        return strtr($string, Table::upperCaseToLowerCase());
    }

    $lower = mb_strtolower($string, 'utf-8');

    // do not trigger autoloading just to probe for the intl extension
    if (class_exists('Normalizer', false)) {
        $normalized = \Normalizer::normalize($lower);
        // normalize() signals failure with false; keep the plain lower cased
        // string in that case instead of handing false to the caller
        if ($normalized !== false) return $normalized;
    }

    return $lower;
}
|
||||
|
||||
/**
 * This is a unicode aware replacement for strtoupper()
 *
 * Uses mb_string extension if available, otherwise the bundled case table
 *
 * @param string $string
 * @return string
 * @see \dokuwiki\Utf8\PhpString::strtolower()
 *
 * @author Leo Feyer <leo@typolight.org>
 * @see strtoupper()
 */
public static function strtoupper($string)
{
    if (!UTF8_MBSTRING) {
        // no mb_string: translate via the lower-to-upper lookup table
        return strtr($string, Table::lowerCaseToUpperCase());
    }

    return mb_strtoupper($string, 'utf-8');
}
|
||||
|
||||
|
||||
/**
 * UTF-8 aware alternative to ucfirst
 * Make a string's first character uppercase
 *
 * @param string $str
 * @return string with first character as upper case (if applicable)
 * @author Harry Fuecks
 *
 */
public static function ucfirst($str)
{
    $chars = self::strlen($str);

    // loose comparisons on purpose, they mirror the semantics of a switch statement
    if ($chars == 0) return '';
    if ($chars == 1) return self::strtoupper($str);

    // split into the first character and everything after it
    preg_match('/^(.{1})(.*)$/us', $str, $parts);
    return self::strtoupper($parts[1]) . $parts[2];
}
|
||||
|
||||
/**
 * UTF-8 aware alternative to ucwords
 * Uppercase the first character of each word in a string
 *
 * Words are separated by form feeds, horizontal tabs, vertical tabs, linefeeds,
 * carriage returns and spaces. The separating whitespace is kept untouched.
 *
 * @param string $str
 * @return string with first char of each word uppercase
 * @author Harry Fuecks
 * @see http://php.net/ucwords
 *
 */
public static function ucwords($str)
{
    // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
    // form feeds, horizontal tabs, vertical tabs, linefeeds, carriage returns and spaces
    // This corresponds to the definition of a "word" defined at http://php.net/ucwords
    $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

    return preg_replace_callback(
        $pattern,
        function ($matches) {
            $leadingws = $matches[2];
            $first = $matches[3];

            // The full match is exactly: leading whitespace + first character + rest of word.
            // Cutting by the byte lengths of the captured groups keeps the split in sync
            // with the pattern; native ltrim() uses a different whitespace set (no form
            // feed, but NUL) and would cut at the wrong place.
            $rest = substr($matches[0], strlen($leadingws) + strlen($first));

            return $leadingws . self::strtoupper($first) . $rest;
        },
        $str
    );
}
|
||||
|
||||
/**
 * This is an Unicode aware replacement for strpos
 *
 * The search itself runs on bytes; the byte position found is converted to a
 * character position afterwards. Because $offset is given in characters, a hit
 * that lies before the requested character offset is skipped and the search is
 * repeated further into the string.
 *
 * @param string $haystack
 * @param string $needle
 * @param integer $offset character position to start searching from
 * @return integer|false character position of the first match, false if there is none
 * @author Leo Feyer <leo@typolight.org>
 * @see strpos()
 *
 */
public static function strpos($haystack, $needle, $offset = 0)
{
    // extra bytes to add to the character offset to get the byte position to search from
    $byteShift = 0;

    do {
        $bytePos = strpos($haystack, $needle, $offset + $byteShift);
        if ($bytePos === false) {
            return false;
        }

        // number of characters in front of the match
        $charPos = self::strlen(substr($haystack, 0, $bytePos));

        $tooEarly = $charPos < $offset;
        if ($tooEarly) {
            // multi-byte characters before the match: shift the next search by their surplus bytes
            $byteShift = $bytePos - $charPos;
        }
    } while ($tooEarly);

    return $charPos;
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
<?php
|
||||
|
||||
namespace dokuwiki\Utf8;
|
||||
|
||||
/**
 * Provides static access to the UTF-8 conversion tables
 *
 * Each table is kept in its own file below the tables/ directory and is only
 * read the first time it is asked for; later calls return the cached copy.
 */
class Table
{
    /**
     * Get the upper to lower case conversion table
     *
     * @return array
     */
    public static function upperCaseToLowerCase()
    {
        static $upperToLower = null;
        if ($upperToLower !== null) {
            return $upperToLower;
        }

        $upperToLower = include __DIR__ . '/tables/case.php';
        return $upperToLower;
    }

    /**
     * Get the lower to upper case conversion table
     *
     * This is the flipped upper to lower case table
     *
     * @return array
     */
    public static function lowerCaseToUpperCase()
    {
        static $lowerToUpper = null;
        if ($lowerToUpper !== null) {
            return $lowerToUpper;
        }

        $lowerToUpper = array_flip(self::upperCaseToLowerCase());
        return $lowerToUpper;
    }

    /**
     * Get the lower case accent table
     *
     * @return array
     */
    public static function lowerAccents()
    {
        static $accents = null;
        if ($accents !== null) {
            return $accents;
        }

        $accents = include __DIR__ . '/tables/loweraccents.php';
        return $accents;
    }

    /**
     * Get the upper case accent table
     *
     * @return array
     */
    public static function upperAccents()
    {
        static $accents = null;
        if ($accents !== null) {
            return $accents;
        }

        $accents = include __DIR__ . '/tables/upperaccents.php';
        return $accents;
    }

    /**
     * Get the romanization table
     *
     * @return array
     */
    public static function romanization()
    {
        static $romanization = null;
        if ($romanization !== null) {
            return $romanization;
        }

        $romanization = include __DIR__ . '/tables/romanization.php';
        return $romanization;
    }

    /**
     * Get the special chars as a concatenated string
     *
     * The table file holds a list of code points which is converted to one UTF-8 string
     *
     * @return string
     */
    public static function specialChars()
    {
        static $specials = null;
        if ($specials !== null) {
            return $specials;
        }

        $codepoints = include __DIR__ . '/tables/specials.php';
        // FIXME should we cache this to file system?
        $specials = Unicode::toUtf8($codepoints);
        return $specials;
    }
}
|
|
@ -0,0 +1,277 @@
|
|||
<?php
|
||||
|
||||
namespace dokuwiki\Utf8;
|
||||
|
||||
/**
 * Convert between UTF-8 and a list of Unicode Code Points
 */
class Unicode
{
    /**
     * Takes an UTF-8 string and returns an array of ints representing the
     * Unicode characters. Astral planes are supported ie. the ints in the
     * output can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * string isn't a valid UTF-8 octet sequence and raises a PHP error at
     * level E_USER_WARNING. This includes input that ends in the middle of
     * a multi-octet sequence.
     *
     * In non-strict mode no error is raised: bytes that do not fit are dropped
     * and illegal code points are passed through to the output.
     *
     * Note: this function has been modified slightly in this library to
     * trigger errors on encountering bad bytes
     *
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see unicode_to_utf8
     * @link http://hsivonen.iki.fi/php-utf8/
     * @link http://sourceforge.net/projects/phputf8/
     * @todo break into less complex chunks
     * @todo use exceptions instead of user errors
     *
     * @param string $str UTF-8 encoded string
     * @param boolean $strict Check for invalid sequences?
     * @return mixed array of unicode code points or false if UTF-8 invalid
     */
    public static function fromUtf8($str, $strict = false)
    {
        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        $out = [];

        $len = strlen($str);

        for ($i = 0; $i < $len; $i++) {
            $in = ord($str[$i]);

            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if (0 === (0x80 & $in)) {
                    // US-ASCII, pass straight through.
                    $out[] = $in;
                    $mBytes = 1;
                } elseif (0xC0 === (0xE0 & $in)) {
                    // First octet of 2 octet sequence
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif (0xE0 === (0xF0 & $in)) {
                    // First octet of 3 octet sequence
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif (0xF0 === (0xF8 & $in)) {
                    // First octet of 4 octet sequence
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif (0xF8 === (0xFC & $in)) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif (0xFC === (0xFE & $in)) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } elseif ($strict) {
                    /* Current octet is neither in the US-ASCII range nor a legal first
                     * octet of a multi-octet sequence.
                     */
                    trigger_error(
                        'utf8_to_unicode: Illegal sequence identifier ' .
                        'in UTF-8 at byte ' . $i,
                        E_USER_WARNING
                    );
                    return false;
                }
            } else {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if (0x80 === (0xC0 & $in)) {
                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $mUcs4 |= ($in & 0x0000003F) << $shift;

                    /**
                     * End of the multi-octet sequence. mUcs4 now contains the final
                     * Unicode codepoint to be output
                     */
                    if (0 === --$mState) {
                        /*
                         * Check for illegal sequences and codepoints.
                         */
                        // From Unicode 3.1, non-shortest form is illegal
                        if (((2 === $mBytes) && ($mUcs4 < 0x0080)) ||
                            ((3 === $mBytes) && ($mUcs4 < 0x0800)) ||
                            ((4 === $mBytes) && ($mUcs4 < 0x10000)) ||
                            (4 < $mBytes) ||
                            // From Unicode 3.2, surrogate characters are illegal
                            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
                            // Codepoints outside the Unicode range are illegal
                            ($mUcs4 > 0x10FFFF)) {
                            if ($strict) {
                                trigger_error(
                                    'utf8_to_unicode: Illegal sequence or codepoint ' .
                                    'in UTF-8 at byte ' . $i,
                                    E_USER_WARNING
                                );

                                return false;
                            }
                        }

                        if (0xFEFF !== $mUcs4) {
                            // BOM is legal but we don't want to output it
                            $out[] = $mUcs4;
                        }

                        //initialize UTF8 cache
                        $mState = 0;
                        $mUcs4 = 0;
                        $mBytes = 1;
                    }
                } elseif ($strict) {
                    /**
                     *((0xC0 & (*in) != 0x80) && (mState != 0))
                     * Incomplete multi-octet sequence.
                     */
                    trigger_error(
                        'utf8_to_unicode: Incomplete multi-octet ' .
                        ' sequence in UTF-8 at byte ' . $i,
                        E_USER_WARNING
                    );

                    return false;
                }
            }
        }

        // the input ended while continuation octets were still expected
        if ($strict && $mState !== 0) {
            trigger_error(
                'utf8_to_unicode: Incomplete multi-octet ' .
                ' sequence in UTF-8 at byte ' . $len,
                E_USER_WARNING
            );

            return false;
        }

        return $out;
    }

    /**
     * Takes an array of ints representing the Unicode characters and returns
     * a UTF-8 string. Astral planes are supported ie. the ints in the
     * input can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * array contains ints that represent surrogates or are outside the
     * Unicode range and raises a PHP error at level E_USER_WARNING. In
     * non-strict mode such values are silently skipped.
     *
     * The array is referenced by its keys, the key of an offending value is
     * part of the error message.
     *
     * @param array $arr of unicode code points representing a string
     * @param boolean $strict Check for invalid sequences?
     * @return string|false UTF-8 string or false if array contains invalid code points
     *
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see utf8_to_unicode
     * @link http://hsivonen.iki.fi/php-utf8/
     * @link http://sourceforge.net/projects/phputf8/
     * @todo use exceptions instead of user errors
     */
    public static function toUtf8($arr, $strict = false)
    {
        if (!is_array($arr)) return '';

        // plain concatenation instead of an output buffer: nothing is left open
        // when the function bails out early in strict mode
        $utf8 = '';

        foreach ($arr as $k => $cp) {
            if ($cp < 0 || $cp > 0x10ffff) {
                // out of range
                if ($strict) {
                    trigger_error(
                        'unicode_to_utf8: Codepoint out of Unicode range ' .
                        'at index: ' . $k . ', value: ' . $cp,
                        E_USER_WARNING
                    );

                    return false;
                }
                continue;
            }

            if ($cp <= 0x007f) {
                // ASCII range (including control chars)
                $utf8 .= chr($cp);
            } elseif ($cp <= 0x07ff) {
                // 2 byte sequence
                $utf8 .= chr(0xc0 | ($cp >> 6));
                $utf8 .= chr(0x80 | ($cp & 0x003f));
            } elseif ($cp == 0xFEFF) {
                // Byte order mark (skip)
                continue;
            } elseif ($cp >= 0xD800 && $cp <= 0xDFFF) {
                // found a surrogate
                if ($strict) {
                    trigger_error(
                        'unicode_to_utf8: Illegal surrogate ' .
                        'at index: ' . $k . ', value: ' . $cp,
                        E_USER_WARNING
                    );
                    return false;
                }
            } elseif ($cp <= 0xffff) {
                // 3 byte sequence
                $utf8 .= chr(0xe0 | ($cp >> 12));
                $utf8 .= chr(0x80 | (($cp >> 6) & 0x003f));
                $utf8 .= chr(0x80 | ($cp & 0x003f));
            } else {
                // 4 byte sequence
                $utf8 .= chr(0xf0 | ($cp >> 18));
                $utf8 .= chr(0x80 | (($cp >> 12) & 0x3f));
                $utf8 .= chr(0x80 | (($cp >> 6) & 0x3f));
                $utf8 .= chr(0x80 | ($cp & 0x3f));
            }
        }

        return $utf8;
    }
}
|
|
@ -0,0 +1,567 @@
|
|||
<?php
|
||||
/**
|
||||
* UTF-8 Case lookup table
|
||||
*
|
||||
* This lookup table maps upper case letters to their corresponding
* lower case letters in UTF-8
|
||||
*
|
||||
* @author Andreas Gohr <andi@splitbrain.org>
|
||||
*/
|
||||
return [
|
||||
'A' => 'a',
|
||||
'A' => 'a',
|
||||
'Á' => 'á',
|
||||
'À' => 'à',
|
||||
'Ă' => 'ă',
|
||||
'Ắ' => 'ắ',
|
||||
'Ẵ' => 'ẵ',
|
||||
'Ẳ' => 'ẳ',
|
||||
'Â' => 'â',
|
||||
'Ấ' => 'ấ',
|
||||
'Ầ' => 'ầ',
|
||||
'Ẫ' => 'ẫ',
|
||||
'Ǎ' => 'ǎ',
|
||||
'Å' => 'å',
|
||||
'Ǻ' => 'ǻ',
|
||||
'Ä' => 'ä',
|
||||
'Ǟ' => 'ǟ',
|
||||
'Ã' => 'ã',
|
||||
'Ǡ' => 'ǡ',
|
||||
'Ą' => 'ą',
|
||||
'Ā' => 'ā',
|
||||
'Ả' => 'ả',
|
||||
'Ȁ' => 'ȁ',
|
||||
'Ȃ' => 'ȃ',
|
||||
'Ặ' => 'ặ',
|
||||
'Ậ' => 'ậ',
|
||||
'Ḁ' => 'ḁ',
|
||||
'Æ' => 'æ',
|
||||
'Ǽ' => 'ǽ',
|
||||
'Ǣ' => 'ǣ',
|
||||
'B' => 'b',
|
||||
'Ḃ' => 'ḃ',
|
||||
'Ḅ' => 'ḅ',
|
||||
'Ḇ' => 'ḇ',
|
||||
'Ɓ' => 'ɓ',
|
||||
'Ƃ' => 'ƃ',
|
||||
'C' => 'c',
|
||||
'Ć' => 'ć',
|
||||
'Ĉ' => 'ĉ',
|
||||
'Č' => 'č',
|
||||
'Ċ' => 'ċ',
|
||||
'Ç' => 'ç',
|
||||
'Ƈ' => 'ƈ',
|
||||
'D' => 'd',
|
||||
'D' => 'd',
|
||||
'Ď' => 'ď',
|
||||
'Ḋ' => 'ḋ',
|
||||
'Ḑ' => 'ḑ',
|
||||
'Ḍ' => 'ḍ',
|
||||
'Ḓ' => 'ḓ',
|
||||
'Ḏ' => 'ḏ',
|
||||
'Ð' => 'ð',
|
||||
'Dz' => 'dz', //FIXME
|
||||
'Dž' => 'dž', //FIXME
|
||||
'Ɗ' => 'ɗ',
|
||||
'Ƌ' => 'ƌ',
|
||||
'E' => 'e',
|
||||
'E' => 'e',
|
||||
'É' => 'é',
|
||||
'È' => 'è',
|
||||
'Ê' => 'ê',
|
||||
'Ế' => 'ế',
|
||||
'Ề' => 'ề',
|
||||
'Ễ' => 'ễ',
|
||||
'Ể' => 'ể',
|
||||
'Ě' => 'ě',
|
||||
'Ẽ' => 'ẽ',
|
||||
'Ė' => 'ė',
|
||||
'Ȩ' => 'ȩ',
|
||||
'Ḝ' => 'ḝ',
|
||||
'Ę' => 'ę',
|
||||
'Ē' => 'ē',
|
||||
'Ḕ' => 'ḕ',
|
||||
'Ẻ' => 'ẻ',
|
||||
'Ȅ' => 'ȅ',
|
||||
'Ȇ' => 'ȇ',
|
||||
'Ẹ' => 'ẹ',
|
||||
'Ệ' => 'ệ',
|
||||
'Ḛ' => 'ḛ',
|
||||
'Ǝ' => 'ǝ',
|
||||
'Ə' => 'ə',
|
||||
'Ɛ' => 'ɛ',
|
||||
'F' => 'f',
|
||||
'F' => 'f',
|
||||
'Ƒ' => 'ƒ',
|
||||
'G' => 'g',
|
||||
'G' => 'g',
|
||||
'Ǵ' => 'ǵ',
|
||||
'Ğ' => 'ğ',
|
||||
'Ĝ' => 'ĝ',
|
||||
'Ġ' => 'ġ',
|
||||
'Ģ' => 'ģ',
|
||||
'Ḡ' => 'ḡ',
|
||||
'Ǥ' => 'ǥ',
|
||||
'Ɣ' => 'ɣ',
|
||||
'Ƣ' => 'ƣ',
|
||||
'H' => 'h',
|
||||
'Ĥ' => 'ĥ',
|
||||
'Ȟ' => 'ȟ',
|
||||
'Ḧ' => 'ḧ',
|
||||
'Ḣ' => 'ḣ',
|
||||
'Ḩ' => 'ḩ',
|
||||
'Ḥ' => 'ḥ',
|
||||
'Ḫ' => 'ḫ',
|
||||
'Ƕ' => 'ƕ',
|
||||
'I' => 'i',
|
||||
'I' => 'i',
|
||||
'Í' => 'í',
|
||||
'Ĭ' => 'ĭ',
|
||||
'Î' => 'î',
|
||||
'Ǐ' => 'ǐ',
|
||||
'Ï' => 'ï',
|
||||
'Ḯ' => 'ḯ',
|
||||
'Ĩ' => 'ĩ',
|
||||
'Ī' => 'ī',
|
||||
'Ỉ' => 'ỉ',
|
||||
'Ȉ' => 'ȉ',
|
||||
'Ȋ' => 'ȋ',
|
||||
'Ị' => 'ị',
|
||||
'Ḭ' => 'ḭ',
|
||||
'Ɨ' => 'ɨ',
|
||||
'Ɩ' => 'ɩ',
|
||||
'J' => 'j',
|
||||
'J' => 'j',
|
||||
'Ĵ' => 'ĵ',
|
||||
'K' => 'k',
|
||||
'Ḱ' => 'ḱ',
|
||||
'Ǩ' => 'ǩ',
|
||||
'Ķ' => 'ķ',
|
||||
'Ḳ' => 'ḳ',
|
||||
'Ḵ' => 'ḵ',
|
||||
'Ƙ' => 'ƙ',
|
||||
'L' => 'l',
|
||||
'Ĺ' => 'ĺ',
|
||||
'Ľ' => 'ľ',
|
||||
'Ļ' => 'ļ',
|
||||
'Ł' => 'ł',
|
||||
'Ḷ' => 'ḷ',
|
||||
'Ḽ' => 'ḽ',
|
||||
'Ḻ' => 'ḻ',
|
||||
'Ŀ' => 'ŀ',
|
||||
'Lj' => 'lj', // FIXME
|
||||
'M' => 'm',
|
||||
'M' => 'm',
|
||||
'Ṁ' => 'ṁ',
|
||||
'Ṃ' => 'ṃ',
|
||||
'N' => 'n',
|
||||
'N' => 'n',
|
||||
'Ń' => 'ń',
|
||||
'Ǹ' => 'ǹ',
|
||||
'Ñ' => 'ñ',
|
||||
'Ṅ' => 'ṅ',
|
||||
'Ņ' => 'ņ',
|
||||
'Ṇ' => 'ṇ',
|
||||
'Ṋ' => 'ṋ',
|
||||
'Ṉ' => 'ṉ',
|
||||
'Ɲ' => 'ɲ',
|
||||
'Ƞ' => 'ƞ',
|
||||
'Ŋ' => 'ŋ',
|
||||
'O' => 'o',
|
||||
'O' => 'o',
|
||||
'Ó' => 'ó',
|
||||
'Ŏ' => 'ŏ',
|
||||
'Ô' => 'ô',
|
||||
'Ố' => 'ố',
|
||||
'Ồ' => 'ồ',
|
||||
'Ỗ' => 'ỗ',
|
||||
'Ổ' => 'ổ',
|
||||
'Ö' => 'ö',
|
||||
'Ȫ' => 'ȫ',
|
||||
'Ő' => 'ő',
|
||||
'Õ' => 'õ',
|
||||
'Ṍ' => 'ṍ',
|
||||
'Ṏ' => 'ṏ',
|
||||
'Ȯ' => 'ȯ',
|
||||
'Ȱ' => 'ȱ',
|
||||
'Ø' => 'ø',
|
||||
'Ǿ' => 'ǿ',
|
||||
'Ǫ' => 'ǫ',
|
||||
'Ǭ' => 'ǭ',
|
||||
'Ṓ' => 'ṓ',
|
||||
'Ṑ' => 'ṑ',
|
||||
'Ỏ' => 'ỏ',
|
||||
'Ȍ' => 'ȍ',
|
||||
'Ȏ' => 'ȏ',
|
||||
'Ơ' => 'ơ',
|
||||
'Ờ' => 'ờ',
|
||||
'Ỡ' => 'ỡ',
|
||||
'Ở' => 'ở',
|
||||
'Ợ' => 'ợ',
|
||||
'Ọ' => 'ọ',
|
||||
'Ộ' => 'ộ',
|
||||
'Ɔ' => 'ɔ',
|
||||
'Ɵ' => 'ɵ',
|
||||
'Ȣ' => 'ȣ',
|
||||
'P' => 'p',
|
||||
'P' => 'p',
|
||||
'Ṕ' => 'ṕ',
|
||||
'Ƥ' => 'ƥ',
|
||||
'Q' => 'q',
|
||||
'Q' => 'q',
|
||||
'R' => 'r',
|
||||
'R' => 'r',
|
||||
'Ŕ' => 'ŕ',
|
||||
'Ṙ' => 'ṙ',
|
||||
'Ŗ' => 'ŗ',
|
||||
'Ȑ' => 'ȑ',
|
||||
'Ȓ' => 'ȓ',
|
||||
'Ṛ' => 'ṛ',
|
||||
'Ṝ' => 'ṝ',
|
||||
'Ʀ' => 'ʀ',
|
||||
'S' => 's',
|
||||
'S' => 's',
|
||||
'Ś' => 'ś',
|
||||
'Ṥ' => 'ṥ',
|
||||
'Ŝ' => 'ŝ',
|
||||
'Ṧ' => 'ṧ',
|
||||
'Ṡ' => 'ṡ',
|
||||
'Ş' => 'ş',
|
||||
'Ṣ' => 'ṣ',
|
||||
'Ṩ' => 'ṩ',
|
||||
'Ș' => 'ș',
|
||||
'T' => 't',
|
||||
'T' => 't',
|
||||
'Ť' => 'ť',
|
||||
'Ṫ' => 'ṫ',
|
||||
'Ţ' => 'ţ',
|
||||
'Ṭ' => 'ṭ',
|
||||
'Ṱ' => 'ṱ',
|
||||
'Ṯ' => 'ṯ',
|
||||
'Ŧ' => 'ŧ',
|
||||
'Ƭ' => 'ƭ',
|
||||
'Ʈ' => 'ʈ',
|
||||
'U' => 'u',
|
||||
'Ú' => 'ú',
|
||||
'Ù' => 'ù',
|
||||
'Ŭ' => 'ŭ',
|
||||
'Û' => 'û',
|
||||
'Ǔ' => 'ǔ',
|
||||
'Ů' => 'ů',
|
||||
'Ǘ' => 'ǘ',
|
||||
'Ǜ' => 'ǜ',
|
||||
'Ǚ' => 'ǚ',
|
||||
'Ǖ' => 'ǖ',
|
||||
'Ű' => 'ű',
|
||||
'Ũ' => 'ũ',
|
||||
'Ų' => 'ų',
|
||||
'Ū' => 'ū',
|
||||
'Ṻ' => 'ṻ',
|
||||
'Ủ' => 'ủ',
|
||||
'Ȕ' => 'ȕ',
|
||||
'Ȗ' => 'ȗ',
|
||||
'Ứ' => 'ứ',
|
||||
'Ừ' => 'ừ',
|
||||
'Ữ' => 'ữ',
|
||||
'Ử' => 'ử',
|
||||
'Ự' => 'ự',
|
||||
'Ụ' => 'ụ',
|
||||
'Ṷ' => 'ṷ',
|
||||
'Ṵ' => 'ṵ',
|
||||
'Ɯ' => 'ɯ',
|
||||
'Ʊ' => 'ʊ',
|
||||
'V' => 'v',
|
||||
'V' => 'v',
|
||||
'Ṿ' => 'ṿ',
|
||||
'Ʋ' => 'ʋ',
|
||||
'W' => 'w',
|
||||
'W' => 'w',
|
||||
'Ẃ' => 'ẃ',
|
||||
'Ẁ' => 'ẁ',
|
||||
'Ẅ' => 'ẅ',
|
||||
'Ẇ' => 'ẇ',
|
||||
'Ẉ' => 'ẉ',
|
||||
'X' => 'x',
|
||||
'X' => 'x',
|
||||
'Ẍ' => 'ẍ',
|
||||
'Y' => 'y',
|
||||
'Y' => 'y',
|
||||
'Ý' => 'ý',
|
||||
'Ỳ' => 'ỳ',
|
||||
'Ŷ' => 'ŷ',
|
||||
'Ÿ' => 'ÿ',
|
||||
'Ẏ' => 'ẏ',
|
||||
'Ȳ' => 'ȳ',
|
||||
'Ỷ' => 'ỷ',
|
||||
'Ỵ' => 'ỵ',
|
||||
'Ƴ' => 'ƴ',
|
||||
'Ȝ' => 'ȝ',
|
||||
'Z' => 'z',
|
||||
'Ź' => 'ź',
|
||||
'Ẑ' => 'ẑ',
|
||||
'Ž' => 'ž',
|
||||
'Ż' => 'ż',
|
||||
'Ẓ' => 'ẓ',
|
||||
'Ƶ' => 'ƶ',
|
||||
'Ȥ' => 'ȥ',
|
||||
'Ʒ' => 'ʒ',
|
||||
'Ǯ' => 'ǯ',
|
||||
'Ƹ' => 'ƹ',
|
||||
'Þ' => 'þ',
|
||||
'Ƨ' => 'ƨ',
|
||||
'Ƽ' => 'ƽ',
|
||||
'Ƅ' => 'ƅ',
|
||||
'Α' => 'α',
|
||||
'Ἀ' => 'ἀ',
|
||||
'Ἄ' => 'ἄ',
|
||||
'Ἂ' => 'ἂ',
|
||||
'ᾊ' => 'ᾂ',
|
||||
'Ἆ' => 'ἆ',
|
||||
'ᾎ' => 'ᾆ',
|
||||
'ᾈ' => 'ᾀ',
|
||||
'Ἁ' => 'ἁ',
|
||||
'ᾍ' => 'ᾅ',
|
||||
'Ἃ' => 'ἃ',
|
||||
'ᾋ' => 'ᾃ',
|
||||
'Ἇ' => 'ἇ',
|
||||
'ᾏ' => 'ᾇ',
|
||||
'ᾉ' => 'ᾁ',
|
||||
'Ὰ' => 'ὰ',
|
||||
'Ᾰ' => 'ᾰ',
|
||||
'Ᾱ' => 'ᾱ',
|
||||
'ᾼ' => 'ᾳ',
|
||||
'Β' => 'β',
|
||||
'Γ' => 'γ',
|
||||
'Ε' => 'ε',
|
||||
'Ἐ' => 'ἐ',
|
||||
'Ἔ' => 'ἔ',
|
||||
'Ἒ' => 'ἒ',
|
||||
'Ἑ' => 'ἑ',
|
||||
'Ἕ' => 'ἕ',
|
||||
'Έ' => 'έ',
|
||||
'Ὲ' => 'ὲ',
|
||||
'Ϝ' => 'ϝ',
|
||||
'Ϛ' => 'ϛ',
|
||||
'Ζ' => 'ζ',
|
||||
'Η' => 'η',
|
||||
'ᾜ' => 'ᾔ',
|
||||
'Ἢ' => 'ἢ',
|
||||
'ᾚ' => 'ᾒ',
|
||||
'Ἦ' => 'ἦ',
|
||||
'ᾞ' => 'ᾖ',
|
||||
'ᾘ' => 'ᾐ',
|
||||
'Ἥ' => 'ἥ',
|
||||
'ᾝ' => 'ᾕ',
|
||||
'Ἣ' => 'ἣ',
|
||||
'ᾛ' => 'ᾓ',
|
||||
'Ἧ' => 'ἧ',
|
||||
'ᾟ' => 'ᾗ',
|
||||
'Ή' => 'ή',
|
||||
'Ὴ' => 'ὴ',
|
||||
'ῌ' => 'ῃ',
|
||||
'Θ' => 'θ',
|
||||
'Ι' => 'ι',
|
||||
'Ἰ' => 'ἰ',
|
||||
'Ἲ' => 'ἲ',
|
||||
'Ἶ' => 'ἶ',
|
||||
'Ἱ' => 'ἱ',
|
||||
'Ἵ' => 'ἵ',
|
||||
'Ἳ' => 'ἳ',
|
||||
'Ἷ' => 'ἷ',
|
||||
'Ὶ' => 'ὶ',
|
||||
'Ῐ' => 'ῐ',
|
||||
'Ϊ' => 'ϊ',
|
||||
'Ῑ' => 'ῑ',
|
||||
'Κ' => 'κ',
|
||||
'Λ' => 'λ',
|
||||
'Ν' => 'ν',
|
||||
'Ξ' => 'ξ',
|
||||
'Ο' => 'ο',
|
||||
'Ὀ' => 'ὀ',
|
||||
'Ὄ' => 'ὄ',
|
||||
'Ὂ' => 'ὂ',
|
||||
'Ὅ' => 'ὅ',
|
||||
'Ὃ' => 'ὃ',
|
||||
'Ό' => 'ό',
|
||||
'Ὸ' => 'ὸ',
|
||||
'Π' => 'π',
|
||||
'Ϟ' => 'ϟ',
|
||||
'Ρ' => 'ρ',
|
||||
'Ῥ' => 'ῥ',
|
||||
'Σ' => 'ς',
|
||||
'Τ' => 'τ',
|
||||
'Υ' => 'υ',
|
||||
'Ὑ' => 'ὑ',
|
||||
'Ὓ' => 'ὓ',
|
||||
'Ὗ' => 'ὗ',
|
||||
'Ύ' => 'ύ',
|
||||
'Ὺ' => 'ὺ',
|
||||
'Ϋ' => 'ϋ',
|
||||
'Ῡ' => 'ῡ',
|
||||
'Χ' => 'χ',
|
||||
'Ψ' => 'ψ',
|
||||
'Ω' => 'ω',
|
||||
'Ὤ' => 'ὤ',
|
||||
'ᾬ' => 'ᾤ',
|
||||
'Ὢ' => 'ὢ',
|
||||
'Ὦ' => 'ὦ',
|
||||
'ᾮ' => 'ᾦ',
|
||||
'Ὡ' => 'ὡ',
|
||||
'Ὥ' => 'ὥ',
|
||||
'ᾭ' => 'ᾥ',
|
||||
'Ὣ' => 'ὣ',
|
||||
'Ὧ' => 'ὧ',
|
||||
'ᾯ' => 'ᾧ',
|
||||
'ᾩ' => 'ᾡ',
|
||||
'Ώ' => 'ώ',
|
||||
'Ὼ' => 'ὼ',
|
||||
'ῼ' => 'ῳ',
|
||||
'Ϣ' => 'ϣ',
|
||||
'Ϥ' => 'ϥ',
|
||||
'Ϧ' => 'ϧ',
|
||||
'Ϩ' => 'ϩ',
|
||||
'Ϫ' => 'ϫ',
|
||||
'Ϭ' => 'ϭ',
|
||||
'А' => 'а',
|
||||
'Ӑ' => 'ӑ',
|
||||
'Ӓ' => 'ӓ',
|
||||
'Ә' => 'ә',
|
||||
'Ӛ' => 'ӛ',
|
||||
'Ӕ' => 'ӕ',
|
||||
'В' => 'в',
|
||||
'Г' => 'г',
|
||||
'Ѓ' => 'ѓ',
|
||||
'Ґ' => 'ґ',
|
||||
'Ғ' => 'ғ',
|
||||
'Ҕ' => 'ҕ',
|
||||
'Ԁ' => 'ԁ',
|
||||
'Ђ' => 'ђ',
|
||||
'Ԃ' => 'ԃ',
|
||||
'Ҙ' => 'ҙ',
|
||||
'Е' => 'е',
|
||||
'Ѐ' => 'ѐ',
|
||||
'Ё' => 'ё',
|
||||
'Є' => 'є',
|
||||
'Ж' => 'ж',
|
||||
'Ӂ' => 'ӂ',
|
||||
'Ӝ' => 'ӝ',
|
||||
'Җ' => 'җ',
|
||||
'Ӟ' => 'ӟ',
|
||||
'Ԅ' => 'ԅ',
|
||||
'Ѕ' => 'ѕ',
|
||||
'Ӡ' => 'ӡ',
|
||||
'Ԇ' => 'ԇ',
|
||||
'И' => 'и',
|
||||
'Ӥ' => 'ӥ',
|
||||
'Ӣ' => 'ӣ',
|
||||
'Ҋ' => 'ҋ',
|
||||
'І' => 'і',
|
||||
'Ї' => 'ї',
|
||||
'Й' => 'й',
|
||||
'К' => 'к',
|
||||
'Ќ' => 'ќ',
|
||||
'Қ' => 'қ',
|
||||
'Ӄ' => 'ӄ',
|
||||
'Ҡ' => 'ҡ',
|
||||
'Ҟ' => 'ҟ',
|
||||
'Л' => 'л',
|
||||
'Ӆ' => 'ӆ',
|
||||
'Љ' => 'љ',
|
||||
'Ԉ' => 'ԉ',
|
||||
'М' => 'м',
|
||||
'Ӎ' => 'ӎ',
|
||||
'Ӊ' => 'ӊ',
|
||||
'Ң' => 'ң',
|
||||
'Ӈ' => 'ӈ',
|
||||
'Ҥ' => 'ҥ',
|
||||
'Њ' => 'њ',
|
||||
'Ԋ' => 'ԋ',
|
||||
'Ӧ' => 'ӧ',
|
||||
'Ө' => 'ө',
|
||||
'Ӫ' => 'ӫ',
|
||||
'П' => 'п',
|
||||
'Ҧ' => 'ҧ',
|
||||
'Ҁ' => 'ҁ',
|
||||
'Ҏ' => 'ҏ',
|
||||
'С' => 'с',
|
||||
'Ԍ' => 'ԍ',
|
||||
'Ҫ' => 'ҫ',
|
||||
'Т' => 'т',
|
||||
'Ԏ' => 'ԏ',
|
||||
'Ћ' => 'ћ',
|
||||
'У' => 'у',
|
||||
'Ў' => 'ў',
|
||||
'Ӱ' => 'ӱ',
|
||||
'Ӳ' => 'ӳ',
|
||||
'Ӯ' => 'ӯ',
|
||||
'Ұ' => 'ұ',
|
||||
'Ѹ' => 'ѹ',
|
||||
'Ф' => 'ф',
|
||||
'Х' => 'х',
|
||||
'Ҳ' => 'ҳ',
|
||||
'Һ' => 'һ',
|
||||
'Ѿ' => 'ѿ',
|
||||
'Ѽ' => 'ѽ',
|
||||
'Ѻ' => 'ѻ',
|
||||
'Ц' => 'ц',
|
||||
'Ҵ' => 'ҵ',
|
||||
'Ч' => 'ч',
|
||||
'Ҷ' => 'ҷ',
|
||||
'Ӌ' => 'ӌ',
|
||||
'Ҹ' => 'ҹ',
|
||||
'Ҽ' => 'ҽ',
|
||||
'Ҿ' => 'ҿ',
|
||||
'Џ' => 'џ',
|
||||
'Щ' => 'щ',
|
||||
'Ъ' => 'ъ',
|
||||
'Ы' => 'ы',
|
||||
'Ӹ' => 'ӹ',
|
||||
'Ь' => 'ь',
|
||||
'Ҍ' => 'ҍ',
|
||||
'Э' => 'э',
|
||||
'Ӭ' => 'ӭ',
|
||||
'Ю' => 'ю',
|
||||
'Я' => 'я',
|
||||
'Ѥ' => 'ѥ',
|
||||
'Ѧ' => 'ѧ',
|
||||
'Ѩ' => 'ѩ',
|
||||
'Ѭ' => 'ѭ',
|
||||
'Ѯ' => 'ѯ',
|
||||
'Ѱ' => 'ѱ',
|
||||
'Ѳ' => 'ѳ',
|
||||
'Ѵ' => 'ѵ',
|
||||
'Ҩ' => 'ҩ',
|
||||
'Ա' => 'ա',
|
||||
'Բ' => 'բ',
|
||||
'Գ' => 'գ',
|
||||
'Դ' => 'դ',
|
||||
'Ե' => 'ե',
|
||||
'Է' => 'է',
|
||||
'Ը' => 'ը',
|
||||
'Թ' => 'թ',
|
||||
'Ժ' => 'ժ',
|
||||
'Ի' => 'ի',
|
||||
'Լ' => 'լ',
|
||||
'Ծ' => 'ծ',
|
||||
'Կ' => 'կ',
|
||||
'Հ' => 'հ',
|
||||
'Ձ' => 'ձ',
|
||||
'Ղ' => 'ղ',
|
||||
'Ճ' => 'ճ',
|
||||
'Յ' => 'յ',
|
||||
'Ն' => 'ն',
|
||||
'Շ' => 'շ',
|
||||
'Ո' => 'ո',
|
||||
'Չ' => 'չ',
|
||||
'Պ' => 'պ',
|
||||
'Ռ' => 'ռ',
|
||||
'Ս' => 'ս',
|
||||
'Վ' => 'վ',
|
||||
'Տ' => 'տ',
|
||||
'Ր' => 'ր',
|
||||
'Ց' => 'ց',
|
||||
'Փ' => 'փ',
|
||||
'Ք' => 'ք',
|
||||
'Օ' => 'օ',
|
||||
'Ֆ' => 'ֆ',
|
||||
];
|
|
@ -0,0 +1,116 @@
|
|||
<?php
|
||||
/**
|
||||
* UTF-8 lookup table for lower case accented letters
|
||||
*
|
||||
* This lookup table defines ASCII-7 replacements for accented characters.
* It covers lower case letters only.
|
||||
*
|
||||
* @author Andreas Gohr <andi@splitbrain.org>
|
||||
* @see \dokuwiki\Utf8\Clean::deaccent()
|
||||
*/
|
||||
return [
|
||||
'á' => 'a',
|
||||
'à' => 'a',
|
||||
'ă' => 'a',
|
||||
'â' => 'a',
|
||||
'å' => 'a',
|
||||
'ä' => 'ae',
|
||||
'ã' => 'a',
|
||||
'ą' => 'a',
|
||||
'ā' => 'a',
|
||||
'æ' => 'ae',
|
||||
'ḃ' => 'b',
|
||||
'ć' => 'c',
|
||||
'ĉ' => 'c',
|
||||
'č' => 'c',
|
||||
'ċ' => 'c',
|
||||
'ç' => 'c',
|
||||
'ď' => 'd',
|
||||
'ḋ' => 'd',
|
||||
'đ' => 'd',
|
||||
'ð' => 'dh',
|
||||
'é' => 'e',
|
||||
'è' => 'e',
|
||||
'ĕ' => 'e',
|
||||
'ê' => 'e',
|
||||
'ě' => 'e',
|
||||
'ë' => 'e',
|
||||
'ė' => 'e',
|
||||
'ę' => 'e',
|
||||
'ē' => 'e',
|
||||
'ḟ' => 'f',
|
||||
'ƒ' => 'f',
|
||||
'ğ' => 'g',
|
||||
'ĝ' => 'g',
|
||||
'ġ' => 'g',
|
||||
'ģ' => 'g',
|
||||
'ĥ' => 'h',
|
||||
'ħ' => 'h',
|
||||
'í' => 'i',
|
||||
'ì' => 'i',
|
||||
'î' => 'i',
|
||||
'ï' => 'i',
|
||||
'ĩ' => 'i',
|
||||
'į' => 'i',
|
||||
'ī' => 'i',
|
||||
'ĵ' => 'j',
|
||||
'ķ' => 'k',
|
||||
'ĺ' => 'l',
|
||||
'ľ' => 'l',
|
||||
'ļ' => 'l',
|
||||
'ł' => 'l',
|
||||
'ṁ' => 'm',
|
||||
'ń' => 'n',
|
||||
'ň' => 'n',
|
||||
'ñ' => 'n',
|
||||
'ņ' => 'n',
|
||||
'ó' => 'o',
|
||||
'ò' => 'o',
|
||||
'ô' => 'o',
|
||||
'ö' => 'oe',
|
||||
'ő' => 'o',
|
||||
'õ' => 'o',
|
||||
'ø' => 'o',
|
||||
'ō' => 'o',
|
||||
'ơ' => 'o',
|
||||
'ṗ' => 'p',
|
||||
'ŕ' => 'r',
|
||||
'ř' => 'r',
|
||||
'ŗ' => 'r',
|
||||
'ś' => 's',
|
||||
'ŝ' => 's',
|
||||
'š' => 's',
|
||||
'ṡ' => 's',
|
||||
'ş' => 's',
|
||||
'ș' => 's',
|
||||
'ß' => 'ss',
|
||||
'ť' => 't',
|
||||
'ṫ' => 't',
|
||||
'ţ' => 't',
|
||||
'ț' => 't',
|
||||
'ŧ' => 't',
|
||||
'ú' => 'u',
|
||||
'ù' => 'u',
|
||||
'ŭ' => 'u',
|
||||
'û' => 'u',
|
||||
'ů' => 'u',
|
||||
'ü' => 'ue',
|
||||
'ű' => 'u',
|
||||
'ũ' => 'u',
|
||||
'ų' => 'u',
|
||||
'ū' => 'u',
|
||||
'ư' => 'u',
|
||||
'ẃ' => 'w',
|
||||
'ẁ' => 'w',
|
||||
'ŵ' => 'w',
|
||||
'ẅ' => 'w',
|
||||
'ý' => 'y',
|
||||
'ỳ' => 'y',
|
||||
'ŷ' => 'y',
|
||||
'ÿ' => 'y',
|
||||
'ź' => 'z',
|
||||
'ž' => 'z',
|
||||
'ż' => 'z',
|
||||
'þ' => 'th',
|
||||
'µ' => 'u',
|
||||
];
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,620 @@
|
|||
<?php
|
||||
/**
|
||||
* UTF-8 array of common special characters
|
||||
*
|
||||
* This array should contain all special characters (not a letter or digit)
|
||||
* defined in the various local charsets - it's not a complete list of non-alphanum
|
||||
* characters in UTF-8. It's not perfect but should match most cases of special
|
||||
* chars.
|
||||
*
|
||||
* The control characters 0x00 to 0x19 are _not_ included in this array. The space (0x20) is!
|
||||
* These chars are _not_ in the array either: _ (0x5f), : (0x3a), . (0x2e), - (0x2d), * (0x2a)
|
||||
*
|
||||
* @author Andreas Gohr <andi@splitbrain.org>
|
||||
* @see \dokuwiki\Utf8\Clean::stripspecials()
|
||||
*/
|
||||
return [
|
||||
0x1a, //
|
||||
0x1b, //
|
||||
0x1c, //
|
||||
0x1d, //
|
||||
0x1e, //
|
||||
0x1f, //
|
||||
0x20, // <space>
|
||||
0x21, // !
|
||||
0x22, // "
|
||||
0x23, // #
|
||||
0x24, // $
|
||||
0x25, // %
|
||||
0x26, // &
|
||||
0x27, // '
|
||||
0x28, // (
|
||||
0x29, // )
|
||||
0x2b, // +
|
||||
0x2c, // ,
|
||||
0x2f, // /
|
||||
0x3b, // ;
|
||||
0x3c, // <
|
||||
0x3d, // =
|
||||
0x3e, // >
|
||||
0x3f, // ?
|
||||
0x40, // @
|
||||
0x5b, // [
|
||||
0x5c, // \
|
||||
0x5d, // ]
|
||||
0x5e, // ^
|
||||
0x60, // `
|
||||
0x7b, // {
|
||||
0x7c, // |
|
||||
0x7d, // }
|
||||
0x7e, // ~
|
||||
0x7f, //
|
||||
0x80, //
|
||||
0x81, //
|
||||
0x82, //
|
||||
0x83, //
|
||||
0x84, //
|
||||
0x85, //
|
||||
0x86, //
|
||||
0x87, //
|
||||
0x88, //
|
||||
0x89, //
|
||||
0x8a, //
|
||||
0x8b, //
|
||||
0x8c, //
|
||||
0x8d, //
|
||||
0x8e, //
|
||||
0x8f, //
|
||||
0x90, //
|
||||
0x91, //
|
||||
0x92, //
|
||||
0x93, //
|
||||
0x94, //
|
||||
0x95, //
|
||||
0x96, //
|
||||
0x97, //
|
||||
0x98, //
|
||||
0x99, //
|
||||
0x9a, //
|
||||
0x9b, //
|
||||
0x9c, //
|
||||
0x9d, //
|
||||
0x9e, //
|
||||
0x9f, //
|
||||
0xa0, //
|
||||
0xa1, // ¡
|
||||
0xa2, // ¢
|
||||
0xa3, // £
|
||||
0xa4, // ¤
|
||||
0xa5, // ¥
|
||||
0xa6, // ¦
|
||||
0xa7, // §
|
||||
0xa8, // ¨
|
||||
0xa9, // ©
|
||||
0xaa, // ª
|
||||
0xab, // «
|
||||
0xac, // ¬
|
||||
0xad, //
|
||||
0xae, // ®
|
||||
0xaf, // ¯
|
||||
0xb0, // °
|
||||
0xb1, // ±
|
||||
0xb2, // ²
|
||||
0xb3, // ³
|
||||
0xb4, // ´
|
||||
0xb5, // µ
|
||||
0xb6, // ¶
|
||||
0xb7, // ·
|
||||
0xb8, // ¸
|
||||
0xb9, // ¹
|
||||
0xba, // º
|
||||
0xbb, // »
|
||||
0xbc, // ¼
|
||||
0xbd, // ½
|
||||
0xbe, // ¾
|
||||
0xbf, // ¿
|
||||
0xd7, // ×
|
||||
0xf7, // ÷
|
||||
0x2c7, // ˇ
|
||||
0x2d8, // ˘
|
||||
0x2d9, // ˙
|
||||
0x2da, // ˚
|
||||
0x2db, // ˛
|
||||
0x2dc, // ˜
|
||||
0x2dd, // ˝
|
||||
0x300, // ̀
|
||||
0x301, // ́
|
||||
0x303, // ̃
|
||||
0x309, // ̉
|
||||
0x323, // ̣
|
||||
0x384, // ΄
|
||||
0x385, // ΅
|
||||
0x387, // ·
|
||||
0x3c6, // φ
|
||||
0x3d1, // ϑ
|
||||
0x3d2, // ϒ
|
||||
0x3d5, // ϕ
|
||||
0x3d6, // ϖ
|
||||
0x5b0, // ְ
|
||||
0x5b1, // ֱ
|
||||
0x5b2, // ֲ
|
||||
0x5b3, // ֳ
|
||||
0x5b4, // ִ
|
||||
0x5b5, // ֵ
|
||||
0x5b6, // ֶ
|
||||
0x5b7, // ַ
|
||||
0x5b8, // ָ
|
||||
0x5b9, // ֹ
|
||||
0x5bb, // ֻ
|
||||
0x5bc, // ּ
|
||||
0x5bd, // ֽ
|
||||
0x5be, // ־
|
||||
0x5bf, // ֿ
|
||||
0x5c0, // ׀
|
||||
0x5c1, // ׁ
|
||||
0x5c2, // ׂ
|
||||
0x5c3, // ׃
|
||||
0x5f3, // ׳
|
||||
0x5f4, // ״
|
||||
0x60c, // ،
|
||||
0x61b, // ؛
|
||||
0x61f, // ؟
|
||||
0x640, // ـ
|
||||
0x64b, // ً
|
||||
0x64c, // ٌ
|
||||
0x64d, // ٍ
|
||||
0x64e, // َ
|
||||
0x64f, // ُ
|
||||
0x650, // ِ
|
||||
0x651, // ّ
|
||||
0x652, // ْ
|
||||
0x66a, // ٪
|
||||
0xe3f, // ฿
|
||||
0x200c, //
|
||||
0x200d, //
|
||||
0x200e, //
|
||||
0x200f, //
|
||||
0x2013, // –
|
||||
0x2014, // —
|
||||
0x2015, // ―
|
||||
0x2017, // ‗
|
||||
0x2018, // ‘
|
||||
0x2019, // ’
|
||||
0x201a, // ‚
|
||||
0x201c, // “
|
||||
0x201d, // ”
|
||||
0x201e, // „
|
||||
0x2020, // †
|
||||
0x2021, // ‡
|
||||
0x2022, // •
|
||||
0x2026, // …
|
||||
0x2030, // ‰
|
||||
0x2032, // ′
|
||||
0x2033, // ″
|
||||
0x2039, // ‹
|
||||
0x203a, // ›
|
||||
0x2044, // ⁄
|
||||
0x20a7, // ₧
|
||||
0x20aa, // ₪
|
||||
0x20ab, // ₫
|
||||
0x20ac, // €
|
||||
0x2116, // №
|
||||
0x2118, // ℘
|
||||
0x2122, // ™
|
||||
0x2126, // Ω
|
||||
0x2135, // ℵ
|
||||
0x2190, // ←
|
||||
0x2191, // ↑
|
||||
0x2192, // →
|
||||
0x2193, // ↓
|
||||
0x2194, // ↔
|
||||
0x2195, // ↕
|
||||
0x21b5, // ↵
|
||||
0x21d0, // ⇐
|
||||
0x21d1, // ⇑
|
||||
0x21d2, // ⇒
|
||||
0x21d3, // ⇓
|
||||
0x21d4, // ⇔
|
||||
0x2200, // ∀
|
||||
0x2202, // ∂
|
||||
0x2203, // ∃
|
||||
0x2205, // ∅
|
||||
0x2206, // ∆
|
||||
0x2207, // ∇
|
||||
0x2208, // ∈
|
||||
0x2209, // ∉
|
||||
0x220b, // ∋
|
||||
0x220f, // ∏
|
||||
0x2211, // ∑
|
||||
0x2212, // −
|
||||
0x2215, // ∕
|
||||
0x2217, // ∗
|
||||
0x2219, // ∙
|
||||
0x221a, // √
|
||||
0x221d, // ∝
|
||||
0x221e, // ∞
|
||||
0x2220, // ∠
|
||||
0x2227, // ∧
|
||||
0x2228, // ∨
|
||||
0x2229, // ∩
|
||||
0x222a, // ∪
|
||||
0x222b, // ∫
|
||||
0x2234, // ∴
|
||||
0x223c, // ∼
|
||||
0x2245, // ≅
|
||||
0x2248, // ≈
|
||||
0x2260, // ≠
|
||||
0x2261, // ≡
|
||||
0x2264, // ≤
|
||||
0x2265, // ≥
|
||||
0x2282, // ⊂
|
||||
0x2283, // ⊃
|
||||
0x2284, // ⊄
|
||||
0x2286, // ⊆
|
||||
0x2287, // ⊇
|
||||
0x2295, // ⊕
|
||||
0x2297, // ⊗
|
||||
0x22a5, // ⊥
|
||||
0x22c5, // ⋅
|
||||
0x2310, // ⌐
|
||||
0x2320, // ⌠
|
||||
0x2321, // ⌡
|
||||
0x2329, // 〈
|
||||
0x232a, // 〉
|
||||
0x2469, // ⑩
|
||||
0x2500, // ─
|
||||
0x2502, // │
|
||||
0x250c, // ┌
|
||||
0x2510, // ┐
|
||||
0x2514, // └
|
||||
0x2518, // ┘
|
||||
0x251c, // ├
|
||||
0x2524, // ┤
|
||||
0x252c, // ┬
|
||||
0x2534, // ┴
|
||||
0x253c, // ┼
|
||||
0x2550, // ═
|
||||
0x2551, // ║
|
||||
0x2552, // ╒
|
||||
0x2553, // ╓
|
||||
0x2554, // ╔
|
||||
0x2555, // ╕
|
||||
0x2556, // ╖
|
||||
0x2557, // ╗
|
||||
0x2558, // ╘
|
||||
0x2559, // ╙
|
||||
0x255a, // ╚
|
||||
0x255b, // ╛
|
||||
0x255c, // ╜
|
||||
0x255d, // ╝
|
||||
0x255e, // ╞
|
||||
0x255f, // ╟
|
||||
0x2560, // ╠
|
||||
0x2561, // ╡
|
||||
0x2562, // ╢
|
||||
0x2563, // ╣
|
||||
0x2564, // ╤
|
||||
0x2565, // ╥
|
||||
0x2566, // ╦
|
||||
0x2567, // ╧
|
||||
0x2568, // ╨
|
||||
0x2569, // ╩
|
||||
0x256a, // ╪
|
||||
0x256b, // ╫
|
||||
0x256c, // ╬
|
||||
0x2580, // ▀
|
||||
0x2584, // ▄
|
||||
0x2588, // █
|
||||
0x258c, // ▌
|
||||
0x2590, // ▐
|
||||
0x2591, // ░
|
||||
0x2592, // ▒
|
||||
0x2593, // ▓
|
||||
0x25a0, // ■
|
||||
0x25b2, // ▲
|
||||
0x25bc, // ▼
|
||||
0x25c6, // ◆
|
||||
0x25ca, // ◊
|
||||
0x25cf, // ●
|
||||
0x25d7, // ◗
|
||||
0x2605, // ★
|
||||
0x260e, // ☎
|
||||
0x261b, // ☛
|
||||
0x261e, // ☞
|
||||
0x2660, // ♠
|
||||
0x2663, // ♣
|
||||
0x2665, // ♥
|
||||
0x2666, // ♦
|
||||
0x2701, // ✁
|
||||
0x2702, // ✂
|
||||
0x2703, // ✃
|
||||
0x2704, // ✄
|
||||
0x2706, // ✆
|
||||
0x2707, // ✇
|
||||
0x2708, // ✈
|
||||
0x2709, // ✉
|
||||
0x270c, // ✌
|
||||
0x270d, // ✍
|
||||
0x270e, // ✎
|
||||
0x270f, // ✏
|
||||
0x2710, // ✐
|
||||
0x2711, // ✑
|
||||
0x2712, // ✒
|
||||
0x2713, // ✓
|
||||
0x2714, // ✔
|
||||
0x2715, // ✕
|
||||
0x2716, // ✖
|
||||
0x2717, // ✗
|
||||
0x2718, // ✘
|
||||
0x2719, // ✙
|
||||
0x271a, // ✚
|
||||
0x271b, // ✛
|
||||
0x271c, // ✜
|
||||
0x271d, // ✝
|
||||
0x271e, // ✞
|
||||
0x271f, // ✟
|
||||
0x2720, // ✠
|
||||
0x2721, // ✡
|
||||
0x2722, // ✢
|
||||
0x2723, // ✣
|
||||
0x2724, // ✤
|
||||
0x2725, // ✥
|
||||
0x2726, // ✦
|
||||
0x2727, // ✧
|
||||
0x2729, // ✩
|
||||
0x272a, // ✪
|
||||
0x272b, // ✫
|
||||
0x272c, // ✬
|
||||
0x272d, // ✭
|
||||
0x272e, // ✮
|
||||
0x272f, // ✯
|
||||
0x2730, // ✰
|
||||
0x2731, // ✱
|
||||
0x2732, // ✲
|
||||
0x2733, // ✳
|
||||
0x2734, // ✴
|
||||
0x2735, // ✵
|
||||
0x2736, // ✶
|
||||
0x2737, // ✷
|
||||
0x2738, // ✸
|
||||
0x2739, // ✹
|
||||
0x273a, // ✺
|
||||
0x273b, // ✻
|
||||
0x273c, // ✼
|
||||
0x273d, // ✽
|
||||
0x273e, // ✾
|
||||
0x273f, // ✿
|
||||
0x2740, // ❀
|
||||
0x2741, // ❁
|
||||
0x2742, // ❂
|
||||
0x2743, // ❃
|
||||
0x2744, // ❄
|
||||
0x2745, // ❅
|
||||
0x2746, // ❆
|
||||
0x2747, // ❇
|
||||
0x2748, // ❈
|
||||
0x2749, // ❉
|
||||
0x274a, // ❊
|
||||
0x274b, // ❋
|
||||
0x274d, // ❍
|
||||
0x274f, // ❏
|
||||
0x2750, // ❐
|
||||
0x2751, // ❑
|
||||
0x2752, // ❒
|
||||
0x2756, // ❖
|
||||
0x2758, // ❘
|
||||
0x2759, // ❙
|
||||
0x275a, // ❚
|
||||
0x275b, // ❛
|
||||
0x275c, // ❜
|
||||
0x275d, // ❝
|
||||
0x275e, // ❞
|
||||
0x2761, // ❡
|
||||
0x2762, // ❢
|
||||
0x2763, // ❣
|
||||
0x2764, // ❤
|
||||
0x2765, // ❥
|
||||
0x2766, // ❦
|
||||
0x2767, // ❧
|
||||
0x277f, // ❿
|
||||
0x2789, // ➉
|
||||
0x2793, // ➓
|
||||
0x2794, // ➔
|
||||
0x2798, // ➘
|
||||
0x2799, // ➙
|
||||
0x279a, // ➚
|
||||
0x279b, // ➛
|
||||
0x279c, // ➜
|
||||
0x279d, // ➝
|
||||
0x279e, // ➞
|
||||
0x279f, // ➟
|
||||
0x27a0, // ➠
|
||||
0x27a1, // ➡
|
||||
0x27a2, // ➢
|
||||
0x27a3, // ➣
|
||||
0x27a4, // ➤
|
||||
0x27a5, // ➥
|
||||
0x27a6, // ➦
|
||||
0x27a7, // ➧
|
||||
0x27a8, // ➨
|
||||
0x27a9, // ➩
|
||||
0x27aa, // ➪
|
||||
0x27ab, // ➫
|
||||
0x27ac, // ➬
|
||||
0x27ad, // ➭
|
||||
0x27ae, // ➮
|
||||
0x27af, // ➯
|
||||
0x27b1, // ➱
|
||||
0x27b2, // ➲
|
||||
0x27b3, // ➳
|
||||
0x27b4, // ➴
|
||||
0x27b5, // ➵
|
||||
0x27b6, // ➶
|
||||
0x27b7, // ➷
|
||||
0x27b8, // ➸
|
||||
0x27b9, // ➹
|
||||
0x27ba, // ➺
|
||||
0x27bb, // ➻
|
||||
0x27bc, // ➼
|
||||
0x27bd, // ➽
|
||||
0x27be, // ➾
|
||||
0x3000, //
|
||||
0x3001, // 、
|
||||
0x3002, // 。
|
||||
0x3003, // 〃
|
||||
0x3008, // 〈
|
||||
0x3009, // 〉
|
||||
0x300a, // 《
|
||||
0x300b, // 》
|
||||
0x300c, // 「
|
||||
0x300d, // 」
|
||||
0x300e, // 『
|
||||
0x300f, // 』
|
||||
0x3010, // 【
|
||||
0x3011, // 】
|
||||
0x3012, // 〒
|
||||
0x3014, // 〔
|
||||
0x3015, // 〕
|
||||
0x3016, // 〖
|
||||
0x3017, // 〗
|
||||
0x3018, // 〘
|
||||
0x3019, // 〙
|
||||
0x301a, // 〚
|
||||
0x301b, // 〛
|
||||
0x3036, // 〶
|
||||
0xf6d9, //
|
||||
0xf6da, //
|
||||
0xf6db, //
|
||||
0xf8d7, //
|
||||
0xf8d8, //
|
||||
0xf8d9, //
|
||||
0xf8da, //
|
||||
0xf8db, //
|
||||
0xf8dc, //
|
||||
0xf8dd, //
|
||||
0xf8de, //
|
||||
0xf8df, //
|
||||
0xf8e0, //
|
||||
0xf8e1, //
|
||||
0xf8e2, //
|
||||
0xf8e3, //
|
||||
0xf8e4, //
|
||||
0xf8e5, //
|
||||
0xf8e6, //
|
||||
0xf8e7, //
|
||||
0xf8e8, //
|
||||
0xf8e9, //
|
||||
0xf8ea, //
|
||||
0xf8eb, //
|
||||
0xf8ec, //
|
||||
0xf8ed, //
|
||||
0xf8ee, //
|
||||
0xf8ef, //
|
||||
0xf8f0, //
|
||||
0xf8f1, //
|
||||
0xf8f2, //
|
||||
0xf8f3, //
|
||||
0xf8f4, //
|
||||
0xf8f5, //
|
||||
0xf8f6, //
|
||||
0xf8f7, //
|
||||
0xf8f8, //
|
||||
0xf8f9, //
|
||||
0xf8fa, //
|
||||
0xf8fb, //
|
||||
0xf8fc, //
|
||||
0xf8fd, //
|
||||
0xf8fe, //
|
||||
0xfe7c, // ﹼ
|
||||
0xfe7d, // ﹽ
|
||||
0xff01, // !
|
||||
0xff02, // "
|
||||
0xff03, // #
|
||||
0xff04, // $
|
||||
0xff05, // %
|
||||
0xff06, // &
|
||||
0xff07, // '
|
||||
0xff08, // (
|
||||
0xff09, // )
|
||||
0xff09, // )
|
||||
0xff0a, // *
|
||||
0xff0b, // +
|
||||
0xff0c, // ,
|
||||
0xff0d, // -
|
||||
0xff0e, // .
|
||||
0xff0f, // /
|
||||
0xff1a, // :
|
||||
0xff1b, // ;
|
||||
0xff1c, // <
|
||||
0xff1d, // =
|
||||
0xff1e, // >
|
||||
0xff1f, // ?
|
||||
0xff20, // @
|
||||
0xff3b, // [
|
||||
0xff3c, // \
|
||||
0xff3d, // ]
|
||||
0xff3e, // ^
|
||||
0xff40, // `
|
||||
0xff5b, // {
|
||||
0xff5c, // |
|
||||
0xff5d, // }
|
||||
0xff5e, // ~
|
||||
0xff5f, // ⦅
|
||||
0xff60, // ⦆
|
||||
0xff61, // 。
|
||||
0xff62, // 「
|
||||
0xff63, // 」
|
||||
0xff64, // 、
|
||||
0xff65, // ・
|
||||
0xffe0, // ¢
|
||||
0xffe1, // £
|
||||
0xffe2, // ¬
|
||||
0xffe3, //  ̄
|
||||
0xffe4, // ¦
|
||||
0xffe5, // ¥
|
||||
0xffe6, // ₩
|
||||
0xffe8, // │
|
||||
0xffe9, // ←
|
||||
0xffea, // ↑
|
||||
0xffeb, // →
|
||||
0xffec, // ↓
|
||||
0xffed, // ■
|
||||
0xffee, // ○
|
||||
0x1d6fc, // 𝛼
|
||||
0x1d6fd, // 𝛽
|
||||
0x1d6fe, // 𝛾
|
||||
0x1d6ff, // 𝛿
|
||||
0x1d700, // 𝜀
|
||||
0x1d701, // 𝜁
|
||||
0x1d702, // 𝜂
|
||||
0x1d703, // 𝜃
|
||||
0x1d704, // 𝜄
|
||||
0x1d705, // 𝜅
|
||||
0x1d706, // 𝜆
|
||||
0x1d707, // 𝜇
|
||||
0x1d708, // 𝜈
|
||||
0x1d709, // 𝜉
|
||||
0x1d70a, // 𝜊
|
||||
0x1d70b, // 𝜋
|
||||
0x1d70c, // 𝜌
|
||||
0x1d70d, // 𝜍
|
||||
0x1d70e, // 𝜎
|
||||
0x1d70f, // 𝜏
|
||||
0x1d710, // 𝜐
|
||||
0x1d711, // 𝜑
|
||||
0x1d712, // 𝜒
|
||||
0x1d713, // 𝜓
|
||||
0x1d714, // 𝜔
|
||||
0x1d715, // 𝜕
|
||||
0x1d716, // 𝜖
|
||||
0x1d717, // 𝜗
|
||||
0x1d718, // 𝜘
|
||||
0x1d719, // 𝜙
|
||||
0x1d71a, // 𝜚
|
||||
0x1d71b, // 𝜛
|
||||
0xc2a0, // 슠
|
||||
0xe28087, //
|
||||
0xe280af, //
|
||||
0xe281a0, //
|
||||
0xefbbbf, //
|
||||
];
|
|
@ -0,0 +1,114 @@
|
|||
<?php
|
||||
/**
|
||||
* UTF-8 lookup table for upper case accented letters
|
||||
*
|
||||
* This lookup table defines replacements for accented characters from the ASCII-7
|
||||
* range. These are upper case letters only.
|
||||
*
|
||||
* @author Andreas Gohr <andi@splitbrain.org>
|
||||
* @see \dokuwiki\Utf8\Clean::deaccent()
|
||||
*/
|
||||
return [
|
||||
'Á' => 'A',
|
||||
'À' => 'A',
|
||||
'Ă' => 'A',
|
||||
'Â' => 'A',
|
||||
'Å' => 'A',
|
||||
'Ä' => 'Ae',
|
||||
'Ã' => 'A',
|
||||
'Ą' => 'A',
|
||||
'Ā' => 'A',
|
||||
'Æ' => 'Ae',
|
||||
'Ḃ' => 'B',
|
||||
'Ć' => 'C',
|
||||
'Ĉ' => 'C',
|
||||
'Č' => 'C',
|
||||
'Ċ' => 'C',
|
||||
'Ç' => 'C',
|
||||
'Ď' => 'D',
|
||||
'Ḋ' => 'D',
|
||||
'Đ' => 'D',
|
||||
'Ð' => 'Dh',
|
||||
'É' => 'E',
|
||||
'È' => 'E',
|
||||
'Ĕ' => 'E',
|
||||
'Ê' => 'E',
|
||||
'Ě' => 'E',
|
||||
'Ë' => 'E',
|
||||
'Ė' => 'E',
|
||||
'Ę' => 'E',
|
||||
'Ē' => 'E',
|
||||
'Ḟ' => 'F',
|
||||
'Ƒ' => 'F',
|
||||
'Ğ' => 'G',
|
||||
'Ĝ' => 'G',
|
||||
'Ġ' => 'G',
|
||||
'Ģ' => 'G',
|
||||
'Ĥ' => 'H',
|
||||
'Ħ' => 'H',
|
||||
'Í' => 'I',
|
||||
'Ì' => 'I',
|
||||
'Î' => 'I',
|
||||
'Ï' => 'I',
|
||||
'Ĩ' => 'I',
|
||||
'Į' => 'I',
|
||||
'Ī' => 'I',
|
||||
'Ĵ' => 'J',
|
||||
'Ķ' => 'K',
|
||||
'Ĺ' => 'L',
|
||||
'Ľ' => 'L',
|
||||
'Ļ' => 'L',
|
||||
'Ł' => 'L',
|
||||
'Ṁ' => 'M',
|
||||
'Ń' => 'N',
|
||||
'Ň' => 'N',
|
||||
'Ñ' => 'N',
|
||||
'Ņ' => 'N',
|
||||
'Ó' => 'O',
|
||||
'Ò' => 'O',
|
||||
'Ô' => 'O',
|
||||
'Ö' => 'Oe',
|
||||
'Ő' => 'O',
|
||||
'Õ' => 'O',
|
||||
'Ø' => 'O',
|
||||
'Ō' => 'O',
|
||||
'Ơ' => 'O',
|
||||
'Ṗ' => 'P',
|
||||
'Ŕ' => 'R',
|
||||
'Ř' => 'R',
|
||||
'Ŗ' => 'R',
|
||||
'Ś' => 'S',
|
||||
'Ŝ' => 'S',
|
||||
'Š' => 'S',
|
||||
'Ṡ' => 'S',
|
||||
'Ş' => 'S',
|
||||
'Ș' => 'S',
|
||||
'Ť' => 'T',
|
||||
'Ṫ' => 'T',
|
||||
'Ţ' => 'T',
|
||||
'Ț' => 'T',
|
||||
'Ŧ' => 'T',
|
||||
'Ú' => 'U',
|
||||
'Ù' => 'U',
|
||||
'Ŭ' => 'U',
|
||||
'Û' => 'U',
|
||||
'Ů' => 'U',
|
||||
'Ü' => 'Ue',
|
||||
'Ű' => 'U',
|
||||
'Ũ' => 'U',
|
||||
'Ų' => 'U',
|
||||
'Ū' => 'U',
|
||||
'Ư' => 'U',
|
||||
'Ẃ' => 'W',
|
||||
'Ẁ' => 'W',
|
||||
'Ŵ' => 'W',
|
||||
'Ẅ' => 'W',
|
||||
'Ý' => 'Y',
|
||||
'Ỳ' => 'Y',
|
||||
'Ŷ' => 'Y',
|
||||
'Ÿ' => 'Y',
|
||||
'Ź' => 'Z',
|
||||
'Ž' => 'Z',
|
||||
'Ż' => 'Z',
|
||||
'Þ' => 'Th',
|
||||
];
|
10
inc/auth.php
10
inc/auth.php
|
@ -518,7 +518,7 @@ function auth_isMember($memberlist, $user, array $groups) {
|
|||
|
||||
// clean user and groups
|
||||
if(!$auth->isCaseSensitive()) {
|
||||
$user = utf8_strtolower($user);
|
||||
$user = \dokuwiki\Utf8\PhpString::strtolower($user);
|
||||
$groups = array_map('utf8_strtolower', $groups);
|
||||
}
|
||||
$user = $auth->cleanUser($user);
|
||||
|
@ -533,7 +533,7 @@ function auth_isMember($memberlist, $user, array $groups) {
|
|||
// compare cleaned values
|
||||
foreach($members as $member) {
|
||||
if($member == '@ALL' ) return true;
|
||||
if(!$auth->isCaseSensitive()) $member = utf8_strtolower($member);
|
||||
if(!$auth->isCaseSensitive()) $member = \dokuwiki\Utf8\PhpString::strtolower($member);
|
||||
if($member[0] == '@') {
|
||||
$member = $auth->cleanGroup(substr($member, 1));
|
||||
if(in_array($member, $groups)) return true;
|
||||
|
@ -621,7 +621,7 @@ function auth_aclcheck_cb($data) {
|
|||
}
|
||||
|
||||
if(!$auth->isCaseSensitive()) {
|
||||
$user = utf8_strtolower($user);
|
||||
$user = \dokuwiki\Utf8\PhpString::strtolower($user);
|
||||
$groups = array_map('utf8_strtolower', $groups);
|
||||
}
|
||||
$user = auth_nameencode($auth->cleanUser($user));
|
||||
|
@ -648,7 +648,7 @@ function auth_aclcheck_cb($data) {
|
|||
$match = preg_replace('/#.*$/', '', $match); //ignore comments
|
||||
$acl = preg_split('/[ \t]+/', $match);
|
||||
if(!$auth->isCaseSensitive() && $acl[1] !== '@ALL') {
|
||||
$acl[1] = utf8_strtolower($acl[1]);
|
||||
$acl[1] = \dokuwiki\Utf8\PhpString::strtolower($acl[1]);
|
||||
}
|
||||
if(!in_array($acl[1], $groups)) {
|
||||
continue;
|
||||
|
@ -678,7 +678,7 @@ function auth_aclcheck_cb($data) {
|
|||
$match = preg_replace('/#.*$/', '', $match); //ignore comments
|
||||
$acl = preg_split('/[ \t]+/', $match);
|
||||
if(!$auth->isCaseSensitive() && $acl[1] !== '@ALL') {
|
||||
$acl[1] = utf8_strtolower($acl[1]);
|
||||
$acl[1] = \dokuwiki\Utf8\PhpString::strtolower($acl[1]);
|
||||
}
|
||||
if(!in_array($acl[1], $groups)) {
|
||||
continue;
|
||||
|
|
|
@ -93,7 +93,7 @@ function addLogEntry($date, $id, $type=DOKU_CHANGE_TYPE_EDIT, $summary='', $extr
|
|||
'type' => str_replace($strip, '', $type),
|
||||
'id' => $id,
|
||||
'user' => $user,
|
||||
'sum' => utf8_substr(str_replace($strip, '', $summary), 0, 255),
|
||||
'sum' => \dokuwiki\Utf8\PhpString::substr(str_replace($strip, '', $summary), 0, 255),
|
||||
'extra' => str_replace($strip, '', $extra),
|
||||
'sizechange' => $sizechange
|
||||
);
|
||||
|
@ -180,7 +180,7 @@ function addMediaLogEntry(
|
|||
'type' => str_replace($strip, '', $type),
|
||||
'id' => $id,
|
||||
'user' => $user,
|
||||
'sum' => utf8_substr(str_replace($strip, '', $summary), 0, 255),
|
||||
'sum' => \dokuwiki\Utf8\PhpString::substr(str_replace($strip, '', $summary), 0, 255),
|
||||
'extra' => str_replace($strip, '', $extra),
|
||||
'sizechange' => $sizechange
|
||||
);
|
||||
|
|
|
@ -1017,7 +1017,7 @@ function cleanText($text) {
|
|||
// if the text is not valid UTF-8 we simply assume latin1
|
||||
// this won't break any worse than it breaks with the wrong encoding
|
||||
// but might actually fix the problem in many cases
|
||||
if(!utf8_check($text)) $text = utf8_encode($text);
|
||||
if(!\dokuwiki\Utf8\Clean::isUtf8($text)) $text = utf8_encode($text);
|
||||
|
||||
return $text;
|
||||
}
|
||||
|
@ -1173,12 +1173,12 @@ function parsePageTemplate(&$data) {
|
|||
utf8_ucwords(curNS($id)),
|
||||
utf8_strtoupper(curNS($id)),
|
||||
$file,
|
||||
utf8_ucfirst($file),
|
||||
utf8_strtoupper($file),
|
||||
\dokuwiki\Utf8\PhpString::ucfirst($file),
|
||||
\dokuwiki\Utf8\PhpString::strtoupper($file),
|
||||
$page,
|
||||
utf8_ucfirst($page),
|
||||
utf8_ucwords($page),
|
||||
utf8_strtoupper($page),
|
||||
\dokuwiki\Utf8\PhpString::ucfirst($page),
|
||||
\dokuwiki\Utf8\PhpString::ucwords($page),
|
||||
\dokuwiki\Utf8\PhpString::strtoupper($page),
|
||||
$INPUT->server->str('REMOTE_USER'),
|
||||
$USERINFO['name'],
|
||||
$USERINFO['mail'],
|
||||
|
@ -1741,12 +1741,12 @@ function preg_quote_cb($string) {
|
|||
* @return string
|
||||
*/
|
||||
function shorten($keep, $short, $max, $min = 9, $char = '…') {
|
||||
$max = $max - utf8_strlen($keep);
|
||||
$max = $max - \dokuwiki\Utf8\PhpString::strlen($keep);
|
||||
if($max < $min) return $keep;
|
||||
$len = utf8_strlen($short);
|
||||
$len = \dokuwiki\Utf8\PhpString::strlen($short);
|
||||
if($len <= $max) return $keep.$short;
|
||||
$half = floor($max / 2);
|
||||
return $keep.utf8_substr($short, 0, $half - 1).$char.utf8_substr($short, $len - $half);
|
||||
return $keep.\dokuwiki\Utf8\PhpString::substr($short, 0, $half - 1).$char.\dokuwiki\Utf8\PhpString::substr($short, $len - $half);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -69,9 +69,9 @@ function sendFile($file, $mime, $dl, $cache, $public = false, $orig = null) {
|
|||
|
||||
//download or display?
|
||||
if($dl) {
|
||||
header('Content-Disposition: attachment;'.rfc2231_encode('filename', utf8_basename($orig)).';');
|
||||
header('Content-Disposition: attachment;'.rfc2231_encode('filename', \dokuwiki\Utf8\PhpString::basename($orig)).';');
|
||||
} else {
|
||||
header('Content-Disposition: inline;'.rfc2231_encode('filename', utf8_basename($orig)).';');
|
||||
header('Content-Disposition: inline;'.rfc2231_encode('filename', \dokuwiki\Utf8\PhpString::basename($orig)).';');
|
||||
}
|
||||
|
||||
//use x-sendfile header to pass the delivery to compatible webservers
|
||||
|
|
|
@ -97,7 +97,7 @@ function _ft_pageSearch(&$data) {
|
|||
);
|
||||
$evt = new Event('FULLTEXT_PHRASE_MATCH',$evdata);
|
||||
if ($evt->advise_before() && $evt->result !== true) {
|
||||
$text = utf8_strtolower($evdata['text']);
|
||||
$text = \dokuwiki\Utf8\PhpString::strtolower($evdata['text']);
|
||||
if (strpos($text, $phrase) !== false) {
|
||||
$evt->result = true;
|
||||
}
|
||||
|
@ -412,7 +412,7 @@ function ft_snippet($id,$highlight){
|
|||
$match = array();
|
||||
$snippets = array();
|
||||
$utf8_offset = $offset = $end = 0;
|
||||
$len = utf8_strlen($text);
|
||||
$len = \dokuwiki\Utf8\PhpString::strlen($text);
|
||||
|
||||
// build a regexp from the phrases to highlight
|
||||
$re1 = '(' .
|
||||
|
@ -442,8 +442,8 @@ function ft_snippet($id,$highlight){
|
|||
list($str,$idx) = $match[0];
|
||||
|
||||
// convert $idx (a byte offset) into a utf8 character offset
|
||||
$utf8_idx = utf8_strlen(substr($text,0,$idx));
|
||||
$utf8_len = utf8_strlen($str);
|
||||
$utf8_idx = \dokuwiki\Utf8\PhpString::strlen(substr($text,0,$idx));
|
||||
$utf8_len = \dokuwiki\Utf8\PhpString::strlen($str);
|
||||
|
||||
// establish context, 100 bytes surrounding the match string
|
||||
// first look to see if we can go 100 either side,
|
||||
|
@ -472,9 +472,9 @@ function ft_snippet($id,$highlight){
|
|||
$end = $utf8_idx + $utf8_len + $post; // now set it to the end of this context
|
||||
|
||||
if ($append) {
|
||||
$snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append);
|
||||
$snippets[count($snippets)-1] .= \dokuwiki\Utf8\PhpString::substr($text,$append,$end-$append);
|
||||
} else {
|
||||
$snippets[] = utf8_substr($text,$start,$end-$start);
|
||||
$snippets[] = \dokuwiki\Utf8\PhpString::substr($text,$start,$end-$start);
|
||||
}
|
||||
|
||||
// set $offset for next match attempt
|
||||
|
@ -483,8 +483,8 @@ function ft_snippet($id,$highlight){
|
|||
// this prevents further matching of this snippet but for possible matches of length
|
||||
// smaller than match length + context (at least 50 characters) this match is part of the context
|
||||
$utf8_offset = $utf8_idx + $utf8_len;
|
||||
$offset = $idx + strlen(utf8_substr($text,$utf8_idx,$utf8_len));
|
||||
$offset = utf8_correctIdx($text,$offset);
|
||||
$offset = $idx + strlen(\dokuwiki\Utf8\PhpString::substr($text,$utf8_idx,$utf8_len));
|
||||
$offset = \dokuwiki\Utf8\Clean::correctIdx($text,$offset);
|
||||
}
|
||||
|
||||
$m = "\1";
|
||||
|
@ -674,7 +674,7 @@ function ft_queryParser($Indexer, $query){
|
|||
*/
|
||||
$parsed_query = '';
|
||||
$parens_level = 0;
|
||||
$terms = preg_split('/(-?".*?")/u', utf8_strtolower($query), -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
|
||||
$terms = preg_split('/(-?".*?")/u', \dokuwiki\Utf8\PhpString::strtolower($query), -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
|
||||
|
||||
foreach ($terms as $term) {
|
||||
$parsed = '';
|
||||
|
|
|
@ -355,7 +355,7 @@ function html_hilight($html,$phrases){
|
|||
$regex = join('|',$phrases);
|
||||
|
||||
if ($regex === '') return $html;
|
||||
if (!utf8_check($regex)) return $html;
|
||||
if (!\dokuwiki\Utf8\Clean::isUtf8($regex)) return $html;
|
||||
$html = @preg_replace_callback("/((<[^>]*)|$regex)/ui",'html_hilight_callback',$html);
|
||||
return $html;
|
||||
}
|
||||
|
|
|
@ -605,12 +605,12 @@ class Doku_Indexer {
|
|||
)
|
||||
);
|
||||
if (preg_match('/[^0-9A-Za-z ]/u', $text))
|
||||
$text = utf8_stripspecials($text, ' ', '\._\-:'.$wc);
|
||||
$text = \dokuwiki\Utf8\Clean::stripspecials($text, ' ', '\._\-:'.$wc);
|
||||
|
||||
$wordlist = explode(' ', $text);
|
||||
foreach ($wordlist as $i => $word) {
|
||||
$wordlist[$i] = (preg_match('/[^0-9A-Za-z]/u', $word)) ?
|
||||
utf8_strtolower($word) : strtolower($word);
|
||||
\dokuwiki\Utf8\PhpString::strtolower($word) : strtolower($word);
|
||||
}
|
||||
|
||||
foreach ($wordlist as $i => $word) {
|
||||
|
@ -1603,7 +1603,7 @@ function idx_indexLengths($filter) {
|
|||
* @return string
|
||||
*/
|
||||
function idx_cleanName($name) {
|
||||
$name = utf8_romanize(trim((string)$name));
|
||||
$name = \dokuwiki\Utf8\Clean::romanize(trim((string)$name));
|
||||
$name = preg_replace('#[ \./\\:-]+#', '_', $name);
|
||||
$name = preg_replace('/[^A-Za-z0-9_]/', '', $name);
|
||||
return strtolower($name);
|
||||
|
|
|
@ -342,7 +342,7 @@ function msg($message,$lvl=0,$line='',$file='',$allow=MSG_PUBLIC){
|
|||
$errors[1] = 'success';
|
||||
$errors[2] = 'notify';
|
||||
|
||||
if($line || $file) $message.=' ['.utf8_basename($file).':'.$line.']';
|
||||
if($line || $file) $message.=' ['.\dokuwiki\Utf8\PhpString::basename($file).':'.$line.']';
|
||||
|
||||
if(!isset($MSG)) $MSG = array();
|
||||
$MSG[]=array('lvl' => $errors[$lvl], 'msg' => $message, 'allow' => $allow);
|
||||
|
|
|
@ -623,7 +623,7 @@ function io_download($url,$file,$useAttachment=false,$defaultName='',$maxSize=20
|
|||
if (is_string($content_disposition) &&
|
||||
preg_match('/attachment;\s*filename\s*=\s*"([^"]*)"/i', $content_disposition, $match)) {
|
||||
|
||||
$name = utf8_basename($match[1]);
|
||||
$name = \dokuwiki\Utf8\PhpString::basename($match[1]);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
14
inc/mail.php
14
inc/mail.php
|
@ -134,11 +134,11 @@ function _mail_send_action($data) {
|
|||
// end additional code to support event ... original mail_send() code from here
|
||||
|
||||
if(defined('MAILHEADER_ASCIIONLY')){
|
||||
$subject = utf8_deaccent($subject);
|
||||
$subject = utf8_strip($subject);
|
||||
$subject = \dokuwiki\Utf8\Clean::deaccent($subject);
|
||||
$subject = \dokuwiki\Utf8\Clean::strip($subject);
|
||||
}
|
||||
|
||||
if(!utf8_isASCII($subject)) {
|
||||
if(!\dokuwiki\Utf8\Clean::isASCII($subject)) {
|
||||
$enc_subj = '=?UTF-8?Q?'.mail_quotedprintable_encode($subject,0).'?=';
|
||||
// Spaces must be encoded according to rfc2047. Use the "_" shorthand
|
||||
$enc_subj = preg_replace('/ /', '_', $enc_subj);
|
||||
|
@ -212,7 +212,7 @@ function mail_encode_address($string,$header='',$names=true){
|
|||
}
|
||||
|
||||
// FIXME: is there a way to encode the local part of an email address?
|
||||
if(!utf8_isASCII($addr)){
|
||||
if(!\dokuwiki\Utf8\Clean::isASCII($addr)){
|
||||
msg(hsc("E-Mail address <$addr> is not ASCII"),-1);
|
||||
continue;
|
||||
}
|
||||
|
@ -228,11 +228,11 @@ function mail_encode_address($string,$header='',$names=true){
|
|||
$addr = "<$addr>";
|
||||
|
||||
if(defined('MAILHEADER_ASCIIONLY')){
|
||||
$text = utf8_deaccent($text);
|
||||
$text = utf8_strip($text);
|
||||
$text = \dokuwiki\Utf8\Clean::deaccent($text);
|
||||
$text = \dokuwiki\Utf8\Clean::strip($text);
|
||||
}
|
||||
|
||||
if(!utf8_isASCII($text)){
|
||||
if(!\dokuwiki\Utf8\Clean::isASCII($text)){
|
||||
// put the quotes outside as in =?UTF-8?Q?"Elan Ruusam=C3=A4e"?= vs "=?UTF-8?Q?Elan Ruusam=C3=A4e?="
|
||||
if (preg_match('/^"(.+)"$/', $text, $matches)) {
|
||||
$text = '"=?UTF-8?Q?'.mail_quotedprintable_encode($matches[1], 0).'?="';
|
||||
|
|
|
@ -261,7 +261,7 @@ function media_delete($id,$auth){
|
|||
// trigger an event - MEDIA_DELETE_FILE
|
||||
$data = array();
|
||||
$data['id'] = $id;
|
||||
$data['name'] = utf8_basename($file);
|
||||
$data['name'] = \dokuwiki\Utf8\PhpString::basename($file);
|
||||
$data['path'] = $file;
|
||||
$data['size'] = (file_exists($file)) ? filesize($file) : 0;
|
||||
|
||||
|
@ -1762,7 +1762,7 @@ function media_printimgdetail($item, $fullscreen=false){
|
|||
$d = $item['meta']->getField(array('IPTC.Caption','EXIF.UserComment',
|
||||
'EXIF.TIFFImageDescription',
|
||||
'EXIF.TIFFUserComment'));
|
||||
if(utf8_strlen($d) > 250) $d = utf8_substr($d,0,250).'...';
|
||||
if(\dokuwiki\Utf8\PhpString::strlen($d) > 250) $d = \dokuwiki\Utf8\PhpString::substr($d,0,250).'...';
|
||||
$k = $item['meta']->getField(array('IPTC.Keywords','IPTC.Category','xmp.dc:subject'));
|
||||
|
||||
// print EXIF/IPTC data
|
||||
|
|
|
@ -44,7 +44,7 @@ function getID($param='id',$clean=true){
|
|||
if($param != 'id') {
|
||||
$relpath = 'lib/exe/';
|
||||
}
|
||||
$script = $conf['basedir'].$relpath.utf8_basename($INPUT->server->str('SCRIPT_FILENAME'));
|
||||
$script = $conf['basedir'].$relpath.\dokuwiki\Utf8\PhpString::basename($INPUT->server->str('SCRIPT_FILENAME'));
|
||||
|
||||
}elseif($INPUT->server->str('PATH_INFO')){
|
||||
$request = $INPUT->server->str('PATH_INFO');
|
||||
|
@ -127,7 +127,7 @@ function cleanID($raw_id,$ascii=false){
|
|||
$sepcharpat = '#\\'.$sepchar.'+#';
|
||||
|
||||
$id = trim((string)$raw_id);
|
||||
$id = utf8_strtolower($id);
|
||||
$id = \dokuwiki\Utf8\PhpString::strtolower($id);
|
||||
|
||||
//alternative namespace separator
|
||||
if($conf['useslash']){
|
||||
|
@ -136,13 +136,13 @@ function cleanID($raw_id,$ascii=false){
|
|||
$id = strtr($id,';/',':'.$sepchar);
|
||||
}
|
||||
|
||||
if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
|
||||
if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);
|
||||
if($conf['deaccent'] == 2 || $ascii) $id = \dokuwiki\Utf8\Clean::romanize($id);
|
||||
if($conf['deaccent'] || $ascii) $id = \dokuwiki\Utf8\Clean::deaccent($id,-1);
|
||||
|
||||
//remove specials
|
||||
$id = utf8_stripspecials($id,$sepchar,'\*');
|
||||
$id = \dokuwiki\Utf8\Clean::stripspecials($id,$sepchar,'\*');
|
||||
|
||||
if($ascii) $id = utf8_strip($id);
|
||||
if($ascii) $id = \dokuwiki\Utf8\Clean::strip($id);
|
||||
|
||||
//clean up
|
||||
$id = preg_replace($sepcharpat,$sepchar,$id);
|
||||
|
|
|
@ -21,8 +21,8 @@ class Doku_Renderer_code extends Doku_Renderer {
|
|||
if(!$language) $language = 'txt';
|
||||
$language = preg_replace(PREG_PATTERN_VALID_LANGUAGE, '', $language);
|
||||
if(!$filename) $filename = 'snippet.'.$language;
|
||||
$filename = utf8_basename($filename);
|
||||
$filename = utf8_stripspecials($filename, '_');
|
||||
$filename = \dokuwiki\Utf8\PhpString::basename($filename);
|
||||
$filename = \dokuwiki\Utf8\Clean::stripspecials($filename, '_');
|
||||
|
||||
// send CRLF to Windows clients
|
||||
if(strpos($INPUT->server->str('HTTP_USER_AGENT'), 'Windows') !== false) {
|
||||
|
|
|
@ -93,7 +93,7 @@ class Doku_Renderer_metadata extends Doku_Renderer
|
|||
// cut off too long abstracts
|
||||
$this->doc = trim($this->doc);
|
||||
if (strlen($this->doc) > self::ABSTRACT_MAX) {
|
||||
$this->doc = utf8_substr($this->doc, 0, self::ABSTRACT_MAX).'…';
|
||||
$this->doc = \dokuwiki\Utf8\PhpString::substr($this->doc, 0, self::ABSTRACT_MAX).'…';
|
||||
}
|
||||
$this->meta['description']['abstract'] = $this->doc;
|
||||
}
|
||||
|
|
|
@ -1639,7 +1639,7 @@ class Doku_Renderer_xhtml extends Doku_Renderer {
|
|||
// return the title of the picture
|
||||
if(!$title) {
|
||||
// just show the sourcename
|
||||
$title = $this->_xmlEntities(utf8_basename(noNS($src)));
|
||||
$title = $this->_xmlEntities(\dokuwiki\Utf8\PhpString::basename(noNS($src)));
|
||||
}
|
||||
return $title;
|
||||
}
|
||||
|
@ -1675,7 +1675,7 @@ class Doku_Renderer_xhtml extends Doku_Renderer {
|
|||
if(!$render) {
|
||||
// if the file is not supposed to be rendered
|
||||
// return the title of the file (just the sourcename if there is no title)
|
||||
return $title ? $title : $this->_xmlEntities(utf8_basename(noNS($src)));
|
||||
return $title ? $title : $this->_xmlEntities(\dokuwiki\Utf8\PhpString::basename(noNS($src)));
|
||||
}
|
||||
|
||||
$att = array();
|
||||
|
@ -1699,7 +1699,7 @@ class Doku_Renderer_xhtml extends Doku_Renderer {
|
|||
// return the title of the flash
|
||||
if(!$title) {
|
||||
// just show the sourcename
|
||||
$title = utf8_basename(noNS($src));
|
||||
$title = \dokuwiki\Utf8\PhpString::basename(noNS($src));
|
||||
}
|
||||
return $this->_xmlEntities($title);
|
||||
}
|
||||
|
@ -1720,7 +1720,7 @@ class Doku_Renderer_xhtml extends Doku_Renderer {
|
|||
$ret .= $this->_xmlEntities($title);
|
||||
} else {
|
||||
// just show the sourcename
|
||||
$ret .= $this->_xmlEntities(utf8_basename(noNS($src)));
|
||||
$ret .= $this->_xmlEntities(\dokuwiki\Utf8\PhpString::basename(noNS($src)));
|
||||
}
|
||||
|
||||
return $ret;
|
||||
|
@ -1882,7 +1882,7 @@ class Doku_Renderer_xhtml extends Doku_Renderer {
|
|||
$url = ml($file, '', true, '&');
|
||||
$linkType = 'internalmedia';
|
||||
}
|
||||
$title = $atts['title'] ? $atts['title'] : $this->_xmlEntities(utf8_basename(noNS($file)));
|
||||
$title = $atts['title'] ? $atts['title'] : $this->_xmlEntities(\dokuwiki\Utf8\PhpString::basename(noNS($file)));
|
||||
|
||||
$out .= '<source src="'.hsc($url).'" type="'.$mime.'" />'.NL;
|
||||
// alternative content (just a link to the file)
|
||||
|
@ -1949,7 +1949,7 @@ class Doku_Renderer_xhtml extends Doku_Renderer {
|
|||
$url = ml($file, '', true, '&');
|
||||
$linkType = 'internalmedia';
|
||||
}
|
||||
$title = $atts['title'] ? $atts['title'] : $this->_xmlEntities(utf8_basename(noNS($file)));
|
||||
$title = $atts['title'] ? $atts['title'] : $this->_xmlEntities(\dokuwiki\Utf8\PhpString::basename(noNS($file)));
|
||||
|
||||
$out .= '<source src="'.hsc($url).'" type="'.$mime.'" />'.NL;
|
||||
// alternative content (just a link to the file)
|
||||
|
|
|
@ -211,7 +211,7 @@ function search_media(&$data,$base,$file,$type,$lvl,$opts){
|
|||
return false;
|
||||
}
|
||||
|
||||
$info['file'] = utf8_basename($file);
|
||||
$info['file'] = \dokuwiki\Utf8\PhpString::basename($file);
|
||||
$info['size'] = filesize($base.'/'.$file);
|
||||
$info['mtime'] = filemtime($base.'/'.$file);
|
||||
$info['writable'] = is_writable($base.'/'.$file);
|
||||
|
@ -497,7 +497,7 @@ function search_universal(&$data,$base,$file,$type,$lvl,$opts){
|
|||
$item['open'] = $return;
|
||||
|
||||
if(!empty($opts['meta'])){
|
||||
$item['file'] = utf8_basename($file);
|
||||
$item['file'] = \dokuwiki\Utf8\PhpString::basename($file);
|
||||
$item['size'] = filesize($base.'/'.$file);
|
||||
$item['mtime'] = filemtime($base.'/'.$file);
|
||||
$item['rev'] = $item['mtime'];
|
||||
|
|
1845
inc/utf8.php
1845
inc/utf8.php
File diff suppressed because it is too large
Load Diff
|
@ -176,7 +176,7 @@ function js_load($file){
|
|||
|
||||
// is it a include_once?
|
||||
if($match[1]){
|
||||
$base = utf8_basename($ifile);
|
||||
$base = \dokuwiki\Utf8\PhpString::basename($ifile);
|
||||
if(array_key_exists($base, $loaded) && $loaded[$base] === true){
|
||||
$data = str_replace($match[0], '' ,$data);
|
||||
continue;
|
||||
|
|
|
@ -101,7 +101,7 @@ class auth_plugin_authad extends DokuWiki_Auth_Plugin
|
|||
// make sure the right encoding is used
|
||||
if ($this->getConf('sso_charset')) {
|
||||
$_SERVER['REMOTE_USER'] = iconv($this->getConf('sso_charset'), 'UTF-8', $_SERVER['REMOTE_USER']);
|
||||
} elseif (!utf8_check($_SERVER['REMOTE_USER'])) {
|
||||
} elseif (!\dokuwiki\Utf8\Clean::isUtf8($_SERVER['REMOTE_USER'])) {
|
||||
$_SERVER['REMOTE_USER'] = utf8_encode($_SERVER['REMOTE_USER']);
|
||||
}
|
||||
|
||||
|
@ -297,7 +297,7 @@ class auth_plugin_authad extends DokuWiki_Auth_Plugin
|
|||
$group = str_replace('\\', '', $group);
|
||||
$group = str_replace('#', '', $group);
|
||||
$group = preg_replace('[\s]', '_', $group);
|
||||
$group = utf8_strtolower(trim($group));
|
||||
$group = \dokuwiki\Utf8\PhpString::strtolower(trim($group));
|
||||
return $group;
|
||||
}
|
||||
|
||||
|
@ -322,8 +322,8 @@ class auth_plugin_authad extends DokuWiki_Auth_Plugin
|
|||
if ($dom) $domain = $dom;
|
||||
|
||||
// clean up both
|
||||
$domain = utf8_strtolower(trim($domain));
|
||||
$user = utf8_strtolower(trim($user));
|
||||
$domain = \dokuwiki\Utf8\PhpString::strtolower(trim($domain));
|
||||
$user = \dokuwiki\Utf8\PhpString::strtolower(trim($user));
|
||||
|
||||
// is this a known, valid domain? if not discard
|
||||
if (!is_array($this->conf[$domain])) {
|
||||
|
|
|
@ -913,7 +913,7 @@ class helper_plugin_extension_extension extends DokuWiki_Plugin
|
|||
if (is_string($content_disposition) &&
|
||||
preg_match('/attachment;\s*filename\s*=\s*"([^"]*)"/i', $content_disposition, $match)) {
|
||||
|
||||
$name = utf8_basename($match[1]);
|
||||
$name = \dokuwiki\Utf8\PhpString::basename($match[1]);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -953,7 +953,7 @@ class helper_plugin_extension_extension extends DokuWiki_Plugin
|
|||
if (is_null($file)) {
|
||||
$file = md5($url);
|
||||
} else {
|
||||
$file = utf8_basename($file);
|
||||
$file = \dokuwiki\Utf8\PhpString::basename($file);
|
||||
}
|
||||
|
||||
// create tmp directory for download
|
||||
|
|
|
@ -1080,7 +1080,7 @@ class admin_plugin_usermanager extends DokuWiki_Admin_Plugin
|
|||
$fd = fopen($_FILES['import']['tmp_name'], 'r');
|
||||
if ($fd) {
|
||||
while ($csv = fgets($fd)) {
|
||||
if (!utf8_check($csv)) {
|
||||
if (!\dokuwiki\Utf8\Clean::isUtf8($csv)) {
|
||||
$csv = utf8_encode($csv);
|
||||
}
|
||||
$raw = str_getcsv($csv);
|
||||
|
|
Loading…
Reference in New Issue