Merge branch 'utf8refactor' into psr2

* utf8refactor: replaced deprecated utf8 functions; formatting cleanup; mark old utf8 functions deprecated; Some cleanup for the UTF-8 stuff; Moved all utf8 methods to their own namespaced classes; Create separate table files for UTF-8 handling
2019-07-14 21:11:04 +02:00 · 2019-07-14 21:11:04 +02:00 · b47790f975
parent f600e75a7d 8cbc5ee84f
commit b47790f975
48 changed files with 4288 additions and 1785 deletions
--- a/_test/phpcs.xml
+++ b/_test/phpcs.xml
@ -88,6 +88,7 @@
        <exclude-pattern>*/inc/Mailer.class.php</exclude-pattern>
        <exclude-pattern>*/doku.php</exclude-pattern>
        <exclude-pattern>*/install.php</exclude-pattern>
+        <exclude-pattern>*/inc/utf8.php</exclude-pattern>
        <exclude-pattern>*/feed.php</exclude-pattern>
        <exclude-pattern>*/inc/load.php</exclude-pattern>
        <exclude-pattern>*/bin/*.php</exclude-pattern>
--- a/_test/tests/inc/media_searchlist.test.php
+++ b/_test/tests/inc/media_searchlist.test.php
@ -124,7 +124,7 @@ class media_searchlist_test extends DokuWikiTest {
            $info             = array();
            $info['id']       = $this->upload_ns . ':' . $rel_id;
            $info['perm']     = auth_quickaclcheck(getNS($info['id']).':*');
-            $info['file']     = utf8_basename($file);
+            $info['file']     = \dokuwiki\Utf8\PhpString::basename($file);
            $info['size']     = filesize($file);
            $info['mtime']    = filemtime($file);
            $info['writable'] = is_writable($file);
--- a/_test/tests/inc/utf8_basename.test.php
+++ b/_test/tests/inc/utf8_basename.test.php
@ -84,8 +84,8 @@ class utf8_basename_test extends DokuWikiTest {
        );

        foreach($data as $test){
-            $this->assertEquals($test[2], utf8_basename($test[0], $test[1]), "input: ('".$test[0]."', '".$test[1]."')");
+            $this->assertEquals($test[2], \dokuwiki\Utf8\PhpString::basename($test[0], $test[1]), "input: ('".$test[0]."', '".$test[1]."')");
        }
     }

-}
+}
--- a/_test/tests/inc/utf8_correctidx.test.php
+++ b/_test/tests/inc/utf8_correctidx.test.php
@ -15,7 +15,7 @@ class utf8_correctidx_test extends DokuWikiTest {
        $tests[] = array('aaживπά우리をあöä',1,true,1);

        foreach($tests as $test){
-            $this->assertEquals(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
+            $this->assertEquals(\dokuwiki\Utf8\Clean::correctIdx($test[0],$test[1],$test[2]),$test[3]);
        }
    }

@ -33,7 +33,7 @@ class utf8_correctidx_test extends DokuWikiTest {
        $tests[] = array('aaживπά우리をあöä',4,true,4);

        foreach($tests as $test){
-            $this->assertEquals(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
+            $this->assertEquals(\dokuwiki\Utf8\Clean::correctIdx($test[0],$test[1],$test[2]),$test[3]);
        }
    }

@ -53,7 +53,7 @@ class utf8_correctidx_test extends DokuWikiTest {
        $tests[] = array('aaживπά우리をあöä',13,true,13);

        foreach($tests as $test){
-            $this->assertEquals(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
+            $this->assertEquals(\dokuwiki\Utf8\Clean::correctIdx($test[0],$test[1],$test[2]),$test[3]);
        }
    }

@ -69,7 +69,7 @@ class utf8_correctidx_test extends DokuWikiTest {
        $tests[] = array('aaживπά우리をあöä',128,true,29);

        foreach($tests as $test){
-            $this->assertEquals(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
+            $this->assertEquals(\dokuwiki\Utf8\Clean::correctIdx($test[0],$test[1],$test[2]),$test[3]);
        }
    }

--- a/_test/tests/inc/utf8_html.test.php
+++ b/_test/tests/inc/utf8_html.test.php
@ -8,61 +8,61 @@ class utf8_html_test extends DokuWikiTest {
    function test_from_1byte(){
        $in  = 'a';
        $out = 'a';
-        $this->assertEquals(utf8_tohtml($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Conversion::toHtml($in),$out);
    }

    function test_from_2byte(){
        $in  = "\xc3\xbc";
        $out = '&#252;';
-        $this->assertEquals(utf8_tohtml($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Conversion::toHtml($in),$out);
    }

    function test_from_3byte(){
        $in  = "\xe2\x99\x8a";
        $out = '&#x264a;';
-        $this->assertEquals(utf8_tohtml($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Conversion::toHtml($in),$out);
    }

    function test_from_4byte(){
        $in  = "\xf4\x80\x80\x81";
        $out = '&#x100001;';
-        $this->assertEquals(utf8_tohtml($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Conversion::toHtml($in),$out);
    }

    function test_to_1byte(){
        $out  = 'a';
        $in = 'a';
-        $this->assertEquals(utf8_unhtml($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Conversion::fromHtml($in),$out);
    }

    function test_to_2byte(){
        $out  = "\xc3\xbc";
        $in = '&#252;';
-        $this->assertEquals(utf8_unhtml($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Conversion::fromHtml($in),$out);
    }

    function test_to_3byte(){
        $out  = "\xe2\x99\x8a";
        $in = '&#x264a;';
-        $this->assertEquals(utf8_unhtml($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Conversion::fromHtml($in),$out);
    }

    function test_to_4byte(){
        $out  = "\xf4\x80\x80\x81";
        $in = '&#x100001;';
-        $this->assertEquals(utf8_unhtml($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Conversion::fromHtml($in),$out);
    }

    function test_without_entities(){
        $out  = '&amp;#38;&amp;#38;';
        $in = '&amp;#38;&#38;amp;#38;';
-        $this->assertEquals(utf8_unhtml($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Conversion::fromHtml($in),$out);
    }

    function test_with_entities(){
        $out  = '&#38;&amp;#38;';
        $in = '&amp;#38;&#38;amp;#38;';
-        $this->assertEquals(utf8_unhtml($in,HTML_ENTITIES),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Conversion::fromHtml($in,HTML_ENTITIES),$out);
    }

 }
--- a/_test/tests/inc/utf8_romanize.test.php
+++ b/_test/tests/inc/utf8_romanize.test.php
@ -18,7 +18,7 @@ class utf8_romanize_test extends DokuWikiTest {
        foreach($tests as $test){
            list($jap,$rom) = explode(';',trim($test));

-            $chk = utf8_romanize($jap);
+            $chk = \dokuwiki\Utf8\Clean::romanize($jap);
            $this->assertEquals($rom,$chk,"$jap\t->\t$chk\t!=\t$rom\t($line)");
            $line++;
        }
@ -31,7 +31,7 @@ class utf8_romanize_test extends DokuWikiTest {
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    function test_deaccented(){
-        $this->assertEquals("a A a A a o O",utf8_romanize("å Å ä Ä ä ö Ö"));
+        $this->assertEquals("a A a A a o O",\dokuwiki\Utf8\Clean::romanize("å Å ä Ä ä ö Ö"));
    }
 }
 //Setup VIM: ex: et ts=4 :
--- a/_test/tests/inc/utf8_stripspecials.test.php
+++ b/_test/tests/inc/utf8_stripspecials.test.php
@ -19,7 +19,7 @@ class utf8_stripspecials extends DokuWikiTest {
        $tests[] = array('string with nbsps','_','\*','string_with_nbsps');

        foreach($tests as $test){
-            $this->assertEquals(utf8_stripspecials($test[0],$test[1],$test[2]),$test[3]);
+            $this->assertEquals(\dokuwiki\Utf8\Clean::stripspecials($test[0],$test[1],$test[2]),$test[3]);
        }
    }

--- a/_test/tests/inc/utf8_strtolower.test.php
+++ b/_test/tests/inc/utf8_strtolower.test.php
@ -10,7 +10,7 @@ class utf8_strtolower_test extends DokuWikiTest {
        );

        foreach($data as $input => $expected) {
-            $this->assertEquals($expected, utf8_strtolower($input));
+            $this->assertEquals($expected, \dokuwiki\Utf8\PhpString::strtolower($input));
        }

        // just make sure our data was correct
@ -20,4 +20,4 @@ class utf8_strtolower_test extends DokuWikiTest {
            }
        }
    }
-}
+}
--- a/_test/tests/inc/utf8_substr.test.php
+++ b/_test/tests/inc/utf8_substr.test.php
@ -21,7 +21,7 @@ class utf8_substr_test extends DokuWikiTest {
        $tests[] = array('живπά우리をあöä',-6,-2,'우리をあ');

        foreach($tests as $test){
-            $this->assertEquals(utf8_substr($test[0],$test[1],$test[2]),$test[3]);
+            $this->assertEquals(\dokuwiki\Utf8\PhpString::substr($test[0],$test[1],$test[2]),$test[3]);
        }
    }

@ -34,7 +34,7 @@ class utf8_substr_test extends DokuWikiTest {
        $tests[] = array($str,0,66002,$str);

        foreach($tests as $test){
-            $this->assertEquals(utf8_substr($test[0],$test[1],$test[2]),$test[3]);
+            $this->assertEquals(\dokuwiki\Utf8\PhpString::substr($test[0],$test[1],$test[2]),$test[3]);
        }
    }

--- a/_test/tests/inc/utf8_unicode.test.php
+++ b/_test/tests/inc/utf8_unicode.test.php
@ -8,49 +8,49 @@ class utf8_unicode_test extends DokuWikiTest {
    function test_from_1byte(){
        $in  = 'a';
        $out = array(97);
-        $this->assertEquals(utf8_to_unicode($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Unicode::fromUtf8($in),$out);
    }

    function test_from_2byte(){
        $in  = "\xc3\xbc";
        $out = array(252);
-        $this->assertEquals(utf8_to_unicode($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Unicode::fromUtf8($in),$out);
    }

    function test_from_3byte(){
        $in  = "\xe2\x99\x8a";
        $out = array(9802);
-        $this->assertEquals(utf8_to_unicode($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Unicode::fromUtf8($in),$out);
    }

    function test_from_4byte(){
        $in  = "\xf4\x80\x80\x81";
        $out = array(1048577);
-        $this->assertEquals(utf8_to_unicode($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Unicode::fromUtf8($in),$out);
    }

    function test_to_1byte(){
        $out  = 'a';
        $in = array(97);
-        $this->assertEquals(unicode_to_utf8($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Unicode::toUtf8($in),$out);
    }

    function test_to_2byte(){
        $out  = "\xc3\xbc";
        $in = array(252);
-        $this->assertEquals(unicode_to_utf8($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Unicode::toUtf8($in),$out);
    }

    function test_to_3byte(){
        $out  = "\xe2\x99\x8a";
        $in = array(9802);
-        $this->assertEquals(unicode_to_utf8($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Unicode::toUtf8($in),$out);
    }

    function test_to_4byte(){
        $out  = "\xf4\x80\x80\x81";
        $in = array(1048577);
-        $this->assertEquals(unicode_to_utf8($in),$out);
+        $this->assertEquals(\dokuwiki\Utf8\Unicode::toUtf8($in),$out);
    }

 }
--- a/_test/tests/inc/utf8_utf16be.test.php
+++ b/_test/tests/inc/utf8_utf16be.test.php
@ -12,14 +12,14 @@ class utf8_utf16be_test extends DokuWikiTest {
     * Convert from UTF-8 to UTF-16BE
     */
    function test_to16be(){
-        $this->assertEquals(utf8_to_utf16be($this->utf8), $this->utf16);
+        $this->assertEquals(\dokuwiki\Utf8\Conversion::toUtf16Be($this->utf8), $this->utf16);
    }

    /**
     * Convert from UTF-16BE to UTF-8
     */
    function test_from16be(){
-        $this->assertEquals(utf16be_to_utf8($this->utf16),$this->utf8);
+        $this->assertEquals(\dokuwiki\Utf8\Conversion::fromUtf16Be($this->utf16),$this->utf8);
    }
 }

--- a/bin/dwpage.php
+++ b/bin/dwpage.php
@ -182,7 +182,7 @@ class PageCLI extends CLI {
        }

        if(empty($localfile)) {
-            $localfile = getcwd() . '/' . utf8_basename($wiki_fn);
+            $localfile = getcwd() . '/' . \dokuwiki\Utf8\PhpString::basename($wiki_fn);
        }

        if(!file_exists(dirname($localfile))) {
--- a/inc/Action/Sitemap.php
+++ b/inc/Action/Sitemap.php
@ -47,7 +47,7 @@ class Sitemap extends AbstractAction {
        if(is_readable($sitemap)) {
            // Send headers
            header('Content-Type: ' . $mime);
-            header('Content-Disposition: attachment; filename=' . utf8_basename($sitemap));
+            header('Content-Disposition: attachment; filename=' . \dokuwiki\Utf8\PhpString::basename($sitemap));

            http_conditionalRequest(filemtime($sitemap));

--- a/inc/JpegMeta.php
+++ b/inc/JpegMeta.php
@ -1302,7 +1302,7 @@ class JpegMeta {
    function _parseFileInfo() {
        if (file_exists($this->_fileName) && is_file($this->_fileName)) {
            $this->_info['file'] = array();
-            $this->_info['file']['Name'] = utf8_decodeFN(utf8_basename($this->_fileName));
+            $this->_info['file']['Name'] = utf8_decodeFN(\dokuwiki\Utf8\PhpString::basename($this->_fileName));
            $this->_info['file']['Path'] = fullpath($this->_fileName);
            $this->_info['file']['Size'] = filesize($this->_fileName);
            if ($this->_info['file']['Size'] < 1024) {
@ -1393,7 +1393,7 @@ class JpegMeta {
            }
        } else {
            $this->_info['file'] = array();
-            $this->_info['file']['Name'] = utf8_basename($this->_fileName);
+            $this->_info['file']['Name'] = \dokuwiki\Utf8\PhpString::basename($this->_fileName);
            $this->_info['file']['Url'] = $this->_fileName;
        }

--- a/inc/Mailer.class.php
+++ b/inc/Mailer.class.php
@ -80,7 +80,7 @@ class Mailer {
     */
    public function attachFile($path, $mime, $name = '', $embed = '') {
        if(!$name) {
-            $name = utf8_basename($path);
+            $name = \dokuwiki\Utf8\PhpString::basename($path);
        }

        $this->attach[] = array(
@ -387,7 +387,7 @@ class Mailer {
            }

            // FIXME: is there a way to encode the localpart of a emailaddress?
-            if(!utf8_isASCII($addr)) {
+            if(!\dokuwiki\Utf8\Clean::isASCII($addr)) {
                msg(hsc("E-Mail address <$addr> is not ASCII"), -1);
                continue;
            }
@ -403,11 +403,11 @@ class Mailer {
                $addr = "<$addr>";

                if(defined('MAILHEADER_ASCIIONLY')) {
-                    $text = utf8_deaccent($text);
-                    $text = utf8_strip($text);
+                    $text = \dokuwiki\Utf8\Clean::deaccent($text);
+                    $text = \dokuwiki\Utf8\Clean::strip($text);
                }

-                if(strpos($text, ',') !== false || !utf8_isASCII($text)) {
+                if(strpos($text, ',') !== false || !\dokuwiki\Utf8\Clean::isASCII($text)) {
                    $text = '=?UTF-8?B?'.base64_encode($text).'?=';
                }
            } else {
@ -553,10 +553,10 @@ class Mailer {
        if(isset($this->headers['Subject'])) {
            // add prefix to subject
            if(empty($conf['mailprefix'])) {
-                if(utf8_strlen($conf['title']) < 20) {
+                if(\dokuwiki\Utf8\PhpString::strlen($conf['title']) < 20) {
                    $prefix = '['.$conf['title'].']';
                } else {
-                    $prefix = '['.utf8_substr($conf['title'], 0, 20).'...]';
+                    $prefix = '['.\dokuwiki\Utf8\PhpString::substr($conf['title'], 0, 20).'...]';
                }
            } else {
                $prefix = '['.$conf['mailprefix'].']';
@ -568,10 +568,10 @@ class Mailer {

            // encode subject
            if(defined('MAILHEADER_ASCIIONLY')) {
-                $this->headers['Subject'] = utf8_deaccent($this->headers['Subject']);
-                $this->headers['Subject'] = utf8_strip($this->headers['Subject']);
+                $this->headers['Subject'] = \dokuwiki\Utf8\Clean::deaccent($this->headers['Subject']);
+                $this->headers['Subject'] = \dokuwiki\Utf8\Clean::strip($this->headers['Subject']);
            }
-            if(!utf8_isASCII($this->headers['Subject'])) {
+            if(!\dokuwiki\Utf8\Clean::isASCII($this->headers['Subject'])) {
                $this->headers['Subject'] = '=?UTF-8?B?'.base64_encode($this->headers['Subject']).'?=';
            }
        }
--- a/inc/SafeFN.class.php
+++ b/inc/SafeFN.class.php
@ -45,7 +45,7 @@ class SafeFN {
     * @author   Christopher Smith <chris@jalakai.co.uk>
     */
    public static function encode($filename) {
-        return self::unicodeToSafe(utf8_to_unicode($filename));
+        return self::unicodeToSafe(\dokuwiki\Utf8\Unicode::fromUtf8($filename));
    }

    /**
@ -74,7 +74,7 @@ class SafeFN {
     * @author   Christopher Smith <chris@jalakai.co.uk>
     */
    public static function decode($filename) {
-        return unicode_to_utf8(self::safeToUnicode(strtolower($filename)));
+        return \dokuwiki\Utf8\Unicode::toUtf8(self::safeToUnicode(strtolower($filename)));
    }

    public static function validatePrintableUtf8($printable_utf8) {
--- a/inc/Ui/Search.php
+++ b/inc/Ui/Search.php
@ -500,7 +500,7 @@ class Search extends Ui
    public function createPagenameFromQuery($parsedQuery)
    {
        $cleanedQuery = cleanID($parsedQuery['query']); // already strtolowered
-        if ($cleanedQuery === utf8_strtolower($parsedQuery['query'])) {
+        if ($cleanedQuery === \dokuwiki\Utf8\PhpString::strtolower($parsedQuery['query'])) {
            return ':' . $cleanedQuery;
        }
        $pagename = '';
--- a/inc/Utf8/Clean.php
+++ b/inc/Utf8/Clean.php
@ -0,0 +1,204 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * Methods to assess and clean UTF-8 strings
+ *
+ * All methods are static and operate on plain PHP byte strings.
+ */
+class Clean
+{
+    /**
+     * Checks if a string contains 7bit ASCII only
+     *
+     * @author Andreas Haerter <andreas.haerter@dev.mail-node.com>
+     *
+     * @param string $str
+     * @return bool true when no byte outside 0x00-0x7F was found
+     */
+    public static function isASCII($str)
+    {
+        return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
+    }
+
+    /**
+     * Tries to detect if a string is in Unicode encoding
+     *
+     * This is a structural check only: every lead byte has to be followed by
+     * the number of continuation bytes (10bbbbbb) it announces. Lead bytes for
+     * 5 and 6 byte sequences are accepted; overlong encodings and surrogates
+     * are not rejected.
+     *
+     * @author <bmorel@ssi.fr>
+     * @link   http://php.net/manual/en/function.utf8-encode.php
+     *
+     * @param string $str
+     * @return bool
+     */
+    public static function isUtf8($str)
+    {
+        $len = strlen($str);
+        for ($i = 0; $i < $len; $i++) {
+            $b = ord($str[$i]);
+            if ($b < 0x80) continue; # 0bbbbbbb
+            elseif (($b & 0xE0) === 0xC0) $n = 1; # 110bbbbb
+            elseif (($b & 0xF0) === 0xE0) $n = 2; # 1110bbbb
+            elseif (($b & 0xF8) === 0xF0) $n = 3; # 11110bbb
+            elseif (($b & 0xFC) === 0xF8) $n = 4; # 111110bb
+            elseif (($b & 0xFE) === 0xFC) $n = 5; # 1111110b
+            else return false; # Does not match any model
+
+            for ($j = 0; $j < $n; $j++) { # n bytes matching 10bbbbbb follow ?
+                if ((++$i === $len) || ((ord($str[$i]) & 0xC0) !== 0x80))
+                    return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Strips all high byte chars
+     *
+     * Returns a pure ASCII7 string
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     *
+     * @param string $str
+     * @return string
+     */
+    public static function strip($str)
+    {
+        $ascii = '';
+        $len = strlen($str);
+        for ($i = 0; $i < $len; $i++) {
+            // square brackets: the curly brace offset syntax was removed in PHP 8
+            if (ord($str[$i]) < 128) {
+                $ascii .= $str[$i];
+            }
+        }
+        return $ascii;
+    }
+
+    /**
+     * Removes special characters (nonalphanumeric) from a UTF-8 string
+     *
+     * This function adds the controlchars 0x00 to 0x19 to the list of
+     * stripped chars (they are not included in Table::specialChars())
+     *
+     * $additional is inserted into the regular expression character class as
+     * is, so the caller has to escape it where needed.
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     *
+     * @param  string $string The UTF8 string to strip of special chars
+     * @param  string $repl Replace special with this string
+     * @param  string $additional Additional chars to strip (used in regexp char class)
+     * @return string
+     */
+    public static function stripspecials($string, $repl = '', $additional = '')
+    {
+        // quote the special chars only once per request
+        static $specials = null;
+        if ($specials === null) {
+            $specials = preg_quote(Table::specialChars(), '/');
+        }
+
+        return preg_replace('/[' . $additional . '\x00-\x19' . $specials . ']/u', $repl, $string);
+    }
+
+    /**
+     * Replace bad bytes with an alternative character
+     *
+     * ASCII character is recommended for replacement char
+     *
+     * PCRE Pattern to locate bad bytes in a UTF-8 string
+     * Comes from W3 FAQ: Multilingual Forms
+     * Note: modified to include full ASCII range including control chars
+     *
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     * @see http://www.w3.org/International/questions/qa-forms-utf-8
+     *
+     * @param string $str to search
+     * @param string $replace to replace bad bytes with (defaults to '', which
+     *                        removes the bad bytes) - use ASCII
+     * @return string
+     */
+    public static function replaceBadBytes($str, $replace = '')
+    {
+        // group 1 wraps the whole alternation, group 2 only takes part in a
+        // match when the last alternative (a single invalid byte) was used
+        $UTF8_BAD =
+            '([\x00-\x7F]' .                          # ASCII (including control chars)
+            '|[\xC2-\xDF][\x80-\xBF]' .               # non-overlong 2-byte
+            '|\xE0[\xA0-\xBF][\x80-\xBF]' .           # excluding overlongs
+            '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' .    # straight 3-byte
+            '|\xED[\x80-\x9F][\x80-\xBF]' .           # excluding surrogates
+            '|\xF0[\x90-\xBF][\x80-\xBF]{2}' .        # planes 1-3
+            '|[\xF1-\xF3][\x80-\xBF]{3}' .            # planes 4-15
+            '|\xF4[\x80-\x8F][\x80-\xBF]{2}' .        # plane 16
+            '|(.{1}))';                               # invalid byte
+
+        $clean = '';
+        // every alternative consumes at least one byte, so each match starts at
+        // the beginning of the remaining string and the loop always advances
+        while (preg_match('/' . $UTF8_BAD . '/S', $str, $matches)) {
+            $clean .= isset($matches[2]) ? $replace : $matches[0];
+            $str = substr($str, strlen($matches[0]));
+        }
+        return $clean;
+    }
+
+
+    /**
+     * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
+     *
+     * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
+     * letters. Default is to deaccent both cases ($case = 0)
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     *
+     * @param string $string
+     * @param int $case
+     * @return string
+     */
+    public static function deaccent($string, $case = 0)
+    {
+        if ($case <= 0) {
+            $string = strtr($string, Table::lowerAccents());
+        }
+        if ($case >= 0) {
+            $string = strtr($string, Table::upperAccents());
+        }
+        return $string;
+    }
+
+    /**
+     * Romanize a non-latin string
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     *
+     * @param string $string
+     * @return string
+     */
+    public static function romanize($string)
+    {
+        if (self::isASCII($string)) return $string; //nothing to do
+
+        return strtr($string, Table::romanization());
+    }
+
+    /**
+     * adjust a byte index into a utf8 string to a utf8 character boundary
+     *
+     * Indexes at or below 0 yield 0, indexes at or beyond the string length
+     * yield the string length.
+     *
+     * @author       chris smith <chris@jalakai.co.uk>
+     *
+     * @param string $str utf8 character string
+     * @param int $i byte index into $str
+     * @param bool $next direction to search for boundary, false = back to the start of the
+     *                   current character, true = forward to the start of the next character
+     * @return int byte index into $str now pointing to a utf8 character boundary
+     */
+    public static function correctIdx($str, $i, $next = false)
+    {
+
+        if ($i <= 0) return 0;
+
+        $limit = strlen($str);
+        if ($i >= $limit) return $limit;
+
+        // continuation bytes look like 10bbbbbb, skip them in the wanted direction
+        if ($next) {
+            while (($i < $limit) && ((ord($str[$i]) & 0xC0) === 0x80)) $i++;
+        } else {
+            while ($i && ((ord($str[$i]) & 0xC0) === 0x80)) $i--;
+        }
+
+        return $i;
+    }
+}
--- a/inc/Utf8/Conversion.php
+++ b/inc/Utf8/Conversion.php
@ -0,0 +1,161 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * Methods to convert from and to UTF-8 strings
+ */
+class Conversion
+{
+
+    /**
+     * Encodes UTF-8 characters to HTML entities
+     *
+     * Code points below 0x80 are kept as they are, code points below 0x100
+     * become decimal entities, everything else becomes a hexadecimal entity.
+     *
+     * @author Tom N Harris <tnharris@whoopdedo.org>
+     * @author <vpribish at shopping dot com>
+     * @link   http://php.net/manual/en/function.utf8-decode.php
+     *
+     * @param string $str
+     * @return string
+     */
+    public static function toHtml($str)
+    {
+        $ret = '';
+        foreach (Unicode::fromUtf8($str) as $cp) {
+            if ($cp < 0x80) {
+                $ret .= chr($cp);
+            } elseif ($cp < 0x100) {
+                $ret .= "&#$cp;";
+            } else {
+                $ret .= '&#x' . dechex($cp) . ';';
+            }
+        }
+        return $ret;
+    }
+
+    /**
+     * Decodes HTML entities to UTF-8 characters
+     *
+     * Convert any &#..; entity to a codepoint,
+     * The entities flag defaults to only decoding numeric entities.
+     * Pass HTML_ENTITIES and named entities, including &amp; &lt; etc.
+     * are handled as well. Avoids the problem that would occur if you
+     * had to decode "&amp;#38;&#38;amp;#38;"
+     *
+     * unhtmlspecialchars(\dokuwiki\Utf8\Conversion::fromHtml($s)) -> "&#38;&#38;"
+     * \dokuwiki\Utf8\Conversion::fromHtml(unhtmlspecialchars($s)) -> "&&amp#38;"
+     * what it should be                   -> "&#38;&amp#38;"
+     *
+     * @author Tom N Harris <tnharris@whoopdedo.org>
+     *
+     * @param  string $str UTF-8 encoded string
+     * @param  boolean $entities decode named entities in addition to numeric ones
+     * @return string  UTF-8 encoded string with numeric (and named) entities replaced.
+     */
+    public static function fromHtml($str, $entities = false)
+    {
+        if (!$entities) {
+            // groups: 1 = whole entity, 2 = optional hex marker, 3 = digits
+            return preg_replace_callback(
+                '/(&#([Xx])?([0-9A-Za-z]+);)/m',
+                [__CLASS__, 'decodeNumericEntity'],
+                $str
+            );
+        }
+
+        // groups: 1 = optional '#', 2 = optional hex marker, 3 = digits or name
+        return preg_replace_callback(
+            '/&(#)?([Xx])?([0-9A-Za-z]+);/m',
+            [__CLASS__, 'decodeAnyEntity'],
+            $str
+        );
+    }
+
+    /**
+     * Decodes any HTML entity to its correct UTF-8 char equivalent
+     *
+     * Numeric entities are passed on to decodeNumericEntity(), named entities
+     * are looked up in PHP's HTML 4.01 entity table. Unknown names are
+     * returned unchanged.
+     *
+     * @param string[] $ent The match array of the entity as created by fromHtml()
+     * @return string
+     */
+    protected static function decodeAnyEntity($ent)
+    {
+        // create the named entity lookup table (entity => UTF-8 character)
+        static $table = null;
+        if ($table === null) {
+            // The table is requested in UTF-8 explicitly, so its values already are the
+            // wanted characters. Running them through ord() would only look at the first
+            // byte of each multi byte character and produce wrong results.
+            $table = array_flip(
+                get_html_translation_table(HTML_ENTITIES, ENT_COMPAT | ENT_HTML401, 'UTF-8')
+            );
+        }
+
+        if ($ent[1] === '#') {
+            return self::decodeNumericEntity($ent);
+        }
+
+        if (array_key_exists($ent[0], $table)) {
+            return $table[$ent[0]];
+        }
+
+        return $ent[0];
+    }
+
+    /**
+     * Decodes numeric HTML entities to their correct UTF-8 characters
+     *
+     * @param string[] $ent The match array of a numeric entity: index 2 holds the optional
+     *                      hex marker (x or X), index 3 the digits
+     * @return string|false
+     */
+    protected static function decodeNumericEntity($ent)
+    {
+        switch ($ent[2]) {
+            case 'X':
+            case 'x':
+                $cp = hexdec($ent[3]);
+                break;
+            default:
+                $cp = intval($ent[3]);
+                break;
+        }
+        return Unicode::toUtf8(array($cp));
+    }
+
+    /**
+     * UTF-8 to UTF-16BE conversion.
+     *
+     * Without mbstring every code point is packed as a single unsigned 16 bit
+     * big endian value, so the result is effectively UCS-2.
+     *
+     * @param string $str
+     * @param bool $bom prepend the big endian byte order mark (FE FF)
+     * @return string
+     */
+    public static function toUtf16be($str, $bom = false)
+    {
+        $out = $bom ? "\xFE\xFF" : '';
+        if (UTF8_MBSTRING) {
+            return $out . mb_convert_encoding($str, 'UTF-8' === '' ? '' : 'UTF-16BE', 'UTF-8');
+        }
+
+        $uni = Unicode::fromUtf8($str);
+        foreach ($uni as $cp) {
+            $out .= pack('n', $cp);
+        }
+        return $out;
+    }
+
+    /**
+     * UTF-16BE to UTF-8 conversion.
+     *
+     * The input is read as a sequence of unsigned 16 bit big endian values,
+     * each of which is handed to Unicode::toUtf8() as one code point.
+     *
+     * @param string $str UTF-16BE encoded string
+     * @return false|string
+     */
+    public static function fromUtf16be($str)
+    {
+        $uni = unpack('n*', $str);
+        return Unicode::toUtf8($uni);
+    }
+}
--- a/inc/Utf8/PhpString.php
+++ b/inc/Utf8/PhpString.php
@ -0,0 +1,381 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * UTF-8 aware equivalents to PHP's string functions
+ */
+class PhpString
+{
+
+    /**
+     * basename() variant that does not depend on the current locale
+     *
+     * Exists because of a bug in PHP's own basename(). Both '/' and '\' are
+     * treated as directory separators.
+     *
+     * @param string $path A path
+     * @param string $suffix If the name component ends in suffix this will also be cut off
+     * @return string
+     * @link   https://bugs.php.net/bug.php?id=37738
+     *
+     * @see basename()
+     */
+    public static function basename($path, $suffix = '')
+    {
+        $name = trim($path, '\\/');
+
+        // position of the last separator of either kind, false when there is none
+        $lastSlash = strrpos($name, '/');
+        $lastBackslash = strrpos($name, '\\');
+        $cut = max($lastSlash, $lastBackslash);
+        if ($cut) {
+            $name = substr($name, $cut + 1);
+        }
+
+        $suffixLength = strlen($suffix);
+        if (!$suffixLength) {
+            return $name;
+        }
+        if (substr($name, -$suffixLength) !== $suffix) {
+            return $name;
+        }
+
+        return substr($name, 0, -$suffixLength);
+    }
+
+    /**
+     * Unicode aware replacement for strlen()
+     *
+     * Uses mb_strlen() when UTF8_MBSTRING is set, otherwise iconv_strlen() when available.
+     * utf8_decode() is only tried after those because it is deprecated as of PHP 8.2; it
+     * converts characters that are not in ISO-8859-1 to '?', which, for the purpose of
+     * counting, is alright. Without any of these the byte length is returned.
+     *
+     * @param string $string
+     * @return int
+     * @see    utf8_decode()
+     *
+     * @author <chernyshevsky at hotmail dot com>
+     * @see    strlen()
+     */
+    public static function strlen($string)
+    {
+        if (UTF8_MBSTRING) {
+            return mb_strlen($string, 'UTF-8');
+        }
+
+        if (function_exists('iconv_strlen')) {
+            return iconv_strlen($string, 'UTF-8');
+        }
+
+        // deprecated since PHP 8.2, hence the last resort
+        if (function_exists('utf8_decode')) {
+            return strlen(utf8_decode($string));
+        }
+
+        return strlen($string);
+    }
+
+    /**
+     * UTF-8 aware alternative to substr
+     *
+     * Return part of a string given character offset (and optionally length).
+     *
+     * With UTF8_MBSTRING set the work is done by mb_substr() with an explicit UTF-8
+     * encoding; otherwise a PCRE pattern with the 'u' flag is built from offset and length.
+     * Unlike substr() this never returns false, an empty string is returned instead.
+     *
+     * @param string $str
+     * @param int $offset number of UTF-8 characters offset (from left)
+     * @param int $length (optional) length in UTF-8 characters from offset
+     * @return string
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     * @author Chris Smith <chris@jalakai.co.uk>
+     *
+     */
+    public static function substr($str, $offset, $length = null)
+    {
+        if (UTF8_MBSTRING) {
+            // a null length means "up to the end of the string"; the encoding is passed
+            // explicitly so the result does not depend on mb_internal_encoding()
+            return mb_substr($str, $offset, $length, 'UTF-8');
+        }
+
+        /*
+         * Notes:
+         *
+         * no mb string support, so we'll use pcre regex's with 'u' flag
+         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
+         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
+         *
+         * substr documentation states false can be returned in some cases (e.g. offset > string length)
+         * mb_substr never returns false, it will return an empty string instead.
+         *
+         * calculating the number of characters in the string is a relatively expensive operation, so
+         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
+         */
+
+        // cast parameters to appropriate types to avoid multiple notices/warnings
+        $str = (string)$str;                          // generates E_NOTICE for PHP4 objects, but not PHP5 objects
+        $offset = (int)$offset;
+        if ($length !== null) $length = (int)$length;
+
+        // handle trivial cases
+        if ($length === 0) return '';
+        if ($offset < 0 && $length < 0 && $length < $offset) return '';
+
+        $offset_pattern = '';
+        $length_pattern = '';
+
+        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
+        if ($offset < 0) {
+            $strlen = self::strlen($str);        // see notes
+            $offset = $strlen + $offset;
+            if ($offset < 0) $offset = 0;
+        }
+
+        // establish a pattern for offset, a non-captured group equal in length to offset
+        if ($offset > 0) {
+            $Ox = (int)($offset / 65535);
+            $Oy = $offset % 65535;
+
+            if ($Ox) $offset_pattern = '(?:.{65535}){' . $Ox . '}';
+            $offset_pattern = '^(?:' . $offset_pattern . '.{' . $Oy . '})';
+        } else {
+            $offset_pattern = '^';                      // offset == 0; just anchor the pattern
+        }
+
+        // establish a pattern for length
+        if ($length === null) {
+            $length_pattern = '(.*)$';                  // the rest of the string
+        } else {
+
+            if (!isset($strlen)) $strlen = self::strlen($str);    // see notes
+            if ($offset > $strlen) return '';           // another trivial case
+
+            if ($length > 0) {
+
+                // reduce any length that would go past the end of the string
+                $length = min($strlen - $offset, $length);
+
+                $Lx = (int)($length / 65535);
+                $Ly = $length % 65535;
+
+                // +ve length requires ... a captured group of length characters
+                if ($Lx) $length_pattern = '(?:.{65535}){' . $Lx . '}';
+                $length_pattern = '(' . $length_pattern . '.{' . $Ly . '})';
+
+            } else if ($length < 0) {
+
+                if ($length < ($offset - $strlen)) return '';
+
+                $Lx = (int)((-$length) / 65535);
+                $Ly = (-$length) % 65535;
+
+                // -ve length requires ... capture everything except a group of -length characters
+                //                         anchored at the tail-end of the string
+                if ($Lx) $length_pattern = '(?:.{65535}){' . $Lx . '}';
+                $length_pattern = '(.*)(?:' . $length_pattern . '.{' . $Ly . '})$';
+            }
+        }
+
+        if (!preg_match('#' . $offset_pattern . $length_pattern . '#us', $str, $match)) return '';
+        return $match[1];
+    }
+
+    /**
+     * Unicode aware replacement for substr_replace()
+     *
+     * Offsets and lengths are counted in characters. Negative values are resolved against
+     * the end of the string, like the native function does.
+     *
+     * @param string $string input string
+     * @param string $replacement the replacement
+     * @param int $start the replacing will begin at the start'th offset into string. If negative, it is
+     *                            counted from the end of string.
+     * @param int $length If given and is positive, it represents the length of the portion of string which is
+     *                            to be replaced. If negative, it is the number of characters from the end of
+     *                            string at which to stop replacing. If length is zero then this function will
+     *                            have the effect of inserting replacement into string at the given start offset.
+     * @return string
+     * @see    substr_replace()
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    public static function substr_replace($string, $replacement, $start, $length = 0)
+    {
+        // the character count is only needed to resolve negative arguments
+        if ($start < 0 || $length < 0) {
+            $total = self::strlen($string);
+            if ($start < 0) $start = max(0, $total + $start);
+            if ($length < 0) $length = max(0, $total - $start + $length);
+        }
+
+        $ret = '';
+        if ($start > 0) $ret .= self::substr($string, 0, $start);
+        $ret .= $replacement;
+        $ret .= self::substr($string, $start + $length);
+        return $ret;
+    }
+
+    /**
+     * Unicode aware replacement for ltrim()
+     *
+     * Without a charlist the native ltrim() is used. Otherwise every character of the
+     * charlist is stripped from the start of the string; the charlist is taken literally,
+     * so range notation like "a..z" is not supported.
+     *
+     * @param string $str
+     * @param string $charlist characters to strip
+     * @return string
+     * @see    ltrim()
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    public static function ltrim($str, $charlist = '')
+    {
+        if ($charlist === '') return ltrim($str);
+
+        // quote charlist for use in a character class; preg_quote() also escapes a
+        // leading ^ which would otherwise negate the class
+        $charlist = preg_quote($charlist, '/');
+
+        return preg_replace('/^[' . $charlist . ']+/u', '', $str);
+    }
+
+    /**
+     * Unicode aware replacement for rtrim()
+     *
+     * Without a charlist the native rtrim() is used. Otherwise every character of the
+     * charlist is stripped from the end of the string; the charlist is taken literally,
+     * so range notation like "a..z" is not supported.
+     *
+     * @param string $str
+     * @param string $charlist characters to strip
+     * @return string
+     * @see    rtrim()
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    public static function rtrim($str, $charlist = '')
+    {
+        if ($charlist === '') return rtrim($str);
+
+        // quote charlist for use in a character class; preg_quote() also escapes a
+        // leading ^ which would otherwise negate the class
+        $charlist = preg_quote($charlist, '/');
+
+        return preg_replace('/[' . $charlist . ']+$/u', '', $str);
+    }
+
+    /**
+     * Unicode aware replacement for trim()
+     *
+     * An empty charlist is delegated to the native trim(); otherwise the string is
+     * trimmed on the right and then on the left with the methods of this class.
+     *
+     * @param string $str
+     * @param string $charlist
+     * @return string
+     * @see    trim()
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    public static function trim($str, $charlist = '')
+    {
+        if ($charlist !== '') {
+            $rightTrimmed = self::rtrim($str, $charlist);
+            return self::ltrim($rightTrimmed, $charlist);
+        }
+
+        return trim($str);
+    }
+
+    /**
+     * Unicode aware replacement for strtolower()
+     *
+     * With UTF8_MBSTRING set the string is lowercased by mb_strtolower() and, when the
+     * Normalizer class is already loaded, passed through Normalizer::normalize().
+     * Otherwise the upper to lower case table is applied.
+     *
+     * @param string $string
+     * @return string
+     * @see    \dokuwiki\Utf8\PhpString::strtoupper()
+     *
+     * @author Leo Feyer <leo@typolight.org>
+     * @see    strtolower()
+     */
+    public static function strtolower($string)
+    {
+        if (!UTF8_MBSTRING) {
+            return strtr($string, Table::upperCaseToLowerCase());
+        }
+
+        $lowered = mb_strtolower($string, 'utf-8');
+
+        // do not trigger autoloading just to find out whether intl is around
+        if (class_exists('Normalizer', false)) {
+            return \Normalizer::normalize($lowered);
+        }
+        return $lowered;
+    }
+
+    /**
+     * Unicode aware replacement for strtoupper()
+     *
+     * With UTF8_MBSTRING set the work is done by mb_strtoupper(), otherwise the
+     * lower to upper case table is applied.
+     *
+     * @param string $string
+     * @return string
+     * @see    \dokuwiki\Utf8\PhpString::strtolower()
+     *
+     * @author Leo Feyer <leo@typolight.org>
+     * @see    strtoupper()
+     */
+    public static function strtoupper($string)
+    {
+        return UTF8_MBSTRING
+            ? mb_strtoupper($string, 'utf-8')
+            : strtr($string, Table::lowerCaseToUpperCase());
+    }
+
+
+    /**
+     * UTF-8 aware alternative to ucfirst
+     *
+     * Upper cases the first character of the given string and leaves the rest as it is.
+     *
+     * @param string $str
+     * @return string with first character as upper case (if applicable)
+     * @author Harry Fuecks
+     *
+     */
+    public static function ucfirst($str)
+    {
+        $chars = self::strlen($str);
+
+        // loose comparisons on purpose, they mirror a switch on the character count
+        if ($chars == 0) {
+            return '';
+        }
+        if ($chars == 1) {
+            return self::strtoupper($str);
+        }
+
+        // split into the first character and everything after it
+        preg_match('/^(.{1})(.*)$/us', $str, $parts);
+        return self::strtoupper($parts[1]) . $parts[2];
+    }
+
+    /**
+     * UTF-8 aware alternative to ucwords
+     * Uppercase the first character of each word in a string
+     *
+     * Words are separated by form feeds, horizontal tabs, vertical tabs, linefeeds,
+     * carriage returns and spaces. The separators themselves are kept untouched.
+     *
+     * @param string $str
+     * @return string with first char of each word uppercase
+     * @author Harry Fuecks
+     * @see http://php.net/ucwords
+     *
+     */
+    public static function ucwords($str)
+    {
+        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
+        // form feeds, horizontal tabs, vertical tabs, linefeeds, carriage returns and spaces
+        // This corresponds to the definition of a "word" defined at http://php.net/ucwords
+        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';
+
+        return preg_replace_callback(
+            $pattern,
+            static function ($matches) {
+                $leadingws = $matches[2];
+                $first = $matches[3];
+
+                // cut by byte length of the captured parts instead of using ltrim(), whose
+                // default character list differs from the separators used in the pattern
+                $rest = substr($matches[0], strlen($leadingws) + strlen($first));
+
+                return $leadingws . self::strtoupper($first) . $rest;
+            },
+            $str
+        );
+    }
+
+    /**
+     * Unicode aware replacement for strpos()
+     *
+     * Searches with the byte based strpos() and converts the byte position of the hit
+     * into a character position. Hits in front of the requested character offset make
+     * the search continue further into the string.
+     *
+     * @param string $haystack
+     * @param string $needle
+     * @param integer $offset character offset to start searching at
+     * @return int|false character position of the needle, false if it was not found
+     * @author Leo Feyer <leo@typolight.org>
+     * @see    strpos()
+     *
+     */
+    public static function strpos($haystack, $needle, $offset = 0)
+    {
+        // additional bytes to skip on top of the character offset
+        $byteShift = 0;
+
+        do {
+            $bytePos = strpos($haystack, $needle, $offset + $byteShift);
+            if ($bytePos === false) {
+                return false;
+            }
+
+            // number of characters in front of the hit
+            $charPos = self::strlen(substr($haystack, 0, $bytePos));
+
+            if ($charPos < $offset) {
+                $byteShift = $bytePos - $charPos;
+            }
+        } while ($charPos < $offset);
+
+        return $charPos;
+    }
+
+
+}
--- a/inc/Utf8/Table.php
+++ b/inc/Utf8/Table.php
@ -0,0 +1,93 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * Static accessors for the UTF-8 lookup tables
+ *
+ * Each table is read from its file on first use only and kept in a static
+ * variable afterwards.
+ */
+class Table
+{
+
+    /**
+     * Get the table that maps upper case characters to their lower case form
+     *
+     * @return array
+     */
+    public static function upperCaseToLowerCase()
+    {
+        static $table = null;
+        if ($table !== null) return $table;
+
+        $table = include __DIR__ . '/tables/case.php';
+        return $table;
+    }
+
+    /**
+     * Get the table that maps lower case characters to their upper case form
+     *
+     * This is the flipped upper to lower case table.
+     *
+     * @return array
+     */
+    public static function lowerCaseToUpperCase()
+    {
+        static $table = null;
+        if ($table !== null) return $table;
+
+        $table = array_flip(self::upperCaseToLowerCase());
+        return $table;
+    }
+
+    /**
+     * Get the lower case accent table
+     * @return array
+     */
+    public static function lowerAccents()
+    {
+        static $table = null;
+        if ($table !== null) return $table;
+
+        $table = include __DIR__ . '/tables/loweraccents.php';
+        return $table;
+    }
+
+    /**
+     * Get the upper case accent table
+     * @return array
+     */
+    public static function upperAccents()
+    {
+        static $table = null;
+        if ($table !== null) return $table;
+
+        $table = include __DIR__ . '/tables/upperaccents.php';
+        return $table;
+    }
+
+    /**
+     * Get the romanization table
+     * @return array
+     */
+    public static function romanization()
+    {
+        static $table = null;
+        if ($table !== null) return $table;
+
+        $table = include __DIR__ . '/tables/romanization.php';
+        return $table;
+    }
+
+    /**
+     * Get the special chars as a concatenated string
+     *
+     * The table file is passed through Unicode::toUtf8() to build the string.
+     *
+     * @return string
+     */
+    public static function specialChars()
+    {
+        static $string = null;
+        if ($string !== null) return $string;
+
+        // FIXME should we cache this to file system?
+        $string = Unicode::toUtf8(include __DIR__ . '/tables/specials.php');
+        return $string;
+    }
+}
--- a/inc/Utf8/Unicode.php
+++ b/inc/Utf8/Unicode.php
@ -0,0 +1,277 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * Convert between UTF-8 and a list of Unicode Code Points
+ */
+class Unicode
+{
+
+    /**
+     * Takes an UTF-8 string and returns an array of ints representing the
+     * Unicode characters. Astral planes are supported ie. the ints in the
+     * output can be > 0xFFFF. Occurrences of the BOM are ignored.
+     *
+     * If $strict is set to true the function returns false if the input
+     * string isn't a valid UTF-8 octet sequence and raises a PHP error at
+     * level E_USER_WARNING
+     *
+     * If $strict is false nothing is rejected: bytes that fit nowhere are
+     * skipped, and completed sequences that are overlong, encode a surrogate
+     * or lie outside the Unicode range are put into the output as they are.
+     *
+     * Note: this function has been modified slightly in this library to
+     * trigger errors on encountering bad bytes
+     *
+     * @author <hsivonen@iki.fi>
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     * @see    Unicode::toUtf8()
+     * @link   http://hsivonen.iki.fi/php-utf8/
+     * @link   http://sourceforge.net/projects/phputf8/
+     * @todo break into less complex chunks
+     * @todo use exceptions instead of user errors
+     *
+     * @param  string $str UTF-8 encoded string
+     * @param  boolean $strict Check for invalid sequences?
+     * @return mixed array of unicode code points or false if UTF-8 invalid
+     */
+    public static function fromUtf8($str, $strict = false)
+    {
+        $mState = 0;     // cached expected number of octets after the current octet
+        // until the beginning of the next UTF8 character sequence
+        $mUcs4 = 0;     // cached Unicode character
+        $mBytes = 1;     // cached expected number of octets in the current sequence
+
+        $out = array();
+
+        $len = strlen($str);
+
+        for ($i = 0; $i < $len; $i++) {
+
+            // square brackets: the curly brace offset syntax is gone as of PHP 8
+            $in = ord($str[$i]);
+
+            if ($mState === 0) {
+
+                // When mState is zero we expect either a US-ASCII character or a
+                // multi-octet sequence.
+                if (0 === (0x80 & $in)) {
+                    // US-ASCII, pass straight through.
+                    $out[] = $in;
+                    $mBytes = 1;
+
+                } elseif (0xC0 === (0xE0 & $in)) {
+                    // First octet of 2 octet sequence
+                    $mUcs4 = $in;
+                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
+                    $mState = 1;
+                    $mBytes = 2;
+
+                } elseif (0xE0 === (0xF0 & $in)) {
+                    // First octet of 3 octet sequence
+                    $mUcs4 = $in;
+                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
+                    $mState = 2;
+                    $mBytes = 3;
+
+                } elseif (0xF0 === (0xF8 & $in)) {
+                    // First octet of 4 octet sequence
+                    $mUcs4 = $in;
+                    $mUcs4 = ($mUcs4 & 0x07) << 18;
+                    $mState = 3;
+                    $mBytes = 4;
+
+                } elseif (0xF8 === (0xFC & $in)) {
+                    /* First octet of 5 octet sequence.
+                     *
+                     * This is illegal because the encoded codepoint must be either
+                     * (a) not the shortest form or
+                     * (b) outside the Unicode range of 0-0x10FFFF.
+                     * Rather than trying to resynchronize, we will carry on until the end
+                     * of the sequence and let the later error handling code catch it.
+                     */
+                    $mUcs4 = $in;
+                    $mUcs4 = ($mUcs4 & 0x03) << 24;
+                    $mState = 4;
+                    $mBytes = 5;
+
+                } elseif (0xFC === (0xFE & $in)) {
+                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
+                    $mUcs4 = $in;
+                    $mUcs4 = ($mUcs4 & 1) << 30;
+                    $mState = 5;
+                    $mBytes = 6;
+
+                } elseif ($strict) {
+                    /* Current octet is neither in the US-ASCII range nor a legal first
+                     * octet of a multi-octet sequence.
+                     */
+                    trigger_error(
+                        'utf8_to_unicode: Illegal sequence identifier ' .
+                        'in UTF-8 at byte ' . $i,
+                        E_USER_WARNING
+                    );
+                    return false;
+
+                }
+
+            } else {
+
+                // When mState is non-zero, we expect a continuation of the multi-octet
+                // sequence
+                if (0x80 === (0xC0 & $in)) {
+
+                    // Legal continuation.
+                    $shift = ($mState - 1) * 6;
+                    $tmp = $in;
+                    $tmp = ($tmp & 0x0000003F) << $shift;
+                    $mUcs4 |= $tmp;
+
+                    /**
+                     * End of the multi-octet sequence. mUcs4 now contains the final
+                     * Unicode codepoint to be output
+                     */
+                    if (0 === --$mState) {
+
+                        /*
+                         * Check for illegal sequences and codepoints.
+                         */
+                        // From Unicode 3.1, non-shortest form is illegal
+                        if (((2 === $mBytes) && ($mUcs4 < 0x0080)) ||
+                            ((3 === $mBytes) && ($mUcs4 < 0x0800)) ||
+                            ((4 === $mBytes) && ($mUcs4 < 0x10000)) ||
+                            (4 < $mBytes) ||
+                            // From Unicode 3.2, surrogate characters are illegal
+                            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
+                            // Codepoints outside the Unicode range are illegal
+                            ($mUcs4 > 0x10FFFF)) {
+
+                            if ($strict) {
+                                trigger_error(
+                                    'utf8_to_unicode: Illegal sequence or codepoint ' .
+                                    'in UTF-8 at byte ' . $i,
+                                    E_USER_WARNING
+                                );
+
+                                return false;
+                            }
+
+                        }
+
+                        if (0xFEFF !== $mUcs4) {
+                            // BOM is legal but we don't want to output it
+                            $out[] = $mUcs4;
+                        }
+
+                        //initialize UTF8 cache
+                        $mState = 0;
+                        $mUcs4 = 0;
+                        $mBytes = 1;
+                    }
+
+                } elseif ($strict) {
+                    /**
+                     *((0xC0 & (*in) != 0x80) && (mState != 0))
+                     * Incomplete multi-octet sequence.
+                     */
+                    trigger_error(
+                        'utf8_to_unicode: Incomplete multi-octet ' .
+                        '   sequence in UTF-8 at byte ' . $i,
+                        E_USER_WARNING
+                    );
+
+                    return false;
+                }
+            }
+        }
+        return $out;
+    }
+
+    /**
+     * Takes an array of ints representing the Unicode characters and returns
+     * a UTF-8 string. Astral planes are supported ie. the ints in the
+     * input can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
+     * are not allowed.
+     *
+     * If $strict is set to true the function returns false if the input
+     * array contains ints that represent surrogates or are outside the
+     * Unicode range and raises a PHP error at level E_USER_WARNING
+     *
+     * If $strict is false such values are left out of the result. Anything
+     * that is not an array results in an empty string.
+     *
+     * @param  array $arr of unicode code points representing a string
+     * @param  boolean $strict Check for invalid sequences?
+     * @return string|false UTF-8 string or false if array contains invalid code points
+     *
+     * @author <hsivonen@iki.fi>
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     * @see    Unicode::fromUtf8()
+     * @link   http://hsivonen.iki.fi/php-utf8/
+     * @link   http://sourceforge.net/projects/phputf8/
+     * @todo use exceptions instead of user errors
+     */
+    public static function toUtf8($arr, $strict = false)
+    {
+        if (!is_array($arr)) return '';
+
+        // plain concatenation: nothing is left open when strict mode bails out early
+        $out = '';
+
+        foreach ($arr as $k => $cp) {
+
+            if (($cp >= 0) && ($cp <= 0x007f)) {
+                # ASCII range (including control chars)
+
+                $out .= chr($cp);
+
+            } elseif ($cp <= 0x07ff) {
+                # 2 byte sequence
+
+                $out .= chr(0xc0 | ($cp >> 6));
+                $out .= chr(0x80 | ($cp & 0x003f));
+
+            } elseif ($cp == 0xFEFF) {
+                # Byte order mark (skip)
+                // nop -- zap the BOM
+
+            } elseif ($cp >= 0xD800 && $cp <= 0xDFFF) {
+                # Test for illegal surrogates
+
+                // found a surrogate
+                if ($strict) {
+                    trigger_error(
+                        'unicode_to_utf8: Illegal surrogate ' .
+                        'at index: ' . $k . ', value: ' . $cp,
+                        E_USER_WARNING
+                    );
+                    return false;
+                }
+
+            } elseif ($cp <= 0xffff) {
+                # 3 byte sequence
+
+                $out .= chr(0xe0 | ($cp >> 12));
+                $out .= chr(0x80 | (($cp >> 6) & 0x003f));
+                $out .= chr(0x80 | ($cp & 0x003f));
+
+            } elseif ($cp <= 0x10ffff) {
+                # 4 byte sequence
+
+                $out .= chr(0xf0 | ($cp >> 18));
+                $out .= chr(0x80 | (($cp >> 12) & 0x3f));
+                $out .= chr(0x80 | (($cp >> 6) & 0x3f));
+                $out .= chr(0x80 | ($cp & 0x3f));
+
+            } elseif ($strict) {
+
+                trigger_error(
+                    'unicode_to_utf8: Codepoint out of Unicode range ' .
+                    'at index: ' . $k . ', value: ' . $cp,
+                    E_USER_WARNING
+                );
+
+                // out of range
+                return false;
+            }
+        }
+
+        return $out;
+    }
+}
--- a/inc/Utf8/tables/case.php
+++ b/inc/Utf8/tables/case.php
@ -0,0 +1,567 @@
+<?php
+/**
+ * UTF-8 Case lookup table
+ *
+ * This lookup table maps upper case letters to their corresponding
+ * lower case letter in UTF-8
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ */
+return [
+    'A' => 'a',
+    'Ａ' => 'ａ',
+    'Á' => 'á',
+    'À' => 'à',
+    'Ă' => 'ă',
+    'Ắ' => 'ắ',
+    'Ẵ' => 'ẵ',
+    'Ẳ' => 'ẳ',
+    'Â' => 'â',
+    'Ấ' => 'ấ',
+    'Ầ' => 'ầ',
+    'Ẫ' => 'ẫ',
+    'Ǎ' => 'ǎ',
+    'Å' => 'å',
+    'Ǻ' => 'ǻ',
+    'Ä' => 'ä',
+    'Ǟ' => 'ǟ',
+    'Ã' => 'ã',
+    'Ǡ' => 'ǡ',
+    'Ą' => 'ą',
+    'Ā' => 'ā',
+    'Ả' => 'ả',
+    'Ȁ' => 'ȁ',
+    'Ȃ' => 'ȃ',
+    'Ặ' => 'ặ',
+    'Ậ' => 'ậ',
+    'Ḁ' => 'ḁ',
+    'Æ' => 'æ',
+    'Ǽ' => 'ǽ',
+    'Ǣ' => 'ǣ',
+    'Ｂ' => 'ｂ',
+    'Ḃ' => 'ḃ',
+    'Ḅ' => 'ḅ',
+    'Ḇ' => 'ḇ',
+    'Ɓ' => 'ɓ',
+    'Ƃ' => 'ƃ',
+    'Ｃ' => 'ｃ',
+    'Ć' => 'ć',
+    'Ĉ' => 'ĉ',
+    'Č' => 'č',
+    'Ċ' => 'ċ',
+    'Ç' => 'ç',
+    'Ƈ' => 'ƈ',
+    'D' => 'd',
+    'Ｄ' => 'ｄ',
+    'Ď' => 'ď',
+    'Ḋ' => 'ḋ',
+    'Ḑ' => 'ḑ',
+    'Ḍ' => 'ḍ',
+    'Ḓ' => 'ḓ',
+    'Ḏ' => 'ḏ',
+    'Ð' => 'ð',
+    'ǲ' => 'ǳ', //FIXME
+    'ǅ' => 'ǆ', //FIXME
+    'Ɗ' => 'ɗ',
+    'Ƌ' => 'ƌ',
+    'E' => 'e',
+    'Ｅ' => 'ｅ',
+    'É' => 'é',
+    'È' => 'è',
+    'Ê' => 'ê',
+    'Ế' => 'ế',
+    'Ề' => 'ề',
+    'Ễ' => 'ễ',
+    'Ể' => 'ể',
+    'Ě' => 'ě',
+    'Ẽ' => 'ẽ',
+    'Ė' => 'ė',
+    'Ȩ' => 'ȩ',
+    'Ḝ' => 'ḝ',
+    'Ę' => 'ę',
+    'Ē' => 'ē',
+    'Ḕ' => 'ḕ',
+    'Ẻ' => 'ẻ',
+    'Ȅ' => 'ȅ',
+    'Ȇ' => 'ȇ',
+    'Ẹ' => 'ẹ',
+    'Ệ' => 'ệ',
+    'Ḛ' => 'ḛ',
+    'Ǝ' => 'ǝ',
+    'Ə' => 'ə',
+    'Ɛ' => 'ɛ',
+    'F' => 'f',
+    'Ｆ' => 'ｆ',
+    'Ƒ' => 'ƒ',
+    'G' => 'g',
+    'Ｇ' => 'ｇ',
+    'Ǵ' => 'ǵ',
+    'Ğ' => 'ğ',
+    'Ĝ' => 'ĝ',
+    'Ġ' => 'ġ',
+    'Ģ' => 'ģ',
+    'Ḡ' => 'ḡ',
+    'Ǥ' => 'ǥ',
+    'Ɣ' => 'ɣ',
+    'Ƣ' => 'ƣ',
+    'Ｈ' => 'ｈ',
+    'Ĥ' => 'ĥ',
+    'Ȟ' => 'ȟ',
+    'Ḧ' => 'ḧ',
+    'Ḣ' => 'ḣ',
+    'Ḩ' => 'ḩ',
+    'Ḥ' => 'ḥ',
+    'Ḫ' => 'ḫ',
+    'Ƕ' => 'ƕ',
+    'I' => 'i',
+    'Ｉ' => 'ｉ',
+    'Í' => 'í',
+    'Ĭ' => 'ĭ',
+    'Î' => 'î',
+    'Ǐ' => 'ǐ',
+    'Ï' => 'ï',
+    'Ḯ' => 'ḯ',
+    'Ĩ' => 'ĩ',
+    'Ī' => 'ī',
+    'Ỉ' => 'ỉ',
+    'Ȉ' => 'ȉ',
+    'Ȋ' => 'ȋ',
+    'Ị' => 'ị',
+    'Ḭ' => 'ḭ',
+    'Ɨ' => 'ɨ',
+    'Ɩ' => 'ɩ',
+    'J' => 'j',
+    'Ｊ' => 'ｊ',
+    'Ĵ' => 'ĵ',
+    'K' => 'k',
+    'Ḱ' => 'ḱ',
+    'Ǩ' => 'ǩ',
+    'Ķ' => 'ķ',
+    'Ḳ' => 'ḳ',
+    'Ḵ' => 'ḵ',
+    'Ƙ' => 'ƙ',
+    'Ｌ' => 'ｌ',
+    'Ĺ' => 'ĺ',
+    'Ľ' => 'ľ',
+    'Ļ' => 'ļ',
+    'Ł' => 'ł',
+    'Ḷ' => 'ḷ',
+    'Ḽ' => 'ḽ',
+    'Ḻ' => 'ḻ',
+    'Ŀ' => 'ŀ',
+    'ǈ' => 'ǉ', // FIXME
+    'M' => 'm',
+    'Ｍ' => 'ｍ',
+    'Ṁ' => 'ṁ',
+    'Ṃ' => 'ṃ',
+    'N' => 'n',
+    'Ｎ' => 'ｎ',
+    'Ń' => 'ń',
+    'Ǹ' => 'ǹ',
+    'Ñ' => 'ñ',
+    'Ṅ' => 'ṅ',
+    'Ņ' => 'ņ',
+    'Ṇ' => 'ṇ',
+    'Ṋ' => 'ṋ',
+    'Ṉ' => 'ṉ',
+    'Ɲ' => 'ɲ',
+    'Ƞ' => 'ƞ',
+    'Ŋ' => 'ŋ',
+    'O' => 'o',
+    'Ｏ' => 'ｏ',
+    'Ó' => 'ó',
+    'Ŏ' => 'ŏ',
+    'Ô' => 'ô',
+    'Ố' => 'ố',
+    'Ồ' => 'ồ',
+    'Ỗ' => 'ỗ',
+    'Ổ' => 'ổ',
+    'Ö' => 'ö',
+    'Ȫ' => 'ȫ',
+    'Ő' => 'ő',
+    'Õ' => 'õ',
+    'Ṍ' => 'ṍ',
+    'Ṏ' => 'ṏ',
+    'Ȯ' => 'ȯ',
+    'Ȱ' => 'ȱ',
+    'Ø' => 'ø',
+    'Ǿ' => 'ǿ',
+    'Ǫ' => 'ǫ',
+    'Ǭ' => 'ǭ',
+    'Ṓ' => 'ṓ',
+    'Ṑ' => 'ṑ',
+    'Ỏ' => 'ỏ',
+    'Ȍ' => 'ȍ',
+    'Ȏ' => 'ȏ',
+    'Ơ' => 'ơ',
+    'Ờ' => 'ờ',
+    'Ỡ' => 'ỡ',
+    'Ở' => 'ở',
+    'Ợ' => 'ợ',
+    'Ọ' => 'ọ',
+    'Ộ' => 'ộ',
+    'Ɔ' => 'ɔ',
+    'Ɵ' => 'ɵ',
+    'Ȣ' => 'ȣ',
+    'P' => 'p',
+    'Ｐ' => 'ｐ',
+    'Ṕ' => 'ṕ',
+    'Ƥ' => 'ƥ',
+    'Q' => 'q',
+    'Ｑ' => 'ｑ',
+    'R' => 'r',
+    'Ｒ' => 'ｒ',
+    'Ŕ' => 'ŕ',
+    'Ṙ' => 'ṙ',
+    'Ŗ' => 'ŗ',
+    'Ȑ' => 'ȑ',
+    'Ȓ' => 'ȓ',
+    'Ṛ' => 'ṛ',
+    'Ṝ' => 'ṝ',
+    'Ʀ' => 'ʀ',
+    'S' => 's',
+    'Ｓ' => 'ｓ',
+    'Ś' => 'ś',
+    'Ṥ' => 'ṥ',
+    'Ŝ' => 'ŝ',
+    'Ṧ' => 'ṧ',
+    'Ṡ' => 'ṡ',
+    'Ş' => 'ş',
+    'Ṣ' => 'ṣ',
+    'Ṩ' => 'ṩ',
+    'Ș' => 'ș',
+    'T' => 't',
+    'Ｔ' => 'ｔ',
+    'Ť' => 'ť',
+    'Ṫ' => 'ṫ',
+    'Ţ' => 'ţ',
+    'Ṭ' => 'ṭ',
+    'Ṱ' => 'ṱ',
+    'Ṯ' => 'ṯ',
+    'Ŧ' => 'ŧ',
+    'Ƭ' => 'ƭ',
+    'Ʈ' => 'ʈ',
+    'U' => 'u',
+    'Ú' => 'ú',
+    'Ù' => 'ù',
+    'Ŭ' => 'ŭ',
+    'Û' => 'û',
+    'Ǔ' => 'ǔ',
+    'Ů' => 'ů',
+    'Ǘ' => 'ǘ',
+    'Ǜ' => 'ǜ',
+    'Ǚ' => 'ǚ',
+    'Ǖ' => 'ǖ',
+    'Ű' => 'ű',
+    'Ũ' => 'ũ',
+    'Ų' => 'ų',
+    'Ū' => 'ū',
+    'Ṻ' => 'ṻ',
+    'Ủ' => 'ủ',
+    'Ȕ' => 'ȕ',
+    'Ȗ' => 'ȗ',
+    'Ứ' => 'ứ',
+    'Ừ' => 'ừ',
+    'Ữ' => 'ữ',
+    'Ử' => 'ử',
+    'Ự' => 'ự',
+    'Ụ' => 'ụ',
+    'Ṷ' => 'ṷ',
+    'Ṵ' => 'ṵ',
+    'Ɯ' => 'ɯ',
+    'Ʊ' => 'ʊ',
+    'V' => 'v',
+    'Ｖ' => 'ｖ',
+    'Ṿ' => 'ṿ',
+    'Ʋ' => 'ʋ',
+    'W' => 'w',
+    'Ｗ' => 'ｗ',
+    'Ẃ' => 'ẃ',
+    'Ẁ' => 'ẁ',
+    'Ẅ' => 'ẅ',
+    'Ẇ' => 'ẇ',
+    'Ẉ' => 'ẉ',
+    'X' => 'x',
+    'Ｘ' => 'ｘ',
+    'Ẍ' => 'ẍ',
+    'Y' => 'y',
+    'Ｙ' => 'ｙ',
+    'Ý' => 'ý',
+    'Ỳ' => 'ỳ',
+    'Ŷ' => 'ŷ',
+    'Ÿ' => 'ÿ',
+    'Ẏ' => 'ẏ',
+    'Ȳ' => 'ȳ',
+    'Ỷ' => 'ỷ',
+    'Ỵ' => 'ỵ',
+    'Ƴ' => 'ƴ',
+    'Ȝ' => 'ȝ',
+    'Ｚ' => 'ｚ',
+    'Ź' => 'ź',
+    'Ẑ' => 'ẑ',
+    'Ž' => 'ž',
+    'Ż' => 'ż',
+    'Ẓ' => 'ẓ',
+    'Ƶ' => 'ƶ',
+    'Ȥ' => 'ȥ',
+    'Ʒ' => 'ʒ',
+    'Ǯ' => 'ǯ',
+    'Ƹ' => 'ƹ',
+    'Þ' => 'þ',
+    'Ƨ' => 'ƨ',
+    'Ƽ' => 'ƽ',
+    'Ƅ' => 'ƅ',
+    'Α' => 'α',
+    'Ἀ' => 'ἀ',
+    'Ἄ' => 'ἄ',
+    'Ἂ' => 'ἂ',
+    'ᾊ' => 'ᾂ',
+    'Ἆ' => 'ἆ',
+    'ᾎ' => 'ᾆ',
+    'ᾈ' => 'ᾀ',
+    'Ἁ' => 'ἁ',
+    'ᾍ' => 'ᾅ',
+    'Ἃ' => 'ἃ',
+    'ᾋ' => 'ᾃ',
+    'Ἇ' => 'ἇ',
+    'ᾏ' => 'ᾇ',
+    'ᾉ' => 'ᾁ',
+    'Ὰ' => 'ὰ',
+    'Ᾰ' => 'ᾰ',
+    'Ᾱ' => 'ᾱ',
+    'ᾼ' => 'ᾳ',
+    'Β' => 'β',
+    'Γ' => 'γ',
+    'Ε' => 'ε',
+    'Ἐ' => 'ἐ',
+    'Ἔ' => 'ἔ',
+    'Ἒ' => 'ἒ',
+    'Ἑ' => 'ἑ',
+    'Ἕ' => 'ἕ',
+    'Έ' => 'έ',
+    'Ὲ' => 'ὲ',
+    'Ϝ' => 'ϝ',
+    'Ϛ' => 'ϛ',
+    'Ζ' => 'ζ',
+    'Η' => 'η',
+    'ᾜ' => 'ᾔ',
+    'Ἢ' => 'ἢ',
+    'ᾚ' => 'ᾒ',
+    'Ἦ' => 'ἦ',
+    'ᾞ' => 'ᾖ',
+    'ᾘ' => 'ᾐ',
+    'Ἥ' => 'ἥ',
+    'ᾝ' => 'ᾕ',
+    'Ἣ' => 'ἣ',
+    'ᾛ' => 'ᾓ',
+    'Ἧ' => 'ἧ',
+    'ᾟ' => 'ᾗ',
+    'Ή' => 'ή',
+    'Ὴ' => 'ὴ',
+    'ῌ' => 'ῃ',
+    'Θ' => 'θ',
+    'Ι' => 'ι',
+    'Ἰ' => 'ἰ',
+    'Ἲ' => 'ἲ',
+    'Ἶ' => 'ἶ',
+    'Ἱ' => 'ἱ',
+    'Ἵ' => 'ἵ',
+    'Ἳ' => 'ἳ',
+    'Ἷ' => 'ἷ',
+    'Ὶ' => 'ὶ',
+    'Ῐ' => 'ῐ',
+    'Ϊ' => 'ϊ',
+    'Ῑ' => 'ῑ',
+    'Κ' => 'κ',
+    'Λ' => 'λ',
+    'Ν' => 'ν',
+    'Ξ' => 'ξ',
+    'Ο' => 'ο',
+    'Ὀ' => 'ὀ',
+    'Ὄ' => 'ὄ',
+    'Ὂ' => 'ὂ',
+    'Ὅ' => 'ὅ',
+    'Ὃ' => 'ὃ',
+    'Ό' => 'ό',
+    'Ὸ' => 'ὸ',
+    'Π' => 'π',
+    'Ϟ' => 'ϟ',
+    'Ρ' => 'ρ',
+    'Ῥ' => 'ῥ',
+    'Σ' => 'ς',
+    'Τ' => 'τ',
+    'Υ' => 'υ',
+    'Ὑ' => 'ὑ',
+    'Ὓ' => 'ὓ',
+    'Ὗ' => 'ὗ',
+    'Ύ' => 'ύ',
+    'Ὺ' => 'ὺ',
+    'Ϋ' => 'ϋ',
+    'Ῡ' => 'ῡ',
+    'Χ' => 'χ',
+    'Ψ' => 'ψ',
+    'Ω' => 'ω',
+    'Ὤ' => 'ὤ',
+    'ᾬ' => 'ᾤ',
+    'Ὢ' => 'ὢ',
+    'Ὦ' => 'ὦ',
+    'ᾮ' => 'ᾦ',
+    'Ὡ' => 'ὡ',
+    'Ὥ' => 'ὥ',
+    'ᾭ' => 'ᾥ',
+    'Ὣ' => 'ὣ',
+    'Ὧ' => 'ὧ',
+    'ᾯ' => 'ᾧ',
+    'ᾩ' => 'ᾡ',
+    'Ώ' => 'ώ',
+    'Ὼ' => 'ὼ',
+    'ῼ' => 'ῳ',
+    'Ϣ' => 'ϣ',
+    'Ϥ' => 'ϥ',
+    'Ϧ' => 'ϧ',
+    'Ϩ' => 'ϩ',
+    'Ϫ' => 'ϫ',
+    'Ϭ' => 'ϭ',
+    'А' => 'а',
+    'Ӑ' => 'ӑ',
+    'Ӓ' => 'ӓ',
+    'Ә' => 'ә',
+    'Ӛ' => 'ӛ',
+    'Ӕ' => 'ӕ',
+    'В' => 'в',
+    'Г' => 'г',
+    'Ѓ' => 'ѓ',
+    'Ґ' => 'ґ',
+    'Ғ' => 'ғ',
+    'Ҕ' => 'ҕ',
+    'Ԁ' => 'ԁ',
+    'Ђ' => 'ђ',
+    'Ԃ' => 'ԃ',
+    'Ҙ' => 'ҙ',
+    'Е' => 'е',
+    'Ѐ' => 'ѐ',
+    'Ё' => 'ё',
+    'Є' => 'є',
+    'Ж' => 'ж',
+    'Ӂ' => 'ӂ',
+    'Ӝ' => 'ӝ',
+    'Җ' => 'җ',
+    'Ӟ' => 'ӟ',
+    'Ԅ' => 'ԅ',
+    'Ѕ' => 'ѕ',
+    'Ӡ' => 'ӡ',
+    'Ԇ' => 'ԇ',
+    'И' => 'и',
+    'Ӥ' => 'ӥ',
+    'Ӣ' => 'ӣ',
+    'Ҋ' => 'ҋ',
+    'І' => 'і',
+    'Ї' => 'ї',
+    'Й' => 'й',
+    'К' => 'к',
+    'Ќ' => 'ќ',
+    'Қ' => 'қ',
+    'Ӄ' => 'ӄ',
+    'Ҡ' => 'ҡ',
+    'Ҟ' => 'ҟ',
+    'Л' => 'л',
+    'Ӆ' => 'ӆ',
+    'Љ' => 'љ',
+    'Ԉ' => 'ԉ',
+    'М' => 'м',
+    'Ӎ' => 'ӎ',
+    'Ӊ' => 'ӊ',
+    'Ң' => 'ң',
+    'Ӈ' => 'ӈ',
+    'Ҥ' => 'ҥ',
+    'Њ' => 'њ',
+    'Ԋ' => 'ԋ',
+    'Ӧ' => 'ӧ',
+    'Ө' => 'ө',
+    'Ӫ' => 'ӫ',
+    'П' => 'п',
+    'Ҧ' => 'ҧ',
+    'Ҁ' => 'ҁ',
+    'Ҏ' => 'ҏ',
+    'С' => 'с',
+    'Ԍ' => 'ԍ',
+    'Ҫ' => 'ҫ',
+    'Т' => 'т',
+    'Ԏ' => 'ԏ',
+    'Ћ' => 'ћ',
+    'У' => 'у',
+    'Ў' => 'ў',
+    'Ӱ' => 'ӱ',
+    'Ӳ' => 'ӳ',
+    'Ӯ' => 'ӯ',
+    'Ұ' => 'ұ',
+    'Ѹ' => 'ѹ',
+    'Ф' => 'ф',
+    'Х' => 'х',
+    'Ҳ' => 'ҳ',
+    'Һ' => 'һ',
+    'Ѿ' => 'ѿ',
+    'Ѽ' => 'ѽ',
+    'Ѻ' => 'ѻ',
+    'Ц' => 'ц',
+    'Ҵ' => 'ҵ',
+    'Ч' => 'ч',
+    'Ҷ' => 'ҷ',
+    'Ӌ' => 'ӌ',
+    'Ҹ' => 'ҹ',
+    'Ҽ' => 'ҽ',
+    'Ҿ' => 'ҿ',
+    'Џ' => 'џ',
+    'Щ' => 'щ',
+    'Ъ' => 'ъ',
+    'Ы' => 'ы',
+    'Ӹ' => 'ӹ',
+    'Ь' => 'ь',
+    'Ҍ' => 'ҍ',
+    'Э' => 'э',
+    'Ӭ' => 'ӭ',
+    'Ю' => 'ю',
+    'Я' => 'я',
+    'Ѥ' => 'ѥ',
+    'Ѧ' => 'ѧ',
+    'Ѩ' => 'ѩ',
+    'Ѭ' => 'ѭ',
+    'Ѯ' => 'ѯ',
+    'Ѱ' => 'ѱ',
+    'Ѳ' => 'ѳ',
+    'Ѵ' => 'ѵ',
+    'Ҩ' => 'ҩ',
+    'Ա' => 'ա',
+    'Բ' => 'բ',
+    'Գ' => 'գ',
+    'Դ' => 'դ',
+    'Ե' => 'ե',
+    'Է' => 'է',
+    'Ը' => 'ը',
+    'Թ' => 'թ',
+    'Ժ' => 'ժ',
+    'Ի' => 'ի',
+    'Լ' => 'լ',
+    'Ծ' => 'ծ',
+    'Կ' => 'կ',
+    'Հ' => 'հ',
+    'Ձ' => 'ձ',
+    'Ղ' => 'ղ',
+    'Ճ' => 'ճ',
+    'Յ' => 'յ',
+    'Ն' => 'ն',
+    'Շ' => 'շ',
+    'Ո' => 'ո',
+    'Չ' => 'չ',
+    'Պ' => 'պ',
+    'Ռ' => 'ռ',
+    'Ս' => 'ս',
+    'Վ' => 'վ',
+    'Տ' => 'տ',
+    'Ր' => 'ր',
+    'Ց' => 'ց',
+    'Փ' => 'փ',
+    'Ք' => 'ք',
+    'Օ' => 'օ',
+    'Ֆ' => 'ֆ',
+];
--- a/inc/Utf8/tables/loweraccents.php
+++ b/inc/Utf8/tables/loweraccents.php
@ -0,0 +1,116 @@
+<?php
+/**
+ * UTF-8 lookup table for lower case accented letters
+ *
+ * This lookup table maps accented characters to replacements from the ASCII-7
+ * range. These are lower case letters only.
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @see    \dokuwiki\Utf8\Clean::deaccent()
+ */
+return [
+    'á' => 'a',
+    'à' => 'a',
+    'ă' => 'a',
+    'â' => 'a',
+    'å' => 'a',
+    'ä' => 'ae',
+    'ã' => 'a',
+    'ą' => 'a',
+    'ā' => 'a',
+    'æ' => 'ae',
+    'ḃ' => 'b',
+    'ć' => 'c',
+    'ĉ' => 'c',
+    'č' => 'c',
+    'ċ' => 'c',
+    'ç' => 'c',
+    'ď' => 'd',
+    'ḋ' => 'd',
+    'đ' => 'd',
+    'ð' => 'dh',
+    'é' => 'e',
+    'è' => 'e',
+    'ĕ' => 'e',
+    'ê' => 'e',
+    'ě' => 'e',
+    'ë' => 'e',
+    'ė' => 'e',
+    'ę' => 'e',
+    'ē' => 'e',
+    'ḟ' => 'f',
+    'ƒ' => 'f',
+    'ğ' => 'g',
+    'ĝ' => 'g',
+    'ġ' => 'g',
+    'ģ' => 'g',
+    'ĥ' => 'h',
+    'ħ' => 'h',
+    'í' => 'i',
+    'ì' => 'i',
+    'î' => 'i',
+    'ï' => 'i',
+    'ĩ' => 'i',
+    'į' => 'i',
+    'ī' => 'i',
+    'ĵ' => 'j',
+    'ķ' => 'k',
+    'ĺ' => 'l',
+    'ľ' => 'l',
+    'ļ' => 'l',
+    'ł' => 'l',
+    'ṁ' => 'm',
+    'ń' => 'n',
+    'ň' => 'n',
+    'ñ' => 'n',
+    'ņ' => 'n',
+    'ó' => 'o',
+    'ò' => 'o',
+    'ô' => 'o',
+    'ö' => 'oe',
+    'ő' => 'o',
+    'õ' => 'o',
+    'ø' => 'o',
+    'ō' => 'o',
+    'ơ' => 'o',
+    'ṗ' => 'p',
+    'ŕ' => 'r',
+    'ř' => 'r',
+    'ŗ' => 'r',
+    'ś' => 's',
+    'ŝ' => 's',
+    'š' => 's',
+    'ṡ' => 's',
+    'ş' => 's',
+    'ș' => 's',
+    'ß' => 'ss',
+    'ť' => 't',
+    'ṫ' => 't',
+    'ţ' => 't',
+    'ț' => 't',
+    'ŧ' => 't',
+    'ú' => 'u',
+    'ù' => 'u',
+    'ŭ' => 'u',
+    'û' => 'u',
+    'ů' => 'u',
+    'ü' => 'ue',
+    'ű' => 'u',
+    'ũ' => 'u',
+    'ų' => 'u',
+    'ū' => 'u',
+    'ư' => 'u',
+    'ẃ' => 'w',
+    'ẁ' => 'w',
+    'ŵ' => 'w',
+    'ẅ' => 'w',
+    'ý' => 'y',
+    'ỳ' => 'y',
+    'ŷ' => 'y',
+    'ÿ' => 'y',
+    'ź' => 'z',
+    'ž' => 'z',
+    'ż' => 'z',
+    'þ' => 'th',
+    'µ' => 'u',
+];
--- a/inc/Utf8/tables/romanization.php
+++ b/inc/Utf8/tables/romanization.php
--- a/inc/Utf8/tables/specials.php
+++ b/inc/Utf8/tables/specials.php
@ -0,0 +1,620 @@
+<?php
+/**
+ * UTF-8 array of common special characters
+ *
+ * This array should contain all special characters (not a letter or digit)
+ * defined in the various local charsets - it's not a complete list of non-alphanum
+ * characters in UTF-8. It's not perfect but should match most cases of special
+ * chars.
+ *
+ * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
+ * These chars are _not_ in the array either:  _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a
+ *
+ * Every entry is a Unicode code point, never a UTF-8 byte sequence (NBSP is 0xa0, not 0xc2a0).
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @see    \dokuwiki\Utf8\Clean::stripspecials()
+ */
+return [
+    0x1a, // <control: SUB>
+    0x1b, // <control: ESC>
+    0x1c, // <control: FS>
+    0x1d, // <control: GS>
+    0x1e, // <control: RS>
+    0x1f, // <control: US>
+    0x20, // <space>
+    0x21, // !
+    0x22, // "
+    0x23, // #
+    0x24, // $
+    0x25, // %
+    0x26, // &
+    0x27, // '
+    0x28, // (
+    0x29, // )
+    0x2b, // +
+    0x2c, // ,
+    0x2f, // /
+    0x3b, // ;
+    0x3c, // <
+    0x3d, // =
+    0x3e, // >
+    0x3f, // ?
+    0x40, // @
+    0x5b, // [
+    0x5c, // <backslash>
+    0x5d, // ]
+    0x5e, // ^
+    0x60, // `
+    0x7b, // {
+    0x7c, // |
+    0x7d, // }
+    0x7e, // ~
+    0x7f, // <control: DEL>
+    0x80, // <C1 control>
+    0x81, // <C1 control>
+    0x82, // <C1 control>
+    0x83, // <C1 control>
+    0x84, // <C1 control>
+    0x85, // <C1 control>
+    0x86, // <C1 control>
+    0x87, // <C1 control>
+    0x88, // <C1 control>
+    0x89, // <C1 control>
+    0x8a, // <C1 control>
+    0x8b, // <C1 control>
+    0x8c, // <C1 control>
+    0x8d, // <C1 control>
+    0x8e, // <C1 control>
+    0x8f, // <C1 control>
+    0x90, // <C1 control>
+    0x91, // <C1 control>
+    0x92, // <C1 control>
+    0x93, // <C1 control>
+    0x94, // <C1 control>
+    0x95, // <C1 control>
+    0x96, // <C1 control>
+    0x97, // <C1 control>
+    0x98, // <C1 control>
+    0x99, // <C1 control>
+    0x9a, // <C1 control>
+    0x9b, // <C1 control>
+    0x9c, // <C1 control>
+    0x9d, // <C1 control>
+    0x9e, // <C1 control>
+    0x9f, // <C1 control>
+    0xa0, // <no-break space>
+    0xa1, // ¡
+    0xa2, // ¢
+    0xa3, // £
+    0xa4, // ¤
+    0xa5, // ¥
+    0xa6, // ¦
+    0xa7, // §
+    0xa8, // ¨
+    0xa9, // ©
+    0xaa, // ª
+    0xab, // «
+    0xac, // ¬
+    0xad, // <soft hyphen>
+    0xae, // ®
+    0xaf, // ¯
+    0xb0, // °
+    0xb1, // ±
+    0xb2, // ²
+    0xb3, // ³
+    0xb4, // ´
+    0xb5, // µ
+    0xb6, // ¶
+    0xb7, // ·
+    0xb8, // ¸
+    0xb9, // ¹
+    0xba, // º
+    0xbb, // »
+    0xbc, // ¼
+    0xbd, // ½
+    0xbe, // ¾
+    0xbf, // ¿
+    0xd7, // ×
+    0xf7, // ÷
+    0x2c7, // ˇ
+    0x2d8, // ˘
+    0x2d9, // ˙
+    0x2da, // ˚
+    0x2db, // ˛
+    0x2dc, // ˜
+    0x2dd, // ˝
+    0x300, // ̀
+    0x301, // ́
+    0x303, // ̃
+    0x309, // ̉
+    0x323, // ̣
+    0x384, // ΄
+    0x385, // ΅
+    0x387, // ·
+    0x3c6, // φ
+    0x3d1, // ϑ
+    0x3d2, // ϒ
+    0x3d5, // ϕ
+    0x3d6, // ϖ
+    0x5b0, // ְ
+    0x5b1, // ֱ
+    0x5b2, // ֲ
+    0x5b3, // ֳ
+    0x5b4, // ִ
+    0x5b5, // ֵ
+    0x5b6, // ֶ
+    0x5b7, // ַ
+    0x5b8, // ָ
+    0x5b9, // ֹ
+    0x5bb, // ֻ
+    0x5bc, // ּ
+    0x5bd, // ֽ
+    0x5be, // ־
+    0x5bf, // ֿ
+    0x5c0, // ׀
+    0x5c1, // ׁ
+    0x5c2, // ׂ
+    0x5c3, // ׃
+    0x5f3, // ׳
+    0x5f4, // ״
+    0x60c, // ،
+    0x61b, // ؛
+    0x61f, // ؟
+    0x640, // ـ
+    0x64b, // ً
+    0x64c, // ٌ
+    0x64d, // ٍ
+    0x64e, // َ
+    0x64f, // ُ
+    0x650, // ِ
+    0x651, // ّ
+    0x652, // ْ
+    0x66a, // ٪
+    0xe3f, // ฿
+    0x2007, // <figure space> (was wrongly listed as the UTF-8 byte sequence 0xe28087)
+    0x200c, // <zero width non-joiner>
+    0x200d, // <zero width joiner>
+    0x200e, // <left-to-right mark>
+    0x200f, // <right-to-left mark>
+    0x2013, // –
+    0x2014, // —
+    0x2015, // ―
+    0x2017, // ‗
+    0x2018, // ‘
+    0x2019, // ’
+    0x201a, // ‚
+    0x201c, // “
+    0x201d, // ”
+    0x201e, // „
+    0x2020, // †
+    0x2021, // ‡
+    0x2022, // •
+    0x2026, // …
+    0x202f, // <narrow no-break space> (was wrongly listed as the UTF-8 byte sequence 0xe280af)
+    0x2030, // ‰
+    0x2032, // ′
+    0x2033, // ″
+    0x2039, // ‹
+    0x203a, // ›
+    0x2044, // ⁄
+    0x2060, // <word joiner> (was wrongly listed as the UTF-8 byte sequence 0xe281a0)
+    0x20a7, // ₧
+    0x20aa, // ₪
+    0x20ab, // ₫
+    0x20ac, // €
+    0x2116, // №
+    0x2118, // ℘
+    0x2122, // ™
+    0x2126, // Ω
+    0x2135, // ℵ
+    0x2190, // ←
+    0x2191, // ↑
+    0x2192, // →
+    0x2193, // ↓
+    0x2194, // ↔
+    0x2195, // ↕
+    0x21b5, // ↵
+    0x21d0, // ⇐
+    0x21d1, // ⇑
+    0x21d2, // ⇒
+    0x21d3, // ⇓
+    0x21d4, // ⇔
+    0x2200, // ∀
+    0x2202, // ∂
+    0x2203, // ∃
+    0x2205, // ∅
+    0x2206, // ∆
+    0x2207, // ∇
+    0x2208, // ∈
+    0x2209, // ∉
+    0x220b, // ∋
+    0x220f, // ∏
+    0x2211, // ∑
+    0x2212, // −
+    0x2215, // ∕
+    0x2217, // ∗
+    0x2219, // ∙
+    0x221a, // √
+    0x221d, // ∝
+    0x221e, // ∞
+    0x2220, // ∠
+    0x2227, // ∧
+    0x2228, // ∨
+    0x2229, // ∩
+    0x222a, // ∪
+    0x222b, // ∫
+    0x2234, // ∴
+    0x223c, // ∼
+    0x2245, // ≅
+    0x2248, // ≈
+    0x2260, // ≠
+    0x2261, // ≡
+    0x2264, // ≤
+    0x2265, // ≥
+    0x2282, // ⊂
+    0x2283, // ⊃
+    0x2284, // ⊄
+    0x2286, // ⊆
+    0x2287, // ⊇
+    0x2295, // ⊕
+    0x2297, // ⊗
+    0x22a5, // ⊥
+    0x22c5, // ⋅
+    0x2310, // ⌐
+    0x2320, // ⌠
+    0x2321, // ⌡
+    0x2329, // 〈
+    0x232a, // 〉
+    0x2469, // ⑩
+    0x2500, // ─
+    0x2502, // │
+    0x250c, // ┌
+    0x2510, // ┐
+    0x2514, // └
+    0x2518, // ┘
+    0x251c, // ├
+    0x2524, // ┤
+    0x252c, // ┬
+    0x2534, // ┴
+    0x253c, // ┼
+    0x2550, // ═
+    0x2551, // ║
+    0x2552, // ╒
+    0x2553, // ╓
+    0x2554, // ╔
+    0x2555, // ╕
+    0x2556, // ╖
+    0x2557, // ╗
+    0x2558, // ╘
+    0x2559, // ╙
+    0x255a, // ╚
+    0x255b, // ╛
+    0x255c, // ╜
+    0x255d, // ╝
+    0x255e, // ╞
+    0x255f, // ╟
+    0x2560, // ╠
+    0x2561, // ╡
+    0x2562, // ╢
+    0x2563, // ╣
+    0x2564, // ╤
+    0x2565, // ╥
+    0x2566, // ╦
+    0x2567, // ╧
+    0x2568, // ╨
+    0x2569, // ╩
+    0x256a, // ╪
+    0x256b, // ╫
+    0x256c, // ╬
+    0x2580, // ▀
+    0x2584, // ▄
+    0x2588, // █
+    0x258c, // ▌
+    0x2590, // ▐
+    0x2591, // ░
+    0x2592, // ▒
+    0x2593, // ▓
+    0x25a0, // ■
+    0x25b2, // ▲
+    0x25bc, // ▼
+    0x25c6, // ◆
+    0x25ca, // ◊
+    0x25cf, // ●
+    0x25d7, // ◗
+    0x2605, // ★
+    0x260e, // ☎
+    0x261b, // ☛
+    0x261e, // ☞
+    0x2660, // ♠
+    0x2663, // ♣
+    0x2665, // ♥
+    0x2666, // ♦
+    0x2701, // ✁
+    0x2702, // ✂
+    0x2703, // ✃
+    0x2704, // ✄
+    0x2706, // ✆
+    0x2707, // ✇
+    0x2708, // ✈
+    0x2709, // ✉
+    0x270c, // ✌
+    0x270d, // ✍
+    0x270e, // ✎
+    0x270f, // ✏
+    0x2710, // ✐
+    0x2711, // ✑
+    0x2712, // ✒
+    0x2713, // ✓
+    0x2714, // ✔
+    0x2715, // ✕
+    0x2716, // ✖
+    0x2717, // ✗
+    0x2718, // ✘
+    0x2719, // ✙
+    0x271a, // ✚
+    0x271b, // ✛
+    0x271c, // ✜
+    0x271d, // ✝
+    0x271e, // ✞
+    0x271f, // ✟
+    0x2720, // ✠
+    0x2721, // ✡
+    0x2722, // ✢
+    0x2723, // ✣
+    0x2724, // ✤
+    0x2725, // ✥
+    0x2726, // ✦
+    0x2727, // ✧
+    0x2729, // ✩
+    0x272a, // ✪
+    0x272b, // ✫
+    0x272c, // ✬
+    0x272d, // ✭
+    0x272e, // ✮
+    0x272f, // ✯
+    0x2730, // ✰
+    0x2731, // ✱
+    0x2732, // ✲
+    0x2733, // ✳
+    0x2734, // ✴
+    0x2735, // ✵
+    0x2736, // ✶
+    0x2737, // ✷
+    0x2738, // ✸
+    0x2739, // ✹
+    0x273a, // ✺
+    0x273b, // ✻
+    0x273c, // ✼
+    0x273d, // ✽
+    0x273e, // ✾
+    0x273f, // ✿
+    0x2740, // ❀
+    0x2741, // ❁
+    0x2742, // ❂
+    0x2743, // ❃
+    0x2744, // ❄
+    0x2745, // ❅
+    0x2746, // ❆
+    0x2747, // ❇
+    0x2748, // ❈
+    0x2749, // ❉
+    0x274a, // ❊
+    0x274b, // ❋
+    0x274d, // ❍
+    0x274f, // ❏
+    0x2750, // ❐
+    0x2751, // ❑
+    0x2752, // ❒
+    0x2756, // ❖
+    0x2758, // ❘
+    0x2759, // ❙
+    0x275a, // ❚
+    0x275b, // ❛
+    0x275c, // ❜
+    0x275d, // ❝
+    0x275e, // ❞
+    0x2761, // ❡
+    0x2762, // ❢
+    0x2763, // ❣
+    0x2764, // ❤
+    0x2765, // ❥
+    0x2766, // ❦
+    0x2767, // ❧
+    0x277f, // ❿
+    0x2789, // ➉
+    0x2793, // ➓
+    0x2794, // ➔
+    0x2798, // ➘
+    0x2799, // ➙
+    0x279a, // ➚
+    0x279b, // ➛
+    0x279c, // ➜
+    0x279d, // ➝
+    0x279e, // ➞
+    0x279f, // ➟
+    0x27a0, // ➠
+    0x27a1, // ➡
+    0x27a2, // ➢
+    0x27a3, // ➣
+    0x27a4, // ➤
+    0x27a5, // ➥
+    0x27a6, // ➦
+    0x27a7, // ➧
+    0x27a8, // ➨
+    0x27a9, // ➩
+    0x27aa, // ➪
+    0x27ab, // ➫
+    0x27ac, // ➬
+    0x27ad, // ➭
+    0x27ae, // ➮
+    0x27af, // ➯
+    0x27b1, // ➱
+    0x27b2, // ➲
+    0x27b3, // ➳
+    0x27b4, // ➴
+    0x27b5, // ➵
+    0x27b6, // ➶
+    0x27b7, // ➷
+    0x27b8, // ➸
+    0x27b9, // ➹
+    0x27ba, // ➺
+    0x27bb, // ➻
+    0x27bc, // ➼
+    0x27bd, // ➽
+    0x27be, // ➾
+    0x3000, // <ideographic space>
+    0x3001, // 、
+    0x3002, // 。
+    0x3003, // 〃
+    0x3008, // 〈
+    0x3009, // 〉
+    0x300a, // 《
+    0x300b, // 》
+    0x300c, // 「
+    0x300d, // 」
+    0x300e, // 『
+    0x300f, // 』
+    0x3010, // 【
+    0x3011, // 】
+    0x3012, // 〒
+    0x3014, // 〔
+    0x3015, // 〕
+    0x3016, // 〖
+    0x3017, // 〗
+    0x3018, // 〘
+    0x3019, // 〙
+    0x301a, // 〚
+    0x301b, // 〛
+    0x3036, // 〶
+    0xf6d9, // <private use>
+    0xf6da, // <private use>
+    0xf6db, // <private use>
+    0xf8d7, // <private use>
+    0xf8d8, // <private use>
+    0xf8d9, // <private use>
+    0xf8da, // <private use>
+    0xf8db, // <private use>
+    0xf8dc, // <private use>
+    0xf8dd, // <private use>
+    0xf8de, // <private use>
+    0xf8df, // <private use>
+    0xf8e0, // <private use>
+    0xf8e1, // <private use>
+    0xf8e2, // <private use>
+    0xf8e3, // <private use>
+    0xf8e4, // <private use>
+    0xf8e5, // <private use>
+    0xf8e6, // <private use>
+    0xf8e7, // <private use>
+    0xf8e8, // <private use>
+    0xf8e9, // <private use>
+    0xf8ea, // <private use>
+    0xf8eb, // <private use>
+    0xf8ec, // <private use>
+    0xf8ed, // <private use>
+    0xf8ee, // <private use>
+    0xf8ef, // <private use>
+    0xf8f0, // <private use>
+    0xf8f1, // <private use>
+    0xf8f2, // <private use>
+    0xf8f3, // <private use>
+    0xf8f4, // <private use>
+    0xf8f5, // <private use>
+    0xf8f6, // <private use>
+    0xf8f7, // <private use>
+    0xf8f8, // <private use>
+    0xf8f9, // <private use>
+    0xf8fa, // <private use>
+    0xf8fb, // <private use>
+    0xf8fc, // <private use>
+    0xf8fd, // <private use>
+    0xf8fe, // <private use>
+    0xfe7c, // ﹼ
+    0xfe7d, // ﹽ
+    0xfeff, // <byte order mark> (was wrongly listed as the UTF-8 byte sequence 0xefbbbf)
+    0xff01, // ！
+    0xff02, // ＂
+    0xff03, // ＃
+    0xff04, // ＄
+    0xff05, // ％
+    0xff06, // ＆
+    0xff07, // ＇
+    0xff08, // （
+    0xff09, // ）
+    0xff0a, // ＊
+    0xff0b, // ＋
+    0xff0c, // ，
+    0xff0d, // －
+    0xff0e, // ．
+    0xff0f, // ／
+    0xff1a, // ：
+    0xff1b, // ；
+    0xff1c, // ＜
+    0xff1d, // ＝
+    0xff1e, // ＞
+    0xff1f, // ？
+    0xff20, // ＠
+    0xff3b, // ［
+    0xff3c, // ＼
+    0xff3d, // ］
+    0xff3e, // ＾
+    0xff40, // ｀
+    0xff5b, // ｛
+    0xff5c, // ｜
+    0xff5d, // ｝
+    0xff5e, // ～
+    0xff5f, // ｟
+    0xff60, // ｠
+    0xff61, // ｡
+    0xff62, // ｢
+    0xff63, // ｣
+    0xff64, // ､
+    0xff65, // ･
+    0xffe0, // ￠
+    0xffe1, // ￡
+    0xffe2, // ￢
+    0xffe3, // ￣
+    0xffe4, // ￤
+    0xffe5, // ￥
+    0xffe6, // ￦
+    0xffe8, // ￨
+    0xffe9, // ￩
+    0xffea, // ￪
+    0xffeb, // ￫
+    0xffec, // ￬
+    0xffed, // ￭
+    0xffee, // ￮
+    0x1d6fc, // 𝛼
+    0x1d6fd, // 𝛽
+    0x1d6fe, // 𝛾
+    0x1d6ff, // 𝛿
+    0x1d700, // 𝜀
+    0x1d701, // 𝜁
+    0x1d702, // 𝜂
+    0x1d703, // 𝜃
+    0x1d704, // 𝜄
+    0x1d705, // 𝜅
+    0x1d706, // 𝜆
+    0x1d707, // 𝜇
+    0x1d708, // 𝜈
+    0x1d709, // 𝜉
+    0x1d70a, // 𝜊
+    0x1d70b, // 𝜋
+    0x1d70c, // 𝜌
+    0x1d70d, // 𝜍
+    0x1d70e, // 𝜎
+    0x1d70f, // 𝜏
+    0x1d710, // 𝜐
+    0x1d711, // 𝜑
+    0x1d712, // 𝜒
+    0x1d713, // 𝜓
+    0x1d714, // 𝜔
+    0x1d715, // 𝜕
+    0x1d716, // 𝜖
+    0x1d717, // 𝜗
+    0x1d718, // 𝜘
+    0x1d719, // 𝜙
+    0x1d71a, // 𝜚
+    0x1d71b, // 𝜛
+];
--- a/inc/Utf8/tables/upperaccents.php
+++ b/inc/Utf8/tables/upperaccents.php
@ -0,0 +1,114 @@
+<?php
+/**
+ * UTF-8 lookup table for upper case accented letters
+ *
+ * This lookup table maps accented characters to replacements from the ASCII-7
+ * range. These are upper case letters only.
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @see    \dokuwiki\Utf8\Clean::deaccent()
+ */
+return [
+    'Á' => 'A',
+    'À' => 'A',
+    'Ă' => 'A',
+    'Â' => 'A',
+    'Å' => 'A',
+    'Ä' => 'Ae',
+    'Ã' => 'A',
+    'Ą' => 'A',
+    'Ā' => 'A',
+    'Æ' => 'Ae',
+    'Ḃ' => 'B',
+    'Ć' => 'C',
+    'Ĉ' => 'C',
+    'Č' => 'C',
+    'Ċ' => 'C',
+    'Ç' => 'C',
+    'Ď' => 'D',
+    'Ḋ' => 'D',
+    'Đ' => 'D',
+    'Ð' => 'Dh',
+    'É' => 'E',
+    'È' => 'E',
+    'Ĕ' => 'E',
+    'Ê' => 'E',
+    'Ě' => 'E',
+    'Ë' => 'E',
+    'Ė' => 'E',
+    'Ę' => 'E',
+    'Ē' => 'E',
+    'Ḟ' => 'F',
+    'Ƒ' => 'F',
+    'Ğ' => 'G',
+    'Ĝ' => 'G',
+    'Ġ' => 'G',
+    'Ģ' => 'G',
+    'Ĥ' => 'H',
+    'Ħ' => 'H',
+    'Í' => 'I',
+    'Ì' => 'I',
+    'Î' => 'I',
+    'Ï' => 'I',
+    'Ĩ' => 'I',
+    'Į' => 'I',
+    'Ī' => 'I',
+    'Ĵ' => 'J',
+    'Ķ' => 'K',
+    'Ĺ' => 'L',
+    'Ľ' => 'L',
+    'Ļ' => 'L',
+    'Ł' => 'L',
+    'Ṁ' => 'M',
+    'Ń' => 'N',
+    'Ň' => 'N',
+    'Ñ' => 'N',
+    'Ņ' => 'N',
+    'Ó' => 'O',
+    'Ò' => 'O',
+    'Ô' => 'O',
+    'Ö' => 'Oe',
+    'Ő' => 'O',
+    'Õ' => 'O',
+    'Ø' => 'O',
+    'Ō' => 'O',
+    'Ơ' => 'O',
+    'Ṗ' => 'P',
+    'Ŕ' => 'R',
+    'Ř' => 'R',
+    'Ŗ' => 'R',
+    'Ś' => 'S',
+    'Ŝ' => 'S',
+    'Š' => 'S',
+    'Ṡ' => 'S',
+    'Ş' => 'S',
+    'Ș' => 'S',
+    'Ť' => 'T',
+    'Ṫ' => 'T',
+    'Ţ' => 'T',
+    'Ț' => 'T',
+    'Ŧ' => 'T',
+    'Ú' => 'U',
+    'Ù' => 'U',
+    'Ŭ' => 'U',
+    'Û' => 'U',
+    'Ů' => 'U',
+    'Ü' => 'Ue',
+    'Ű' => 'U',
+    'Ũ' => 'U',
+    'Ų' => 'U',
+    'Ū' => 'U',
+    'Ư' => 'U',
+    'Ẃ' => 'W',
+    'Ẁ' => 'W',
+    'Ŵ' => 'W',
+    'Ẅ' => 'W',
+    'Ý' => 'Y',
+    'Ỳ' => 'Y',
+    'Ŷ' => 'Y',
+    'Ÿ' => 'Y',
+    'Ź' => 'Z',
+    'Ž' => 'Z',
+    'Ż' => 'Z',
+    'Þ' => 'Th',
+];
--- a/inc/auth.php
+++ b/inc/auth.php
@ -518,7 +518,7 @@ function auth_isMember($memberlist, $user, array $groups) {

    // clean user and groups
    if(!$auth->isCaseSensitive()) {
-        $user   = utf8_strtolower($user);
+        $user   = \dokuwiki\Utf8\PhpString::strtolower($user);
        $groups = array_map('utf8_strtolower', $groups);
    }
    $user   = $auth->cleanUser($user);
@ -533,7 +533,7 @@ function auth_isMember($memberlist, $user, array $groups) {
    // compare cleaned values
    foreach($members as $member) {
        if($member == '@ALL' ) return true;
-        if(!$auth->isCaseSensitive()) $member = utf8_strtolower($member);
+        if(!$auth->isCaseSensitive()) $member = \dokuwiki\Utf8\PhpString::strtolower($member);
        if($member[0] == '@') {
            $member = $auth->cleanGroup(substr($member, 1));
            if(in_array($member, $groups)) return true;
@ -621,7 +621,7 @@ function auth_aclcheck_cb($data) {
    }

    if(!$auth->isCaseSensitive()) {
-        $user   = utf8_strtolower($user);
+        $user   = \dokuwiki\Utf8\PhpString::strtolower($user);
        $groups = array_map('utf8_strtolower', $groups);
    }
    $user   = auth_nameencode($auth->cleanUser($user));
@ -648,7 +648,7 @@ function auth_aclcheck_cb($data) {
            $match = preg_replace('/#.*$/', '', $match); //ignore comments
            $acl   = preg_split('/[ \t]+/', $match);
            if(!$auth->isCaseSensitive() && $acl[1] !== '@ALL') {
-                $acl[1] = utf8_strtolower($acl[1]);
+                $acl[1] = \dokuwiki\Utf8\PhpString::strtolower($acl[1]);
            }
            if(!in_array($acl[1], $groups)) {
                continue;
@ -678,7 +678,7 @@ function auth_aclcheck_cb($data) {
                $match = preg_replace('/#.*$/', '', $match); //ignore comments
                $acl   = preg_split('/[ \t]+/', $match);
                if(!$auth->isCaseSensitive() && $acl[1] !== '@ALL') {
-                    $acl[1] = utf8_strtolower($acl[1]);
+                    $acl[1] = \dokuwiki\Utf8\PhpString::strtolower($acl[1]);
                }
                if(!in_array($acl[1], $groups)) {
                    continue;
--- a/inc/changelog.php
+++ b/inc/changelog.php
@ -93,7 +93,7 @@ function addLogEntry($date, $id, $type=DOKU_CHANGE_TYPE_EDIT, $summary='', $extr
        'type'       => str_replace($strip, '', $type),
        'id'         => $id,
        'user'       => $user,
-        'sum'        => utf8_substr(str_replace($strip, '', $summary), 0, 255),
+        'sum'        => \dokuwiki\Utf8\PhpString::substr(str_replace($strip, '', $summary), 0, 255),
        'extra'      => str_replace($strip, '', $extra),
        'sizechange' => $sizechange
    );
@ -180,7 +180,7 @@ function addMediaLogEntry(
        'type'       => str_replace($strip, '', $type),
        'id'         => $id,
        'user'       => $user,
-        'sum'        => utf8_substr(str_replace($strip, '', $summary), 0, 255),
+        'sum'        => \dokuwiki\Utf8\PhpString::substr(str_replace($strip, '', $summary), 0, 255),
        'extra'      => str_replace($strip, '', $extra),
        'sizechange' => $sizechange
    );
--- a/inc/common.php
+++ b/inc/common.php
@ -1017,7 +1017,7 @@ function cleanText($text) {
    // if the text is not valid UTF-8 we simply assume latin1
    // this won't break any worse than it breaks with the wrong encoding
    // but might actually fix the problem in many cases
-    if(!utf8_check($text)) $text = utf8_encode($text);
+    if(!\dokuwiki\Utf8\Clean::isUtf8($text)) $text = utf8_encode($text);

    return $text;
 }
@ -1173,12 +1173,12 @@ function parsePageTemplate(&$data) {
             utf8_ucwords(curNS($id)),
             utf8_strtoupper(curNS($id)),
             $file,
-             utf8_ucfirst($file),
-             utf8_strtoupper($file),
+             \dokuwiki\Utf8\PhpString::ucfirst($file),
+             \dokuwiki\Utf8\PhpString::strtoupper($file),
             $page,
-             utf8_ucfirst($page),
-             utf8_ucwords($page),
-             utf8_strtoupper($page),
+             \dokuwiki\Utf8\PhpString::ucfirst($page),
+             \dokuwiki\Utf8\PhpString::ucwords($page),
+             \dokuwiki\Utf8\PhpString::strtoupper($page),
             $INPUT->server->str('REMOTE_USER'),
             $USERINFO['name'],
             $USERINFO['mail'],
@ -1741,12 +1741,12 @@ function preg_quote_cb($string) {
 * @return string
 */
 function shorten($keep, $short, $max, $min = 9, $char = '…') {
-    $max = $max - utf8_strlen($keep);
+    $max = $max - \dokuwiki\Utf8\PhpString::strlen($keep);
    if($max < $min) return $keep;
-    $len = utf8_strlen($short);
+    $len = \dokuwiki\Utf8\PhpString::strlen($short);
    if($len <= $max) return $keep.$short;
    $half = floor($max / 2);
-    return $keep.utf8_substr($short, 0, $half - 1).$char.utf8_substr($short, $len - $half);
+    return $keep.\dokuwiki\Utf8\PhpString::substr($short, 0, $half - 1).$char.\dokuwiki\Utf8\PhpString::substr($short, $len - $half);
 }

 /**
--- a/inc/fetch.functions.php
+++ b/inc/fetch.functions.php
@ -69,9 +69,9 @@ function sendFile($file, $mime, $dl, $cache, $public = false, $orig = null) {

    //download or display?
    if($dl) {
-        header('Content-Disposition: attachment;'.rfc2231_encode('filename', utf8_basename($orig)).';');
+        header('Content-Disposition: attachment;'.rfc2231_encode('filename', \dokuwiki\Utf8\PhpString::basename($orig)).';');
    } else {
-        header('Content-Disposition: inline;'.rfc2231_encode('filename', utf8_basename($orig)).';');
+        header('Content-Disposition: inline;'.rfc2231_encode('filename', \dokuwiki\Utf8\PhpString::basename($orig)).';');
    }

    //use x-sendfile header to pass the delivery to compatible webservers
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@ -97,7 +97,7 @@ function _ft_pageSearch(&$data) {
                    );
                    $evt = new Event('FULLTEXT_PHRASE_MATCH',$evdata);
                    if ($evt->advise_before() && $evt->result !== true) {
-                        $text = utf8_strtolower($evdata['text']);
+                        $text = \dokuwiki\Utf8\PhpString::strtolower($evdata['text']);
                        if (strpos($text, $phrase) !== false) {
                            $evt->result = true;
                        }
@ -412,7 +412,7 @@ function ft_snippet($id,$highlight){
        $match = array();
        $snippets = array();
        $utf8_offset = $offset = $end = 0;
-        $len = utf8_strlen($text);
+        $len = \dokuwiki\Utf8\PhpString::strlen($text);

        // build a regexp from the phrases to highlight
        $re1 = '(' .
@ -442,8 +442,8 @@ function ft_snippet($id,$highlight){
            list($str,$idx) = $match[0];

            // convert $idx (a byte offset) into a utf8 character offset
-            $utf8_idx = utf8_strlen(substr($text,0,$idx));
-            $utf8_len = utf8_strlen($str);
+            $utf8_idx = \dokuwiki\Utf8\PhpString::strlen(substr($text,0,$idx));
+            $utf8_len = \dokuwiki\Utf8\PhpString::strlen($str);

            // establish context, 100 bytes surrounding the match string
            // first look to see if we can go 100 either side,
@ -472,9 +472,9 @@ function ft_snippet($id,$highlight){
            $end = $utf8_idx + $utf8_len + $post;      // now set it to the end of this context

            if ($append) {
-                $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append);
+                $snippets[count($snippets)-1] .= \dokuwiki\Utf8\PhpString::substr($text,$append,$end-$append);
            } else {
-                $snippets[] = utf8_substr($text,$start,$end-$start);
+                $snippets[] = \dokuwiki\Utf8\PhpString::substr($text,$start,$end-$start);
            }

            // set $offset for next match attempt
@ -483,8 +483,8 @@ function ft_snippet($id,$highlight){
            // this prevents further matching of this snippet but for possible matches of length
            // smaller than match length + context (at least 50 characters) this match is part of the context
            $utf8_offset = $utf8_idx + $utf8_len;
-            $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$utf8_len));
-            $offset = utf8_correctIdx($text,$offset);
+            $offset = $idx + strlen(\dokuwiki\Utf8\PhpString::substr($text,$utf8_idx,$utf8_len));
+            $offset = \dokuwiki\Utf8\Clean::correctIdx($text,$offset);
        }

        $m = "\1";
@ -674,7 +674,7 @@ function ft_queryParser($Indexer, $query){
     */
    $parsed_query = '';
    $parens_level = 0;
-    $terms = preg_split('/(-?".*?")/u', utf8_strtolower($query), -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
+    $terms = preg_split('/(-?".*?")/u', \dokuwiki\Utf8\PhpString::strtolower($query), -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

    foreach ($terms as $term) {
        $parsed = '';
--- a/inc/html.php
+++ b/inc/html.php
@ -355,7 +355,7 @@ function html_hilight($html,$phrases){
    $regex = join('|',$phrases);

    if ($regex === '') return $html;
-    if (!utf8_check($regex)) return $html;
+    if (!\dokuwiki\Utf8\Clean::isUtf8($regex)) return $html;
    $html = @preg_replace_callback("/((<[^>]*)|$regex)/ui",'html_hilight_callback',$html);
    return $html;
 }
--- a/inc/indexer.php
+++ b/inc/indexer.php
@ -605,12 +605,12 @@ class Doku_Indexer {
                       )
                     );
        if (preg_match('/[^0-9A-Za-z ]/u', $text))
-            $text = utf8_stripspecials($text, ' ', '\._\-:'.$wc);
+            $text = \dokuwiki\Utf8\Clean::stripspecials($text, ' ', '\._\-:'.$wc);

        $wordlist = explode(' ', $text);
        foreach ($wordlist as $i => $word) {
            $wordlist[$i] = (preg_match('/[^0-9A-Za-z]/u', $word)) ?
-                utf8_strtolower($word) : strtolower($word);
+                \dokuwiki\Utf8\PhpString::strtolower($word) : strtolower($word);
        }

        foreach ($wordlist as $i => $word) {
@ -1603,7 +1603,7 @@ function idx_indexLengths($filter) {
 * @return string
 */
 function idx_cleanName($name) {
-    $name = utf8_romanize(trim((string)$name));
+    $name = \dokuwiki\Utf8\Clean::romanize(trim((string)$name));
    $name = preg_replace('#[ \./\\:-]+#', '_', $name);
    $name = preg_replace('/[^A-Za-z0-9_]/', '', $name);
    return strtolower($name);
--- a/inc/infoutils.php
+++ b/inc/infoutils.php
@ -342,7 +342,7 @@ function msg($message,$lvl=0,$line='',$file='',$allow=MSG_PUBLIC){
    $errors[1]  = 'success';
    $errors[2]  = 'notify';

-    if($line || $file) $message.=' ['.utf8_basename($file).':'.$line.']';
+    if($line || $file) $message.=' ['.\dokuwiki\Utf8\PhpString::basename($file).':'.$line.']';

    if(!isset($MSG)) $MSG = array();
    $MSG[]=array('lvl' => $errors[$lvl], 'msg' => $message, 'allow' => $allow);
--- a/inc/io.php
+++ b/inc/io.php
@ -623,7 +623,7 @@ function io_download($url,$file,$useAttachment=false,$defaultName='',$maxSize=20
            if (is_string($content_disposition) &&
                    preg_match('/attachment;\s*filename\s*=\s*"([^"]*)"/i', $content_disposition, $match)) {

-                $name = utf8_basename($match[1]);
+                $name = \dokuwiki\Utf8\PhpString::basename($match[1]);
            }

        }
--- a/inc/mail.php
+++ b/inc/mail.php
@ -134,11 +134,11 @@ function _mail_send_action($data) {
    // end additional code to support event ... original mail_send() code from here

    if(defined('MAILHEADER_ASCIIONLY')){
-        $subject = utf8_deaccent($subject);
-        $subject = utf8_strip($subject);
+        $subject = \dokuwiki\Utf8\Clean::deaccent($subject);
+        $subject = \dokuwiki\Utf8\Clean::strip($subject);
    }

-    if(!utf8_isASCII($subject)) {
+    if(!\dokuwiki\Utf8\Clean::isASCII($subject)) {
        $enc_subj = '=?UTF-8?Q?'.mail_quotedprintable_encode($subject,0).'?=';
        // Spaces must be encoded according to rfc2047. Use the "_" shorthand
        $enc_subj = preg_replace('/ /', '_', $enc_subj);
@ -212,7 +212,7 @@ function mail_encode_address($string,$header='',$names=true){
        }

        // FIXME: is there a way to encode the localpart of a emailaddress?
-        if(!utf8_isASCII($addr)){
+        if(!\dokuwiki\Utf8\Clean::isASCII($addr)){
            msg(hsc("E-Mail address <$addr> is not ASCII"),-1);
            continue;
        }
@ -228,11 +228,11 @@ function mail_encode_address($string,$header='',$names=true){
            $addr = "<$addr>";

            if(defined('MAILHEADER_ASCIIONLY')){
-                $text = utf8_deaccent($text);
-                $text = utf8_strip($text);
+                $text = \dokuwiki\Utf8\Clean::deaccent($text);
+                $text = \dokuwiki\Utf8\Clean::strip($text);
            }

-            if(!utf8_isASCII($text)){
+            if(!\dokuwiki\Utf8\Clean::isASCII($text)){
                // put the quotes outside as in =?UTF-8?Q?"Elan Ruusam=C3=A4e"?= vs "=?UTF-8?Q?Elan Ruusam=C3=A4e?="
                if (preg_match('/^"(.+)"$/', $text, $matches)) {
                    $text = '"=?UTF-8?Q?'.mail_quotedprintable_encode($matches[1], 0).'?="';
--- a/inc/media.php
+++ b/inc/media.php
@ -261,7 +261,7 @@ function media_delete($id,$auth){
    // trigger an event - MEDIA_DELETE_FILE
    $data = array();
    $data['id']   = $id;
-    $data['name'] = utf8_basename($file);
+    $data['name'] = \dokuwiki\Utf8\PhpString::basename($file);
    $data['path'] = $file;
    $data['size'] = (file_exists($file)) ? filesize($file) : 0;

@ -1762,7 +1762,7 @@ function media_printimgdetail($item, $fullscreen=false){
    $d = $item['meta']->getField(array('IPTC.Caption','EXIF.UserComment',
                'EXIF.TIFFImageDescription',
                'EXIF.TIFFUserComment'));
-    if(utf8_strlen($d) > 250) $d = utf8_substr($d,0,250).'...';
+    if(\dokuwiki\Utf8\PhpString::strlen($d) > 250) $d = \dokuwiki\Utf8\PhpString::substr($d,0,250).'...';
    $k = $item['meta']->getField(array('IPTC.Keywords','IPTC.Category','xmp.dc:subject'));

    // print EXIF/IPTC data
--- a/inc/pageutils.php
+++ b/inc/pageutils.php
@ -44,7 +44,7 @@ function getID($param='id',$clean=true){
            if($param != 'id') {
                $relpath = 'lib/exe/';
            }
-            $script = $conf['basedir'].$relpath.utf8_basename($INPUT->server->str('SCRIPT_FILENAME'));
+            $script = $conf['basedir'].$relpath.\dokuwiki\Utf8\PhpString::basename($INPUT->server->str('SCRIPT_FILENAME'));

        }elseif($INPUT->server->str('PATH_INFO')){
            $request = $INPUT->server->str('PATH_INFO');
@ -127,7 +127,7 @@ function cleanID($raw_id,$ascii=false){
        $sepcharpat = '#\\'.$sepchar.'+#';

    $id = trim((string)$raw_id);
-    $id = utf8_strtolower($id);
+    $id = \dokuwiki\Utf8\PhpString::strtolower($id);

    //alternative namespace seperator
    if($conf['useslash']){
@ -136,13 +136,13 @@ function cleanID($raw_id,$ascii=false){
        $id = strtr($id,';/',':'.$sepchar);
    }

-    if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
-    if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);
+    if($conf['deaccent'] == 2 || $ascii) $id = \dokuwiki\Utf8\Clean::romanize($id);
+    if($conf['deaccent'] || $ascii) $id = \dokuwiki\Utf8\Clean::deaccent($id,-1);

    //remove specials
-    $id = utf8_stripspecials($id,$sepchar,'\*');
+    $id = \dokuwiki\Utf8\Clean::stripspecials($id,$sepchar,'\*');

-    if($ascii) $id = utf8_strip($id);
+    if($ascii) $id = \dokuwiki\Utf8\Clean::strip($id);

    //clean up
    $id = preg_replace($sepcharpat,$sepchar,$id);
--- a/inc/parser/code.php
+++ b/inc/parser/code.php
@ -21,8 +21,8 @@ class Doku_Renderer_code extends Doku_Renderer {
        if(!$language) $language = 'txt';
        $language = preg_replace(PREG_PATTERN_VALID_LANGUAGE, '', $language);
        if(!$filename) $filename = 'snippet.'.$language;
-        $filename = utf8_basename($filename);
-        $filename = utf8_stripspecials($filename, '_');
+        $filename = \dokuwiki\Utf8\PhpString::basename($filename);
+        $filename = \dokuwiki\Utf8\Clean::stripspecials($filename, '_');

        // send CRLF to Windows clients
        if(strpos($INPUT->server->str('HTTP_USER_AGENT'), 'Windows') !== false) {
--- a/inc/parser/metadata.php
+++ b/inc/parser/metadata.php
@ -93,7 +93,7 @@ class Doku_Renderer_metadata extends Doku_Renderer
            // cut off too long abstracts
            $this->doc = trim($this->doc);
            if (strlen($this->doc) > self::ABSTRACT_MAX) {
-                $this->doc = utf8_substr($this->doc, 0, self::ABSTRACT_MAX).'…';
+                $this->doc = \dokuwiki\Utf8\PhpString::substr($this->doc, 0, self::ABSTRACT_MAX).'…';
            }
            $this->meta['description']['abstract'] = $this->doc;
        }
--- a/inc/parser/xhtml.php
+++ b/inc/parser/xhtml.php
@ -1639,7 +1639,7 @@ class Doku_Renderer_xhtml extends Doku_Renderer {
                // return the title of the picture
                if(!$title) {
                    // just show the sourcename
-                    $title = $this->_xmlEntities(utf8_basename(noNS($src)));
+                    $title = $this->_xmlEntities(\dokuwiki\Utf8\PhpString::basename(noNS($src)));
                }
                return $title;
            }
@ -1675,7 +1675,7 @@ class Doku_Renderer_xhtml extends Doku_Renderer {
            if(!$render) {
                // if the file is not supposed to be rendered
                // return the title of the file (just the sourcename if there is no title)
-                return $title ? $title : $this->_xmlEntities(utf8_basename(noNS($src)));
+                return $title ? $title : $this->_xmlEntities(\dokuwiki\Utf8\PhpString::basename(noNS($src)));
            }

            $att          = array();
@ -1699,7 +1699,7 @@ class Doku_Renderer_xhtml extends Doku_Renderer {
                // return the title of the flash
                if(!$title) {
                    // just show the sourcename
-                    $title = utf8_basename(noNS($src));
+                    $title = \dokuwiki\Utf8\PhpString::basename(noNS($src));
                }
                return $this->_xmlEntities($title);
            }
@ -1720,7 +1720,7 @@ class Doku_Renderer_xhtml extends Doku_Renderer {
            $ret .= $this->_xmlEntities($title);
        } else {
            // just show the sourcename
-            $ret .= $this->_xmlEntities(utf8_basename(noNS($src)));
+            $ret .= $this->_xmlEntities(\dokuwiki\Utf8\PhpString::basename(noNS($src)));
        }

        return $ret;
@ -1882,7 +1882,7 @@ class Doku_Renderer_xhtml extends Doku_Renderer {
                $url = ml($file, '', true, '&');
                $linkType = 'internalmedia';
            }
-            $title = $atts['title'] ? $atts['title'] : $this->_xmlEntities(utf8_basename(noNS($file)));
+            $title = $atts['title'] ? $atts['title'] : $this->_xmlEntities(\dokuwiki\Utf8\PhpString::basename(noNS($file)));

            $out .= '<source src="'.hsc($url).'" type="'.$mime.'" />'.NL;
            // alternative content (just a link to the file)
@ -1949,7 +1949,7 @@ class Doku_Renderer_xhtml extends Doku_Renderer {
                $url = ml($file, '', true, '&');
                $linkType = 'internalmedia';
            }
-            $title = $atts['title'] ? $atts['title'] : $this->_xmlEntities(utf8_basename(noNS($file)));
+            $title = $atts['title'] ? $atts['title'] : $this->_xmlEntities(\dokuwiki\Utf8\PhpString::basename(noNS($file)));

            $out .= '<source src="'.hsc($url).'" type="'.$mime.'" />'.NL;
            // alternative content (just a link to the file)
--- a/inc/search.php
+++ b/inc/search.php
@ -211,7 +211,7 @@ function search_media(&$data,$base,$file,$type,$lvl,$opts){
        return false;
    }

-    $info['file']     = utf8_basename($file);
+    $info['file']     = \dokuwiki\Utf8\PhpString::basename($file);
    $info['size']     = filesize($base.'/'.$file);
    $info['mtime']    = filemtime($base.'/'.$file);
    $info['writable'] = is_writable($base.'/'.$file);
@ -497,7 +497,7 @@ function search_universal(&$data,$base,$file,$type,$lvl,$opts){
    $item['open']  = $return;

    if(!empty($opts['meta'])){
-        $item['file']       = utf8_basename($file);
+        $item['file']       = \dokuwiki\Utf8\PhpString::basename($file);
        $item['size']       = filesize($base.'/'.$file);
        $item['mtime']      = filemtime($base.'/'.$file);
        $item['rev']        = $item['mtime'];
--- a/inc/utf8.php
+++ b/inc/utf8.php
--- a/lib/exe/js.php
+++ b/lib/exe/js.php
@ -176,7 +176,7 @@ function js_load($file){

        // is it a include_once?
        if($match[1]){
-            $base = utf8_basename($ifile);
+            $base = \dokuwiki\Utf8\PhpString::basename($ifile);
            if(array_key_exists($base, $loaded) && $loaded[$base] === true){
                $data  = str_replace($match[0], '' ,$data);
                continue;
--- a/lib/plugins/authad/auth.php
+++ b/lib/plugins/authad/auth.php
@ -101,7 +101,7 @@ class auth_plugin_authad extends DokuWiki_Auth_Plugin
            // make sure the right encoding is used
            if ($this->getConf('sso_charset')) {
                $_SERVER['REMOTE_USER'] = iconv($this->getConf('sso_charset'), 'UTF-8', $_SERVER['REMOTE_USER']);
-            } elseif (!utf8_check($_SERVER['REMOTE_USER'])) {
+            } elseif (!\dokuwiki\Utf8\Clean::isUtf8($_SERVER['REMOTE_USER'])) {
                $_SERVER['REMOTE_USER'] = utf8_encode($_SERVER['REMOTE_USER']);
            }

@ -297,7 +297,7 @@ class auth_plugin_authad extends DokuWiki_Auth_Plugin
        $group = str_replace('\\', '', $group);
        $group = str_replace('#', '', $group);
        $group = preg_replace('[\s]', '_', $group);
-        $group = utf8_strtolower(trim($group));
+        $group = \dokuwiki\Utf8\PhpString::strtolower(trim($group));
        return $group;
    }

@ -322,8 +322,8 @@ class auth_plugin_authad extends DokuWiki_Auth_Plugin
        if ($dom) $domain = $dom;

        // clean up both
-        $domain = utf8_strtolower(trim($domain));
-        $user   = utf8_strtolower(trim($user));
+        $domain = \dokuwiki\Utf8\PhpString::strtolower(trim($domain));
+        $user   = \dokuwiki\Utf8\PhpString::strtolower(trim($user));

        // is this a known, valid domain? if not discard
        if (!is_array($this->conf[$domain])) {
--- a/lib/plugins/extension/helper/extension.php
+++ b/lib/plugins/extension/helper/extension.php
@ -913,7 +913,7 @@ class helper_plugin_extension_extension extends DokuWiki_Plugin
            if (is_string($content_disposition) &&
                    preg_match('/attachment;\s*filename\s*=\s*"([^"]*)"/i', $content_disposition, $match)) {

-                $name = utf8_basename($match[1]);
+                $name = \dokuwiki\Utf8\PhpString::basename($match[1]);
            }

        }
@ -953,7 +953,7 @@ class helper_plugin_extension_extension extends DokuWiki_Plugin
        if (is_null($file)) {
            $file = md5($url);
        } else {
-            $file = utf8_basename($file);
+            $file = \dokuwiki\Utf8\PhpString::basename($file);
        }

        // create tmp directory for download
--- a/lib/plugins/usermanager/admin.php
+++ b/lib/plugins/usermanager/admin.php
@ -1080,7 +1080,7 @@ class admin_plugin_usermanager extends DokuWiki_Admin_Plugin
        $fd = fopen($_FILES['import']['tmp_name'], 'r');
        if ($fd) {
            while ($csv = fgets($fd)) {
-                if (!utf8_check($csv)) {
+                if (!\dokuwiki\Utf8\Clean::isUtf8($csv)) {
                    $csv = utf8_encode($csv);
                }
                $raw = str_getcsv($csv);