utf8 replacements for strpos and strlen

darcs-hash:20050123155239-9977f-2ddc1e19ccf48579c71382e8933166a86ee750a4.gz
This commit is contained in:
andi 2005-01-23 16:52:39 +01:00
parent f29bd55326
commit 2f95495925
1 changed files with 52 additions and 0 deletions

View File

@ -52,6 +52,24 @@ function utf8_check($Str) {
return true;
}
/**
 * Unicode aware replacement for strlen()
 *
 * Delegates to mb_strlen() (with the 'utf-8' encoding) when the mbstring
 * extension provides it and the UTF8_NOMBSTRING constant is not defined.
 * Otherwise the string is passed through utf8_to_unicode() and the number
 * of entries in the result is returned.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    strlen()
 * @param  string $string the UTF-8 encoded string to measure
 * @return int    length as reported by mb_strlen(), or the entry count of
 *                the utf8_to_unicode() result in the fallback path
 */
function utf8_strlen($string){
  // mbstring is preferred unless explicitly disabled via UTF8_NOMBSTRING
  $useMbstring = !defined('UTF8_NOMBSTRING') && function_exists('mb_strlen');

  if(!$useMbstring){
    // fallback: one array entry per decoded value
    return count(utf8_to_unicode($string));
  }

  return mb_strlen($string,'utf-8');
}
/**
* This is a unicode aware replacement for strtolower()
*
@ -118,6 +136,40 @@ function utf8_deaccent($string,$case=0){
return $string;
}
/**
 * Unicode aware replacement for strpos()
 *
 * Delegates to mb_strpos() (with the 'utf-8' encoding) when the mbstring
 * extension provides it and the UTF8_NOMBSTRING constant is not defined.
 *
 * Otherwise both strings are converted with utf8_to_unicode() and the
 * resulting arrays are compared entry by entry, so the returned position
 * and the offset are counted in array entries, not in bytes.
 *
 * Fallback specifics:
 *  - an empty needle never matches (false is returned)
 *  - a negative offset is treated as 0
 *  - the scan stops as soon as the needle no longer fits into the rest of
 *    the haystack, so no index beyond the haystack is ever read
 *
 * @author Scott Michael Reynen <scott@randomchaos.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
 * @see    strpos()
 * @param  string $haystack UTF-8 encoded string to search in
 * @param  string $needle   UTF-8 encoded string to search for
 * @param  int    $offset   position to start searching from
 * @return int|false position of the first match, false if there is none
 */
function utf8_strpos($haystack, $needle,$offset=0) {
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strpos'))
    return mb_strpos($haystack,$needle,$offset,'utf-8');

  $haystack = utf8_to_unicode($haystack);
  $needle   = utf8_to_unicode($needle);

  // the lengths never change during the search, so compute them once
  $haystackLen = count($haystack);
  $needleLen   = count($needle);

  // nothing to look for; also avoids reading $needle[0] on an empty array
  if($needleLen == 0) return false;

  // negative start positions would index before the array start
  if($offset < 0) $offset = 0;

  // last start position at which the whole needle still fits
  $lastStart = $haystackLen - $needleLen;

  for($position = $offset; $position <= $lastStart; $position++){
    for($i = 0; $i < $needleLen; $i++){
      // strict comparison: no type juggling between the compared entries
      if($needle[$i] !== $haystack[$position + $i]) break;
    }
    // inner loop ran to completion: every needle entry matched
    if($i == $needleLen) return $position;
  }

  return false;
}
/**
* This function will take any UTF-8 encoded text and return it as
* a list of Unicode values: