UTF-8 compatible fulltext search

darcs-hash:20050123165100-9977f-697d62ef2daa33eaf167e59754bb6f9444a06135.gz
This commit is contained in:
andi 2005-01-23 17:51:00 +01:00
parent 7077c942d0
commit d5a2a500a5
2 changed files with 8 additions and 6 deletions

View File

@ -536,7 +536,7 @@ function html_search(){
//do quick pagesearch
$data = array();
search($data,$conf['datadir'],'search_pagename',array(query => $QUERY));
search($data,$conf['datadir'],'search_pagename',array(query => cleanID($QUERY)));
if(count($data)){
sort($data);
print '<div class="search_quickresult">';
@ -554,7 +554,7 @@ function html_search(){
//do fulltext search
$data = array();
search($data,$conf['datadir'],'search_fulltext',array(query => $QUERY));
search($data,$conf['datadir'],'search_fulltext',array(query => utf8_strtolower($QUERY)));
if(count($data)){
usort($data,'sort_search_fulltext');
foreach($data as $row){

View File

@ -269,6 +269,8 @@ function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
//get text
$text = io_readfile($base.'/'.$file);
//lowercase text (u modifier does not help with case)
$lctext = utf8_strtolower($text);
//create regexp from queries
$qpreg = preg_split('/\s+/',preg_quote($opts['query'],'#'));
@ -276,17 +278,17 @@ function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
//do the fulltext search
$matches = array();
if($cnt = preg_match_all('#'.$qpreg.'#si',$text,$matches)){
if($cnt = preg_match_all('#'.$qpreg.'#usi',$lctext,$matches)){
//this is not the best way for snippet generation but the fastest I could find
//split query and only use the first token
$q = preg_split('/\s+/',$opts['query'],2);
$q = $q[0];
$p = strpos(strtolower($text),$q);
$p = utf8_strpos($lctext,$q);
$f = $p - 100;
$l = strlen($q) + 200;
$l = utf8_strlen($q) + 200;
if($f < 0) $f = 0;
$snippet = '<span class="search_sep"> ... </span>'.
htmlspecialchars(substr($text,$f,$l)).
htmlspecialchars(utf8_substr($text,$f,$l)).
'<span class="search_sep"> ... </span>';
$snippet = preg_replace('#'.$qpreg.'#si','<span class="search_hit">\\1</span>',$snippet);