Limiting use of readdir in the idx_indexLengths function (v2).
Every search on the wiki uses this function. Scanning the index directory each time is time consuming, causing a constant series of disk accesses. A normal search now uses file_exists one or more times instead of reading the whole directory with readdir. A wildcard search now uses a lengths.idx file containing all the word lengths used in the wiki; this file is generated when the new configuration parameter $conf['readdircache'] is non-zero, set to a lifetime in seconds. A new function idx_listIndexLengths was created to do this part.
This commit is contained in:
parent
3371a8b471
commit
229529655f
|
@ -0,0 +1,121 @@
|
|||
<?php
|
||||
|
||||
require_once DOKU_INC.'inc/indexer.php';
|
||||
|
||||
class indexer_idx_indexlengths_test extends UnitTestCase {

    /**
     * Test the function with an array of one value.
     *
     * A filter with a single length must return exactly that length.
     */
    function test_oneWord(){
        global $conf;
        $filter[8] = array('dokuwiki');
        // one word should return the index
        $ref[] = 8;
        sort($ref);
        // NOTE: call-time pass-by-reference (&$filter at the call site) removed;
        // it is a fatal error since PHP 5.4 and idx_indexLengths() already
        // declares its parameter by reference.
        $result = idx_indexLengths($filter);
        sort($result);
        $this->assertIdentical($result, $ref);
    }

    /**
     * Test the function with an array of values.
     *
     * Every length present in the filter must be returned.
     */
    function test_moreWords() {
        global $conf;
        $filter = array( 4 => array('test'), 8 => array('dokuwiki'), 7 => array('powered'));
        // more words should return the indexes
        $ref = array(4, 7, 8);
        sort($ref);
        $result = idx_indexLengths($filter);
        sort($result);
        $this->assertIdentical($result, $ref);
    }

    /**
     * Test a minimal value in case of wildcard search.
     *
     * The reference list is built directly from the index directory so the
     * function's result can be compared against the real iNNN.idx files.
     */
    function test_minValue() {
        global $conf;
        $filter = 5;
        // construction of the list of the indexes to compare against
        $dir = @opendir($conf['indexdir']);
        $ref = array();
        while (($f = readdir($dir)) !== false) {
            if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){
                $i = substr($f,1,-4);
                // keep only numeric lengths at or above the minimum
                if (is_numeric($i) && $i >= $filter)
                    $ref[] = (int)$i;
            }
        }
        closedir($dir);
        sort($ref);
        $result = idx_indexLengths($filter);
        sort($result);
        $this->assertIdentical($result, $ref);
    }
}
|
||||
|
||||
class indexer_idx_indexlengths_time extends UnitTestCase {

    /**
     * Test the time improvements of the new function.
     * Time reference for 10000 calls of oneWord: 4.6s.
     * It should be 90% faster.
     */
    function test_oneWord(){
        global $conf;
        $filter[8] = array('dokuwiki');
        $start = microtime(true);
        for ($i = 0; $i < 10000; $i++) {
            // call-time pass-by-reference removed: fatal error since PHP 5.4,
            // the function already declares its parameter by reference
            $result = idx_indexLengths($filter);
        }
        $end = microtime(true);
        $time = $end - $start;
        $timeref = 4.6*0.10; // reference execution time of 4.6s for 10000 calls
        echo "1) 10% ref : $timeref -> $time \n";
        $this->assertTrue($time < $timeref);
    }

    /**
     * Test the time improvements of the new function.
     * Time reference for 10000 calls of moreWords: 4.6s.
     * It should be 90% faster.
     */
    function test_moreWords() {
        global $conf;
        $filter = array( 4 => array('test'), 8 => array('dokuwiki'), 7 => array('powered'));
        // more words should return the indexes
        $start = microtime(true);
        for ($i = 0; $i < 10000; $i++) {
            $result = idx_indexLengths($filter);
        }
        $end = microtime(true);
        $time = $end - $start;
        $timeref = 4.6*0.10; // reference execution time of 4.6s for 10000 calls
        echo "2) 10% ref : $timeref -> $time \n";
        $this->assertTrue($time < $timeref);
    }

    /**
     * Test the time improvements of the new function.
     * Time reference for 10000 calls on minValue: 4.9s.
     * Should be at least 65% faster.
     * This test fails when the readdir cache is disabled.
     */
    function test_minValue() {
        global $conf;
        $filter = 5;
        $start = microtime(true);
        for ($i = 0; $i < 10000; $i++) {
            $result = idx_indexLengths($filter);
        }
        $end = microtime(true);
        $time = $end - $start;
        $timeref = 4.9 * 0.35; // reference execution time of 4.9s for 10000 calls
        echo "3) 35% ref : $timeref -> $time \n";
        $this->assertTrue($time < $timeref);
    }
}
|
||||
|
||||
//Setup VIM: ex: et ts=4 enc=utf-8 :
|
|
@ -156,3 +156,4 @@ $conf['ftp']['user'] = 'user';
|
|||
$conf['ftp']['pass'] = 'password';
|
||||
$conf['ftp']['root'] = '/home/user/htdocs';
|
||||
|
||||
$conf['readdircache'] = 0; // cache lifetime in seconds for the readdir operation, 0 to deactivate.
|
||||
|
|
|
@ -410,41 +410,86 @@ function idx_updateIndexLine($line,$pid,$count){
|
|||
return join(':',$updated)."\n";
|
||||
}
|
||||
|
||||
/**
 * Get the list of lengths indexed in the wiki.
 *
 * Reads the index directory, or a cache file when $conf['readdircache'] is
 * enabled, and returns a sorted array of the word lengths used in the wiki.
 * The cache file (lengths.idx) is rebuilt whenever it is missing, unreadable
 * or older than $conf['readdircache'] seconds.
 *
 * @return array sorted list of indexed word lengths (ints); empty on failure
 *
 * @author YoBoY <yoboy.leguesh@gmail.com>
 */
function idx_listIndexLengths() {
    global $conf;
    // decide whether the cache file has to be (re)created
    $docache = false;
    if ($conf['readdircache'] != 0) {
        clearstatcache();
        $cachefile = $conf['indexdir'].'/lengths.idx';
        if (@file_exists($cachefile)
            && time() < @filemtime($cachefile) + $conf['readdircache']) {
            // cache file is fresh enough: use it instead of scanning the directory
            $lengths = @file($cachefile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
            if ($lengths !== false) {
                $idx = array();
                foreach ($lengths as $length) {
                    $idx[] = (int)$length;
                }
                return $idx;
            }
        }
        // cache missing, stale or unreadable: rebuild it below
        $docache = true;
    }

    // scan the index directory for iNNN.idx files
    $dir = @opendir($conf['indexdir']);
    if ($dir === false)
        return array();
    // bug fix: was "$idx[] = array();", which injected a bogus empty-array
    // element into the result (and into the cache file as the string "Array")
    $idx = array();
    while (($f = readdir($dir)) !== false) {
        if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){
            $i = substr($f,1,-4);
            if (is_numeric($i))
                $idx[] = (int)$i;
        }
    }
    closedir($dir);
    sort($idx);
    // persist the sorted list for the next call when caching is enabled
    if ($docache === true) {
        $handle = @fopen($conf['indexdir'].'/lengths.idx','w');
        @fwrite($handle, implode("\n",$idx));
        @fclose($handle);
    }
    return $idx;
}
|
||||
|
||||
/**
 * Get the word lengths that have been indexed.
 *
 * Returns an array of lengths for which an index exists. For a fixed set of
 * lengths (array filter) only the requested index files are stat'ed with
 * file_exists, avoiding a full readdir of the index directory. For a
 * wildcard search (numeric filter) the full length list is taken from
 * idx_listIndexLengths() and every length >= the minimum is kept.
 *
 * Note: the scraped diff had merged the removed and added hunk lines into one
 * invalid body (two interleaved readdir loops); this is the coherent new
 * implementation the commit describes.
 *
 * @param array|int $filter map of length => words, or a minimal length
 * @return array lengths that have an index
 *
 * @author Tom N Harris <tnharris@whoopdedo.org>
 * @author YoBoY <yoboy.leguesh@gmail.com>
 */
function idx_indexLengths(&$filter){
    global $conf;
    $idx = array();
    if (is_array($filter)){
        // testing if index files exist only: one stat per requested length
        foreach ($filter as $key => $value) {
            if (@file_exists($conf['indexdir']."/i$key.idx")) {
                $idx[] = $key;
            }
        }
    } else {
        // wildcard search: we keep all the values equal or superior
        $lengths = idx_listIndexLengths();
        foreach ( $lengths as $key => $length) {
            if ((int)$length >= (int)$filter) {
                $idx[] = $length;
            }
        }
    }
    return $idx;
}
|
||||
|
||||
|
|
|
@ -234,3 +234,4 @@ $lang['useheading_o_navigation'] = 'Navigation Only';
|
|||
$lang['useheading_o_content'] = 'Wiki Content Only';
|
||||
$lang['useheading_o_1'] = 'Always';
|
||||
|
||||
$lang['readdircache'] = 'Maximum age for readdir cache (sec)';
|
||||
|
|
|
@ -188,6 +188,7 @@ $meta['rss_show_summary'] = array('onoff');
|
|||
$meta['broken_iua'] = array('onoff');
|
||||
$meta['xsendfile'] = array('multichoice','_choices' => array(0,1,2,3));
|
||||
$meta['renderer_xhtml'] = array('renderer','_format' => 'xhtml','_choices' => array('xhtml'));
|
||||
$meta['readdircache'] = array('numeric');
|
||||
|
||||
$meta['_network'] = array('fieldset');
|
||||
$meta['proxy____host'] = array('string','_pattern' => '#^(|[a-z0-9\-\.+]+)$#i');
|
||||
|
|
Loading…
Reference in New Issue