Limiting use of readdir in the idx_indexLengths function (v2).

Every search on the wiki uses this function. Scanning the index directory each time is time-consuming and causes a constant series of disk accesses.
A normal search now calls file_exists one or more times instead of reading the whole directory with readdir.
A wildcard search now uses a lengths.idx file containing all the word lengths used in the wiki. This file is generated when the new configuration parameter $conf['readdircache'] is not 0; its value is the cache lifetime in seconds. A new function, idx_listIndexLengths, handles this part.
This commit is contained in:
YoBoY 2010-03-23 22:50:41 +01:00 committed by Andreas Gohr
parent 3371a8b471
commit 229529655f
5 changed files with 189 additions and 20 deletions

View File

@ -0,0 +1,121 @@
<?php
require_once DOKU_INC.'inc/indexer.php';
/**
 * Functional tests for idx_indexLengths().
 *
 * The function receives its argument by reference through its own signature,
 * so the calls below pass a plain variable; call-time pass-by-reference
 * (`f(&$x)`) is a fatal error on PHP >= 5.4.
 */
class indexer_idx_indexlengths_test extends UnitTestCase {

    /**
     * Test the function with an array of one value
     */
    function test_oneWord(){
        $filter = array(8 => array('dokuwiki'));
        // one word should return the index
        $ref = array(8);
        $result = idx_indexLengths($filter);
        sort($result);
        $this->assertIdentical($result, $ref);
    }

    /**
     * Test the function with an array of values
     */
    function test_moreWords() {
        $filter = array( 4 => array('test'), 8 => array('dokuwiki'), 7 => array('powered'));
        // more words should return the indexes
        $ref = array(4, 7, 8);
        sort($ref);
        $result = idx_indexLengths($filter);
        sort($result);
        $this->assertIdentical($result, $ref);
    }

    /**
     * Test a minimal value in case of wildcard search
     */
    function test_minValue() {
        global $conf;
        $filter = 5;
        // construction of the list of the index to compare
        $ref = array();
        $dir = @opendir($conf['indexdir']);
        // an unreadable index directory leaves the reference list empty
        // instead of handing an invalid handle to readdir()
        if ($dir !== false) {
            while (($f = readdir($dir)) !== false) {
                if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){
                    $i = substr($f,1,-4);
                    if (is_numeric($i) && $i >= $filter)
                        $ref[] = (int)$i;
                }
            }
            closedir($dir);
        }
        sort($ref);
        $result = idx_indexLengths($filter);
        sort($result);
        $this->assertIdentical($result, $ref);
    }
}
/**
 * Timing tests for idx_indexLengths().
 *
 * Each test compares the wall-clock time of 10000 calls against a fraction
 * of a reference time measured with the previous readdir based
 * implementation. The reference values are machine dependent.
 */
class indexer_idx_indexlengths_time extends UnitTestCase {

    /**
     * Measure how long repeated calls of idx_indexLengths() take.
     *
     * Not picked up as a test case because the name does not start with "test".
     *
     * @param  mixed $filter argument handed to idx_indexLengths()
     * @param  int   $calls  number of calls to time
     * @return float elapsed time in seconds
     */
    function _elapsed($filter, $calls = 10000) {
        $start = microtime(true);
        for ($i = 0; $i < $calls; $i++) {
            // plain variable: the reference is taken by the function signature
            idx_indexLengths($filter);
        }
        return microtime(true) - $start;
    }

    /**
     * Test the time improvements of the new function
     * Time reference for 10000 calls of oneWord: 4,6s
     * It's 90% faster
     */
    function test_oneWord(){
        $filter = array(8 => array('dokuwiki'));
        $time = $this->_elapsed($filter);
        $timeref = 4.6*0.10; // actual execution time of 4,6s for 10000 calls
        echo "1) 10% ref : $timeref -> $time \n";
        $this->assertTrue($time < $timeref);
    }

    /**
     * Test the time improvements of the new function
     * Time reference for 10000 calls of moreWords: 4,6s
     * It's 90% faster
     */
    function test_moreWords() {
        $filter = array( 4 => array('test'), 8 => array('dokuwiki'), 7 => array('powered'));
        $time = $this->_elapsed($filter);
        $timeref = 4.6*0.10; // actual execution time of 4,6s for 10000 calls
        echo "2) 10% ref : $timeref -> $time \n";
        $this->assertTrue($time < $timeref);
    }

    /**
     * Test the time improvements of the new function
     * Time reference for 10000 calls of minValue: 4,9s
     * Should be at least 65% faster
     * Test fails with no cache
     */
    function test_minValue() {
        $filter = 5;
        $time = $this->_elapsed($filter);
        $timeref = 4.9 * 0.35; // actual execution time of 4,9s for 10000 calls
        echo "3) 35% ref : $timeref -> $time \n";
        $this->assertTrue($time < $timeref);
    }
}
//Setup VIM: ex: et ts=4 enc=utf-8 :

View File

@ -156,3 +156,4 @@ $conf['ftp']['user'] = 'user';
$conf['ftp']['pass'] = 'password';
$conf['ftp']['root'] = '/home/user/htdocs';
$conf['readdircache'] = 0; //cache lifetime in seconds for the readdir operation, 0 to deactivate.

View File

@ -410,41 +410,86 @@ function idx_updateIndexLine($line,$pid,$count){
return join(':',$updated)."\n";
}
/**
 * Get the list of lengths indexed in the wiki
 *
 * Reads the index directory, or a cache file when caching is enabled, and
 * returns a sorted array of the word lengths for which an i<length>.idx
 * file exists.
 *
 * Caching is controlled by $conf['readdircache']: 0 disables it, any other
 * value is the maximum age in seconds of the cache file lengths.idx stored
 * in the index directory. An expired or unreadable cache is rebuilt from a
 * fresh directory scan.
 *
 * @author YoBoY <yoboy.leguesh@gmail.com>
 *
 * @return array sorted list of word lengths (int); empty when the index
 *               directory cannot be opened
 */
function idx_listIndexLengths() {
    global $conf;
    $cachefile = $conf['indexdir'].'/lengths.idx';
    $docache = ($conf['readdircache'] != 0);

    // serve from the cache file while it is fresh enough
    if ($docache) {
        clearstatcache();
        if (@file_exists($cachefile) && (time() < @filemtime($cachefile) + $conf['readdircache'])) {
            $lengths = @file($cachefile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
            if ($lengths !== false) {
                $idx = array();
                foreach ($lengths as $length) {
                    $idx[] = (int)$length;
                }
                return $idx;
            }
        }
    }

    // no usable cache: scan the index directory
    $dir = @opendir($conf['indexdir']);
    if ($dir === false)
        return array();
    // start from an empty list; appending array() here would leave a
    // nested array in the result and the text "Array" in the cache file
    $idx = array();
    while (($f = readdir($dir)) !== false) {
        if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){
            $i = substr($f,1,-4);
            if (is_numeric($i))
                $idx[] = (int)$i;
        }
    }
    closedir($dir);
    sort($idx);

    // save the list for later calls; a failed write only costs the cache
    if ($docache) {
        $handle = @fopen($cachefile, 'w');
        if ($handle !== false) {
            @fwrite($handle, implode("\n", $idx));
            @fclose($handle);
        }
    }
    return $idx;
}
/**
 * Get the word lengths that have been indexed.
 *
 * Returns the word lengths for which an index file exists, restricted by
 * $filter:
 *  - array:  its keys are word lengths; each key is kept when the file
 *            i<key>.idx exists in the index directory (one file_exists
 *            call per key, no directory scan)
 *  - scalar: treated as a minimum length; every indexed length equal or
 *            greater is returned, taken from idx_listIndexLengths()
 *
 * @author Tom N Harris <tnharris@whoopdedo.org>
 * @author YoBoY <yoboy.leguesh@gmail.com>
 *
 * @param  array|int $filter lengths to look for (as array keys) or a minimum length
 * @return array     list of matching word lengths
 */
function idx_indexLengths(&$filter){
    global $conf;
    $idx = array();
    if (is_array($filter)){
        // testing if index files exists only
        foreach ($filter as $key => $value) {
            if (@file_exists($conf['indexdir']."/i$key.idx")) {
                $idx[] = $key;
            }
        }
    } else {
        $lengths = idx_listIndexLengths();
        foreach ( $lengths as $key => $length) {
            // we keep all the values equal or superior
            if ((int)$length >= (int)$filter) {
                $idx[] = $length;
            }
        }
    }
    return $idx;
}

View File

@ -234,3 +234,4 @@ $lang['useheading_o_navigation'] = 'Navigation Only';
$lang['useheading_o_content'] = 'Wiki Content Only';
$lang['useheading_o_1'] = 'Always';
// text for the readdircache setting: value is in seconds, 0 turns the cache off
$lang['readdircache'] = 'Maximum age for readdir cache (sec)';

View File

@ -188,6 +188,7 @@ $meta['rss_show_summary'] = array('onoff');
$meta['broken_iua'] = array('onoff');
$meta['xsendfile'] = array('multichoice','_choices' => array(0,1,2,3));
$meta['renderer_xhtml'] = array('renderer','_format' => 'xhtml','_choices' => array('xhtml'));
// readdircache holds a number of seconds (0 disables the lengths.idx cache)
$meta['readdircache'] = array('numeric');
$meta['_network'] = array('fieldset');
$meta['proxy____host'] = array('string','_pattern' => '#^(|[a-z0-9\-\.+]+)$#i');