Index: add.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/add.php,v retrieving revision 1.5.2.3 diff -r1.5.2.3 add.php 2,22c2,28 < /** < * Global Search Engine for Moodle < * < * @package search < * @category core < * @subpackage search_engine < * @author Michael Champanis (mchampan) [cynnical@gmail.com], Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8 < * @date 2008/03/31 < * @license http://www.gnu.org/copyleft/gpl.html GNU Public License < * < * Asynchronous adder for new indexable contents < * < * Major chages in this review is passing the xxxx_db_names return to < * multiple arity to handle multiple document types modules < */ < < /** < * includes and requires < */ < require_once('../config.php'); < require_once("$CFG->dirroot/search/lib.php"); --- > /** > * Global Search Engine for Moodle > * > * @package search > * @category core > * @subpackage search_engine > * @author Michael Champanis (mchampan) [cynnical@gmail.com], Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8 > * @date 2008/03/31 > * @license http://www.gnu.org/copyleft/gpl.html GNU Public License > * > * Asynchronous adder for new indexable contents > * > * Major chages in this review is passing the xxxx_db_names return to > * multiple arity to handle multiple document types modules > */ > > /** > * includes and requires > */ > require_once('../config.php'); > > /// makes inclusions of the Zend Engine more reliable > $separator = (array_key_exists('WINDIR', $_SERVER)) ? ';' : ':' ; > ini_set('include_path', $CFG->dirroot.'/search'.$separator.ini_get('include_path')); > > require_once("$CFG->dirroot/search/lib.php"); > require_once("$CFG->dirroot/search/indexlib.php"); 32c38 < if (!isadmin()) { --- > if (!has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) { 38,46c44,49 < if (!search_check_php5()) { < $phpversion = phpversion(); < mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version ".phpversion().")"); < exit(0); < } < < require_once("$CFG->dirroot/search/indexlib.php"); < < $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); --- > try { > $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); > } catch(LuceneException $e) { > mtrace("Could not construct a valid index. Maybe the first indexation was never made, or files might be corrupted. Run complete indexation again."); > return; > } 91c94 < doctype = '{$mod->name}' --- > doctype = ? 94c97 < $docIds = get_records_sql_menu($query); --- > $docIds = $DB->get_records_sql_menu($query, array($mod->name)); 158c161 < ?> --- > ?> \ No newline at end of file Index: cron.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/cron.php,v retrieving revision 1.10.2.2 diff -r1.10.2.2 cron.php 21,25c21 < if (!search_check_php5()) { < $phpversion = phpversion(); < mtrace("Sorry, cannot cron global search as it requires PHP 5.0.0 or later (currently using version $phpversion)"); < } < else if (empty($CFG->enableglobalsearch)) { --- > if (empty($CFG->enableglobalsearch)) { Index: delete.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/delete.php,v retrieving revision 1.6.2.3 diff -r1.6.2.3 delete.php 2,22c2,25 < /** < * Global Search Engine for Moodle < * < * @package search < * @category core < * @subpackage search_engine < * @author Michael Champanis (mchampan) [cynnical@gmail.com], Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8 < * @date 2008/03/31 < * @license http://www.gnu.org/copyleft/gpl.html GNU Public License < * < * Asynchronous index cleaner < * < * Major chages in this review is passing the xxxx_db_names return to < * multiple arity to handle multiple document types modules < */ < < /** < * includes and requires < */ < require_once('../config.php'); < require_once("$CFG->dirroot/search/lib.php"); --- > /** > * Global Search Engine for Moodle > * > * @package search > * @category core > * @subpackage search_engine > * @author Michael Champanis (mchampan) [cynnical@gmail.com], Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8 > * @date 2008/03/31 > * @license http://www.gnu.org/copyleft/gpl.html GNU Public License > * > * Asynchronous index cleaner > * > * Major chages in this review is passing the xxxx_db_names return to > * multiple arity to handle multiple document types modules > */ > > /** > * includes and requires > */ > require_once('../config.php'); > > /// makes inclusions of the Zend Engine more reliable > $separator = (array_key_exists('WINDIR', $_SERVER)) ? ';' : ':' ; > ini_set('include_path', $CFG->dirroot.'/search'.$separator.ini_get('include_path')); 23a27,31 > require_once("$CFG->dirroot/search/lib.php"); > require_once("$CFG->dirroot/search/indexlib.php"); > > > /// checks global search activation 26c34 < --- > 31c39 < if (!isadmin()) { --- > if (!has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) { 35,39c43,47 < //check for php5 (lib.php) < if (!search_check_php5()) { < $phpversion = phpversion(); < mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version ".phpversion().")"); < exit(0); --- > try { > $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); > } catch(LuceneException $e) { > mtrace("Could not construct a valid index. Maybe the first indexation was never made, or files might be corrupted. Run complete indexation again."); > return; 41,44d48 < < require_once("$CFG->dirroot/search/indexlib.php"); < < $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); 51a56 > /// check all modules 142c147 < ?> --- > ?> \ No newline at end of file Index: indexer.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/indexer.php,v retrieving revision 1.13.2.4 diff -r1.13.2.4 indexer.php 37a38,41 > //require_once("debugging.php"); > > $separator = (array_key_exists('WINDIR', $_SERVER)) ? ';' : ':' ; > ini_set('include_path', $CFG->dirroot.'\search'.$separator.ini_get('include_path')); 47c51 < if (!isadmin()) { --- > if (!has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) { 64,69d67 < if (!search_check_php5()) { < $phpversion = phpversion(); < mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version ".phpversion().")"); < exit(0); < } < 97c95,96 < } else { --- > } > else { 100c99,100 < } else { --- > } > else { 103a104 > Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8()); 127,129d127 < $searchables = array(); < < /// collects modules 131,146c129,131 < if ($mods = get_records('modules', '', '', '', 'id,name')) { < $searchables = array_merge($searchables, $mods); < } < mtrace(count($searchables).' modules found.'); < < // collects blocks as indexable information may be found in blocks either < if ($blocks = get_records('block', '', '', '', 'id,name')) { < // prepend the "block_" prefix to discriminate document type plugins < foreach(array_keys($blocks) as $aBlockId){ < $blocks[$aBlockId]->name = 'block_'.$blocks[$aBlockId]->name; < } < $searchables = array_merge($searchables, $blocks); < mtrace(count($blocks).' blocks found.'); < } < < /// add virtual modules onto the back of the array --- > $searchables = search_collect_searchables(); > > /// start indexation 148d132 < $searchables = array_merge($searchables, search_get_additional_modules()); 151c135,158 < $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php'; --- > > $key = 'search_in_'.$mod->name; > if (isset($CFG->$key) && !$CFG->$key) { > mtrace("module $key has been administratively disabled. Skipping...\n"); > continue; > } > > if ($mod->location == 'internal'){ > $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php'; > } else { > $class_file = $CFG->dirroot.'/'.$mod->location.'/'.$mod->name.'/search_document.php'; > } > > /* > if (!file_exists($class_file)){ > if (defined("PATH_FOR_SEARCH_TYPE_{$mod->name}")){ > eval("\$pluginpath = PATH_FOR_SEARCH_TYPE_{$mod->name}"); > $class_file = "{$CFG->dirroot}/{$pluginpath}/searchlib.php"; > } else { > mtrace ("No search document found for plugin {$mod->name}. Ignoring."); > continue; > } > } > */ 197a205,206 > } else { > mtrace ("No search document found for plugin {$mod->name}. Ignoring."); Index: indexersplash.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/indexersplash.php,v retrieving revision 1.9.2.4 diff -r1.9.2.4 indexersplash.php 21c21,25 < require_once("$CFG->dirroot/search/lib.php"); --- > require_once("{$CFG->dirroot}/search/lib.php"); > > /// makes inclusions of the Zend Engine more reliable > $separator = (array_key_exists('WINDIR', $_SERVER)) ? ';' : ':' ; > ini_set('include_path', $CFG->dirroot.'\search'.$separator.ini_get('include_path')); 31c35 < if (!isadmin()) { --- > if (!has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) { 35,42d38 < /// check for php5 (lib.php) < < if (!search_check_php5()) { < $phpversion = phpversion(); < mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version ".phpversion().")"); < exit(0); < } < Index: indexlib.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/indexlib.php,v retrieving revision 1.7.2.3 diff -r1.7.2.3 indexlib.php 81c81,82 < $types = search_get_document_types(); --- > // $types = search_get_document_types(); > $types = search_collect_searchables(true, false); Index: lib.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/lib.php,v retrieving revision 1.13.2.2 diff -r1.13.2.2 lib.php 20a21 > function search_collect_searchables($namelist=false, $verbose=true){ 21a23 > function search_get_document_types($prefix = 'X_SEARCH_TYPE_') { 25d26 < function search_check_php5($feedback = false) { 33,50c34,35 < //document types that can be searched < //define('SEARCH_TYPE_NONE', 'none'); < define('SEARCH_TYPE_WIKI', 'wiki'); < define('PATH_FOR_SEARCH_TYPE_WIKI', 'mod/wiki'); < define('SEARCH_TYPE_FORUM', 'forum'); < define('PATH_FOR_SEARCH_TYPE_FORUM', 'mod/forum'); < define('SEARCH_TYPE_GLOSSARY', 'glossary'); < define('PATH_FOR_SEARCH_TYPE_GLOSSARY', 'mod/glossary'); < define('SEARCH_TYPE_RESOURCE', 'resource'); < define('PATH_FOR_SEARCH_TYPE_RESOURCE', 'mod/resource'); < define('SEARCH_TYPE_TECHPROJECT', 'techproject'); < define('PATH_FOR_SEARCH_TYPE_TECHPROJECT', 'mod/techproject'); < define('SEARCH_TYPE_DATA', 'data'); < define('PATH_FOR_SEARCH_TYPE_DATA', 'mod/data'); < define('SEARCH_TYPE_CHAT', 'chat'); < define('PATH_FOR_SEARCH_TYPE_CHAT', 'mod/chat'); < define('SEARCH_TYPE_LESSON', 'lesson'); < define('PATH_FOR_SEARCH_TYPE_LESSON', 'mod/lesson'); --- > // get document types > include "{$CFG->dirroot}/search/searchtypes.php"; 53c38,109 < * returns all the document type constants --- > * collects all searchable items identities > * @param boolean $namelist if true, only returns list of names of searchable items > * @param boolean $verbose if true, prints a discovering status > * @return an array of names or an array of type descriptors > */ > function search_collect_searchables($namelist=false, $verbose=true){ > global $CFG; > > $searchables = array(); > $searchables_names = array(); > > /// get all installed modules > if ($mods = get_records('modules', '', '', 'name', 'id,name')){ > > $searchabletypes = array_values(search_get_document_types()); > > foreach($mods as $mod){ > if (in_array($mod->name, $searchabletypes)){ > $mod->location = 'internal'; > $searchables[$mod->name] = $mod; > $searchables_names[] = $mod->name; > } else { > $documentfile = $CFG->dirroot."/mod/{$mod->name}/search_document.php"; > $mod->location = 'mod'; > if (file_exists($documentfile)){ > $searchables[$mod->name] = $mod; > $searchables_names[] = $mod->name; > } > } > } > if ($verbose) mtrace(count($searchables).' modules to search in / '.count($mods).' modules found.'); > } > > /// collects blocks as indexable information may be found in blocks either > if ($blocks = get_records('block', '', '', 'name', 'id,name')) { > $blocks_searchables = array(); > // prepend the "block_" prefix to discriminate document type plugins > foreach($blocks as $block){ > $block->dirname = $block->name; > $block->name = 'block_'.$block->name; > if (in_array('SEARCH_TYPE_'.strtoupper($block->name), $searchabletypes)){ > $mod->location = 'internal'; > $blocks_searchables[] = $block; > $searchables_names[] = $block->name; > } else { > $documentfile = $CFG->dirroot."/blocks/{$block->dirname}/search_document.php"; > if (file_exists($documentfile)){ > $mod->location = 'blocks'; > $blocks_searchables[$block->name] = $block; > $searchables_names[] = $block->name; > } > } > } > if ($verbose) mtrace(count($blocks_searchables).' blocks to search in / '.count($blocks).' blocks found.'); > $searchables = array_merge($searchables, $blocks_searchables); > } > > /// add virtual modules onto the back of the array > > $additional = search_get_additional_modules(); > if (!empty($additional)){ > if ($verbose) mtrace(count($additional).' additional to search in.'); > $searchables = array_merge($searchables, $additional); > } > > if ($namelist) > return $searchables_names; > return $searchables; > } > > /** > * returns all the document type constants that are known in core implementation 66c122 < } //search_get_document_types --- > } 77a134,137 > if (defined('SEARCH_EXTRAS')){ > $extras = explode(',', SEARCH_EXTRAS); > } > 81c141,142 < $ret[] = clone($temp); --- > $temp->location = 'internal'; > $ret[$temp->name] = clone($temp); 117,133d177 < * get a real php 5 version number, using 5.0.0 arbitrarily < * @param feedback if true, prints a feedback message to output. < * @return true if version of PHP is high enough < */ < function search_check_php5($feedback = false) { < if (!check_php_version("5.0.0")) { < if ($feedback) { < print_heading(get_string('versiontoolow', 'search')); < } < return false; < } < else { < return true; < } < } //search_check_php5 < < /** Index: query.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/query.php,v retrieving revision 1.16.2.5 diff -r1.16.2.5 query.php 53d52 < if ($check = search_check_php5()) { 59a59,67 > > /** > * discard harmfull searches > */ > if (preg_match("/^[\*\?]+$/", $query_string)){ > $query_string = ''; > $error = get_string('fullwildcardquery','search'); > } > 64,65c72 < } < else if ($advanced) { --- > } elseif ($advanced) { 88c95 < // get all available module types --- > // get all available module types adding third party modules 89a97 > $module_types = array_merge($module_types, array_values(search_get_document_types('X_SEARCH_TYPE'))); 136c144,145 < //run the query against the index --- > //run the query against the index ensuring internal coding works in UTF-8 > Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8()); 138d146 < } 159,164c167,169 < //keep things pretty, even if php5 isn't available < if (!$check) { < print_heading(search_check_php5(true)); < print_footer(); < exit(0); < } --- > if (!empty($error)){ > notice ($error); > } 181d185 < 290c294 < if (!$sq->is_valid_index() and isadmin()) { --- > if (!$sq->is_valid_index() and has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) { 299c303,304 < // prints all the results in a box --- > /// prints all the results in a box > 326a332,334 > > $searchables = search_collect_searchables(false, false); > 327a336,339 > > $iconpath = $CFG->modpixpath.'/'.$listing->doctype.'/icon.gif'; > $coursename = get_field('course', 'fullname', 'id', $listing->courseid); > $courseword = mb_convert_case(get_string('course', 'moodle'), MB_CASE_LOWER, 'UTF-8'); 332c344,349 < require_once "{$CFG->dirroot}/search/documents/{$listing->doctype}_document.php"; --- > $searchable_instance = $searchables[$listing->doctype]; > if ($searchable_instance->location == 'internal'){ > require_once "{$CFG->dirroot}/search/documents/{$listing->doctype}_document.php"; > } else { > require_once "{$CFG->dirroot}/{$searchable_instance->location}/{$listing->doctype}/search_document.php"; > } 337,339c354,360 < print "
  • url)."'>$listing->title
    \n" < ."".search_shorten_url($listing->url, 70)."
    \n" < ."{$typestr}: ".$listing->doctype.", {$scorestr}: ".round($listing->score, 3); --- > echo "
  • url) > ."'> ."\" class=\"activityicon\" alt=\"\"/> $listing->title (".$courseword.": '".$coursename."')
    \n"; > // print "
  • url)."'>$listing->title
    \n" > // ."".search_shorten_url($listing->url, 70)."
    \n" > echo "{$typestr}: " . $listing->doctype . ", {$scorestr}: " . round($listing->score, 3); 341c362 < print ", {$authorstr}: ".$listing->author."\n" --- > echo ", {$authorstr}: ".$listing->author."\n" 345,346c366,367 < print ""; < print $page_links; --- > echo ""; > echo $page_links; Index: querylib.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/querylib.php,v retrieving revision 1.7.2.3 diff -r1.7.2.3 querylib.php 29c29,30 < $number; --- > $number, > $courseid; 223c224,225 < --- > $page = optional_param('page', 1, PARAM_INT); > 236c238,242 < $totalpages = ceil($hitcount/$this->results_per_page); --- > $resultdoc = new SearchResult(); > $resultdocs = array(); > $searchables = search_collect_searchables(false, false); > > $realindex = 0; 237a244 > /** 239c246 < if ($hitcount < $this->results_per_page) { --- > if ($finalresults < $this->results_per_page) { 241c248 < } else if ($this->pagenumber > $totalpages) { --- > } elseif ($this->pagenumber > $totalpages) { 248,249c255,256 < if ($end > $hitcount) { < $end = $hitcount; --- > if ($end > $finalresults) { > $end = $finalresults; 253,257c260,261 < $end = $hitcount; < } < < $resultdoc = new SearchResult(); < $resultdocs = array(); --- > $end = $finalresults; > } */ 259c263 < for ($i = $start; $i < $end; $i++) { --- > for ($i = 0; $i < min($hitcount, ($page) * $this->results_per_page); $i++) { 263,272c267,280 < if ($this->can_display($USER, $hit->docid, $hit->doctype, $hit->course_id, $hit->group_id, $hit->path, $hit->itemtype, $hit->context_id )) { < $resultdoc->number = $i; < $resultdoc->url = $hit->url; < $resultdoc->title = $hit->title; < $resultdoc->score = $hit->score; < $resultdoc->doctype = $hit->doctype; < $resultdoc->author = $hit->author; < < //and store it < $resultdocs[] = clone($resultdoc); --- > if ($this->can_display($USER, $hit->docid, $hit->doctype, $hit->course_id, $hit->group_id, $hit->path, $hit->itemtype, $hit->context_id, $searchables )) { > if ($i >= ($page - 1) * $this->results_per_page){ > $resultdoc->number = $realindex; > $resultdoc->url = $hit->url; > $resultdoc->title = $hit->title; > $resultdoc->score = $hit->score; > $resultdoc->doctype = $hit->doctype; > $resultdoc->author = $hit->author; > $resultdoc->courseid = $hit->course_id; > > //and store it > $resultdocs[] = clone($resultdoc); > } > $realindex++; 278a287,289 > $totalpages = ceil($this->total_results/$this->results_per_page); > > 301c312 < //print "Caching disabled!"; --- > // print "Caching disabled!"; 373c384 < private function can_display(&$user, $this_id, $doctype, $course_id, $group_id, $path, $item_type, $context_id) { --- > private function can_display(&$user, $this_id, $doctype, $course_id, $group_id, $path, $item_type, $context_id, &$searchables) { 380c391 < if (isadmin()){ --- > if (has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))){ 384,420c395,400 < // first check course compatibility against user : enrolled users to that course can see. < $myCourses = get_my_courses($user->id); < $unenroled = !in_array($course_id, array_keys($myCourses)); < < // if guests are allowed, logged guest can see < $isallowedguest = (isguest()) ? get_field('course', 'guest', 'id', $course_id) : false ; < < if ($unenroled && !$isallowedguest){ < return false; < } < < // if user is enrolled or is allowed user and course is hidden, can he see it ? < $visibility = get_field('course', 'visible', 'id', $course_id); < if ($visibility <= 0){ < if (!has_capability('moodle/course:viewhiddencourses', get_context_instance(CONTEXT_COURSE, $course_id))){ < return false; < } < } < < /** < * prerecorded capabilities < */ < // get context caching information and tries to discard unwanted records here < < < /** < * final checks < */ < // then give back indexing data to the module for local check < include_once "{$CFG->dirroot}/search/documents/{$doctype}_document.php"; < $access_check_function = "{$doctype}_check_text_access"; < < if (function_exists($access_check_function)){ < $modulecheck = $access_check_function($path, $item_type, $this_id, $user, $group_id, $context_id); < // echo "module said $modulecheck for item $doctype/$item_type/$this_id"; < return($modulecheck); < } --- > // first check course compatibility against user : enrolled users to that course can see. > $myCourses = get_my_courses($user->id); > $unenroled = !in_array($course_id, array_keys($myCourses)); > > // if guests are allowed, logged guest can see > $isallowedguest = (isguest()) ? get_field('course', 'guest', 'id', $course_id) : false ; 422c402,438 < return true; --- > if ($unenroled && !$isallowedguest){ > return false; > } > > // if user is enrolled or is allowed user and course is hidden, can he see it ? > $visibility = get_field('course', 'visible', 'id', $course_id); > if ($visibility <= 0){ > if (!has_capability('moodle/course:viewhiddencourses', get_context_instance(CONTEXT_COURSE, $course_id))){ > return false; > } > } > > /** > * prerecorded capabilities > */ > // get context caching information and tries to discard unwanted records here > > > /** > * final checks > */ > // then give back indexing data to the module for local check > $searchable_instance = $searchables[$doctype]; > if ($searchable_instance->location == 'internal'){ > include_once "{$CFG->dirroot}/search/documents/{$doctype}_document.php"; > } else { > include_once "{$CFG->dirroot}/{$searchable_instance->location}/$doctype/search_document.php"; > } > $access_check_function = "{$doctype}_check_text_access"; > > if (function_exists($access_check_function)){ > $modulecheck = $access_check_function($path, $item_type, $this_id, $user, $group_id, $context_id); > // echo "module said $modulecheck for item $doctype/$item_type/$this_id"; > return($modulecheck); > } > > return true; Index: stats.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/stats.php,v retrieving revision 1.11.2.3 diff -r1.11.2.3 stats.php 35,36c35 < if ($check = search_check_php5()) { < require_once("{$CFG->dirroot}/search/indexlib.php"); --- > require_once("{$CFG->dirroot}/search/indexlib.php"); 38,39c37 < $indexinfo = new IndexInfo(); < } --- > $indexinfo = new IndexInfo(); 61,66d58 < if (!$check) { < print_heading(search_check_php5(true)); < print_footer(); < exit(0); < } < 80c72 < if (isadmin()) { --- > if (has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) { 150c142 < if (isadmin()) { --- > if (has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) { Index: update.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/update.php,v retrieving revision 1.5.2.3 diff -r1.5.2.3 update.php 2,22c2,28 < /** < * Global Search Engine for Moodle < * < * @package search < * @category core < * @subpackage search_engine < * @author Michael Champanis (mchampan) [cynnical@gmail.com], Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8 < * @date 2008/03/31 < * @license http://www.gnu.org/copyleft/gpl.html GNU Public License < * < * Index asynchronous updator < * < * Major chages in this review is passing the xxxx_db_names return to < * multiple arity to handle multiple document types modules < */ < < /** < * includes and requires < */ < require_once('../config.php'); < require_once("$CFG->dirroot/search/lib.php"); --- > /** > * Global Search Engine for Moodle > * > * @package search > * @category core > * @subpackage search_engine > * @author Michael Champanis (mchampan) [cynnical@gmail.com], Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8 > * @date 2008/03/31 > * @license http://www.gnu.org/copyleft/gpl.html GNU Public License > * > * Index asynchronous updator > * > * Major chages in this review is passing the xxxx_db_names return to > * multiple arity to handle multiple document types modules > */ > > /** > * includes and requires > */ > require_once('../config.php'); > > /// makes inclusions of the Zend Engine more reliable > $separator = (array_key_exists('WINDIR', $_SERVER)) ? ';' : ':' ; > ini_set('include_path', $CFG->dirroot.'/search'.$separator.ini_get('include_path'));require_login(); > > require_once("$CFG->dirroot/search/lib.php"); > require_once("$CFG->dirroot/search/indexlib.php"); 32c38 < if (!isadmin()) { --- > if (!has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) { 36,46c42,47 < /// check for php5 (lib.php) < < if (!search_check_php5()) { < $phpversion = phpversion(); < mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version ".phpversion().")"); < exit(0); < } < < require_once("$CFG->dirroot/search/indexlib.php"); < < $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); --- > try { > $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); > } catch(LuceneException $e) { > mtrace("Could not construct a valid index. Maybe the first indexation was never made, or files might be corrupted. Run complete indexation again."); > return; > } 88c89 < doctype = '{$mod->name}' --- > doctype = ? 91c92 < $docIds = get_records_sql_menu($query); --- > $docIds = $DB->get_records_sql_menu($query, array($mod->name)); cvs diff: Diffing Zend cvs diff: Diffing Zend/Search Index: Zend/Search/Exception.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Exception.php,v retrieving revision 1.2 diff -r1.2 Exception.php 17c17 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 25c25 < require_once $CFG->dirroot.'/search/Zend/Exception.php'; --- > require_once "Zend/Exception.php"; 31c31 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) Index: Zend/Search/Lucene.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene.php,v retrieving revision 1.2 diff -r1.2 Lucene.php 17c17 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 21d20 < 23c22 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php'; --- > require_once "Zend/Search/Lucene/Exception.php"; 26c25 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document.php'; --- > require_once "Zend/Search/Lucene/Document.php"; 29c28 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document/Html.php'; --- > require_once "Zend/Search/Lucene/Document/Html.php"; 31,32c30,31 < /** Zend_Search_Lucene_Storage_Directory */ < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/Directory/Filesystem.php'; --- > /** Zend_Search_Lucene_Storage_Directory_Filesystem */ > require_once "Zend/Search/Lucene/Storage/Directory/Filesystem.php"; 35c34 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/File/Memory.php'; --- > require_once "Zend/Search/Lucene/Storage/File/Memory.php"; 38c37 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php'; --- > require_once "Zend/Search/Lucene/Index/Term.php"; 41c40 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/TermInfo.php'; --- > require_once "Zend/Search/Lucene/Index/TermInfo.php"; 44c43 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php'; --- > require_once "Zend/Search/Lucene/Index/SegmentInfo.php"; 47c46 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/FieldInfo.php'; --- > require_once "Zend/Search/Lucene/Index/FieldInfo.php"; 50c49 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Writer.php'; --- > require_once "Zend/Search/Lucene/Index/Writer.php"; 53c52 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParser.php'; --- > require_once "Zend/Search/Lucene/Search/QueryParser.php"; 56c55 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryHit.php'; --- > require_once "Zend/Search/Lucene/Search/QueryHit.php"; 59c58 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Similarity.php'; --- > require_once "Zend/Search/Lucene/Search/Similarity.php"; 62c61,65 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php'; --- > require_once "Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php"; > > /** Zend_Search_Lucene_LockManager */ > require_once "Zend/Search/Lucene/LockManager.php"; > 66c69 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Interface.php'; --- > require_once "Zend/Search/Lucene/Interface.php"; 69c72 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Proxy.php'; --- > require_once "Zend/Search/Lucene/Proxy.php"; 75c78 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 89a93,101 > * Result set limit > * > * 0 means no limit > * > * @var integer > */ > private static $_resultSetLimit = 0; > > /** 133,139d144 < * Index lock object < * < * @var Zend_Search_Lucene_Storage_File < */ < private $_lock; < < /** 152a158,163 > /** > * Current segment generation > * > * @var integer > */ > private $_generation; 153a165 > 175a188,193 > /** Generation retrieving counter */ > const GENERATION_RETRIEVE_COUNT = 10; > > /** Pause between generation retrieving attempts in milliseconds */ > const GENERATION_RETRIEVE_PAUSE = 50; > 177c195 < * Opens the index. --- > * Get current generation number 179,180c197,199 < * IndexReader constructor needs Directory as a parameter. It should be < * a string with a path to the index folder or a Directory object. --- > * Returns generation number > * 0 means pre-2.1 index format > * -1 means there are no segments files. 182c201,202 < * @param mixed $directory --- > * @param Zend_Search_Lucene_Storage_Directory $directory > * @return integer 185c205 < public function __construct($directory = null, $create = false) --- > public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory) 187,197c207,226 < if ($directory === null) { < throw new Zend_Search_Exception('No index directory specified'); < } < < if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) { < $this->_directory = $directory; < $this->_closeDirOnExit = false; < } else { < $this->_directory = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory); < $this->_closeDirOnExit = true; < } --- > /** > * Zend_Search_Lucene uses segments.gen file to retrieve current generation number > * > * Apache Lucene index format documentation mentions this method only as a fallback method > * > * Nevertheless we use it according to the performance considerations > * > * @todo check if we can use some modification of Apache Lucene generation determination algorithm > * without performance problems > */ > > try { > for ($count = 0; $count < self::GENERATION_RETRIEVE_COUNT; $count++) { > // Try to get generation file > $genFile = $directory->getFileObject('segments.gen', false); > > $format = $genFile->readInt(); > if ($format != (int)0xFFFFFFFE) { > throw new Zend_Search_Lucene_Exception('Wrong segments.gen file format'); > } 198a228,229 > $gen1 = $genFile->readLong(); > $gen2 = $genFile->readLong(); 200,201c231,233 < // Get a shared lock to the index < $this->_lock = $this->_directory->createFile('index.lock'); --- > if ($gen1 == $gen2) { > return $gen1; > } 203c235,236 < $this->_segmentInfos = array(); --- > usleep(self::GENERATION_RETRIEVE_PAUSE * 1000); > } 205,208c238,256 < if ($create) { < // Throw an exception if index is under processing now < if (!$this->_lock->lock(LOCK_EX, true)) { < throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now'); --- > // All passes are failed > throw new Zend_Search_Lucene_Exception('Index is under processing now'); > } catch (Zend_Search_Lucene_Exception $e) { > if (strpos($e->getMessage(), 'is not readable') !== false) { > try { > // Try to open old style segments file > $segmentsFile = $directory->getFileObject('segments', false); > > // It's pre-2.1 index > return 0; > } catch (Zend_Search_Lucene_Exception $e) { > if (strpos($e->getMessage(), 'is not readable') !== false) { > return -1; > } else { > throw $e; > } > } > } else { > throw $e; 209a258 > } 211,212c260,261 < // Writer will create segments file for empty segments list < $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos, true); --- > return -1; > } 214,222c263,272 < if (!$this->_lock->lock(LOCK_SH)) { < throw new Zend_Search_Lucene_Exception('Can\'t reduce lock level from Exclusive to Shared'); < } < } else { < // Wait if index is under switching from one set of segments to another (Index_Writer::_updateSegments()) < if (!$this->_lock->lock(LOCK_SH)) { < throw new Zend_Search_Lucene_Exception('Can\'t obtain shared index lock'); < } < $this->_writer = null; --- > /** > * Get segments file name > * > * @param integer $generation > * @return string > */ > public static function getSegmentFileName($generation) > { > if ($generation == 0) { > return 'segments'; 224a275,276 > return 'segments_' . base_convert($generation, 10, 36); > } 225a278,282 > /** > * Read segments file for pre-2.1 Lucene index format > */ > private function _readPre21SegmentsFile() > { 251,252c308,375 < $this->_segmentInfos[] = < new Zend_Search_Lucene_Index_SegmentInfo($segName, --- > $this->_segmentInfos[$segName] = > new Zend_Search_Lucene_Index_SegmentInfo($this->_directory, > $segName, > $segSize); > } > } > > /** > * Read segments file > * > * @throws Zend_Search_Lucene_Exception > */ > private function _readSegmentsFile() > { > $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation)); > > $format = $segmentsFile->readInt(); > > if ($format != (int)0xFFFFFFFD) { > throw new Zend_Search_Lucene_Exception('Wrong segments file format'); > } > > // read version > // $segmentsFile->readLong(); > $segmentsFile->readInt(); $segmentsFile->readInt(); > > // read segment name counter > $segmentsFile->readInt(); > > $segments = $segmentsFile->readInt(); > > $this->_docCount = 0; > > // read segmentInfos > for ($count = 0; $count < $segments; $count++) { > $segName = $segmentsFile->readString(); > $segSize = $segmentsFile->readInt(); > > // 2.1+ specific properties > //$delGen = $segmentsFile->readLong(); > $delGenHigh = $segmentsFile->readInt(); > $delGenLow = $segmentsFile->readInt(); > if ($delGenHigh == (int)0xFFFFFFFF && $delGenLow == (int)0xFFFFFFFF) { > $delGen = -1; // There are no deletes > } else { > $delGen = ($delGenHigh << 32) | $delGenLow; > } > > $hasSingleNormFile = $segmentsFile->readByte(); > $numField = $segmentsFile->readInt(); > > $normGens = array(); > if ($numField != (int)0xFFFFFFFF) { > for ($count1 = 0; $count1 < $numField; $count1++) { > $normGens[] = $segmentsFile->readLong(); > } > > throw new Zend_Search_Lucene_Exception('Separate norm files are not supported. Optimize index to use it with Zend_Search_Lucene.'); > } > > $isCompound = $segmentsFile->readByte(); > > > $this->_docCount += $segSize; > > $this->_segmentInfos[$segName] = > new Zend_Search_Lucene_Index_SegmentInfo($this->_directory, > $segName, 254c377,403 < $this->_directory); --- > $delGen, > $hasSingleNormFile, > $isCompound); > } > } > > /** > * Opens the index. > * > * IndexReader constructor needs Directory as a parameter. It should be > * a string with a path to the index folder or a Directory object. > * > * @param mixed $directory > * @throws Zend_Search_Lucene_Exception > */ > public function __construct($directory = null, $create = false) > { > if ($directory === null) { > throw new Zend_Search_Exception('No index directory specified'); > } > > if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) { > $this->_directory = $directory; > $this->_closeDirOnExit = false; > } else { > $this->_directory = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory); > $this->_closeDirOnExit = true; 255a405,455 > > $this->_segmentInfos = array(); > > // Mark index as "under processing" to prevent other processes from premature index cleaning > Zend_Search_Lucene_LockManager::obtainReadLock($this->_directory); > > // Escalate read lock to prevent current generation index files to be deleted while opening process is not done > // Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory); > > > $this->_generation = self::getActualGeneration($this->_directory); > > if ($create) { > try { > Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory); > } catch (Zend_Search_Lucene_Exception $e) { > if (strpos($e->getMessage(), 'Can\'t obtain exclusive index lock') === false) { > throw $e; > } else { > throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now'); > } > } > > if ($this->_generation == -1) { > // Directory doesn't contain existing index, start from 1 > $this->_generation = 1; > $nameCounter = 0; > } else { > // Directory contains existing index > $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation)); > $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version) > > $nameCounter = $segmentsFile->readInt(); > $this->_generation++; > } > > Zend_Search_Lucene_Index_Writer::createIndex($this->_directory, $this->_generation, $nameCounter); > > Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory); > } > > if ($this->_generation == -1) { > throw new Zend_Search_Lucene_Exception('Index doesn\'t exists in the specified directory.'); > } else if ($this->_generation == 0) { > $this->_readPre21SegmentsFile(); > } else { > $this->_readSegmentsFile(); > } > > // De-escalate read lock to prevent current generation index files to be deleted while opening process is not done > // Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory); 270,272c470,472 < // Free shared lock < $this->_lock->unlock(); < --- > // Release "under processing" flag > Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory); > 434a635,658 > * Set result set limit. > * > * 0 (default) means no limit > * > * @param integer $limit > */ > public static function setResultSetLimit($limit) > { > self::$_resultSetLimit = $limit; > } > > /** > * Set result set limit. > * > * 0 means no limit > * > * @return integer > */ > public static function getResultSetLimit() > { > return self::$_resultSetLimit; > } > > /** 587a812,815 > > if (self::$_resultSetLimit != 0 && count($hits) >= self::$_resultSetLimit) { > break; > } 592c820 < return array(); --- > return array(); 979a1208,1209 > > $this->_hasChanges = true; 1005,1011c1235,1237 < < $this->_hasChanges = false; < } < < if ($this->_writer !== null) { < $this->_writer->commit(); < --- > > $this->getIndexWriter()->commit(); > 1012a1239,1240 > > $this->_hasChanges = false; 1062,1064c1290 < $segmentInfo->nextTerm(); < // check, if segment dictionary is finished < if ($segmentInfo->currentTerm() !== null) { --- > if ($segmentInfo->nextTerm() !== null) { 1073a1300,1418 > /** > * Terms stream queue > * > * @var Zend_Search_Lucene_Index_SegmentInfoPriorityQueue > */ > private $_termsStreamQueue = null; > > /** > * Last Term in a terms stream > * > * @var Zend_Search_Lucene_Index_Term > */ > private $_lastTerm = null; > > /** > * Reset terms stream. > */ > public function resetTermsStream() > { > $this->_termsStreamQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue(); > > foreach ($this->_segmentInfos as $segmentInfo) { > $segmentInfo->reset(); > > // Skip "empty" segments > if ($segmentInfo->currentTerm() !== null) { > $this->_termsStreamQueue->put($segmentInfo); > } > } > > $this->nextTerm(); > } > > /** > * Skip terms stream up to specified term preffix. > * > * Prefix contains fully specified field info and portion of searched term > * > * @param Zend_Search_Lucene_Index_Term $prefix > */ > public function skipTo(Zend_Search_Lucene_Index_Term $prefix) > { > $segments = array(); > > while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) { > $segments[] = $segmentInfo; > } > > foreach ($segments as $segmentInfo) { > $segmentInfo->skipTo($prefix); > > if ($segmentInfo->currentTerm() !== null) { > $this->_termsStreamQueue->put($segmentInfo); > } > } > > $this->nextTerm(); > } > > /** > * Scans terms dictionary and returns next term > * > * @return Zend_Search_Lucene_Index_Term|null > */ > public function nextTerm() > { > while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) { > if ($this->_termsStreamQueue->top() === null || > $this->_termsStreamQueue->top()->currentTerm()->key() != > $segmentInfo->currentTerm()->key()) { > // We got new term > $this->_lastTerm = $segmentInfo->currentTerm(); > > if ($segmentInfo->nextTerm() !== null) { > // Put segment back into the priority queue > $this->_termsStreamQueue->put($segmentInfo); > } > > return $this->_lastTerm; > } > > if ($segmentInfo->nextTerm() !== null) { > // Put segment back into the priority queue > $this->_termsStreamQueue->put($segmentInfo); > } > } > > // End of stream > $this->_lastTerm = null; > > return null; > } > > /** > * Returns term in current position > * > * @return Zend_Search_Lucene_Index_Term|null > */ > public function currentTerm() > { > return $this->_lastTerm; > } > > /** > * Close terms stream > * > * Should be used for resources clean up if stream is not read up to the end > */ > public function closeTermsStream() > { > while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) { > $segmentInfo->closeTermsStream(); > } > > $this->_termsStreamQueue = null; > $this->_lastTerm = null; > } > > cvs diff: Diffing Zend/Search/Lucene Index: Zend/Search/Lucene/Document.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Document.php,v retrieving revision 1.2 diff -r1.2 Document.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Field.php'; --- > require_once "Zend/Search/Lucene/Field.php"; 33c33 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 57,60c57,60 < public function __get($offset) < { < return $this->getFieldValue($offset); < } --- > public function __get($offset) > { > return $this->getFieldValue($offset); > } 81c81 < return array_keys($this->_fields); --- > return array_keys($this->_fields); 108c108 < return $this->getField($fieldName)->value; --- > return $this->getField($fieldName)->value; 119c119 < return $this->getField($fieldName)->getUtf8Value(); --- > return $this->getField($fieldName)->getUtf8Value(); Index: Zend/Search/Lucene/Exception.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Exception.php,v retrieving revision 1.2 diff -r1.2 Exception.php 17c17 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 25c25 < require_once $CFG->dirroot.'/search/Zend/Search/Exception.php'; --- > require_once "Zend/Search/Exception.php"; 31c31 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) Index: Zend/Search/Lucene/FSM.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/FSM.php,v retrieving revision 1.1 diff -r1.1 FSM.php 17c17 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 23c23 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/FSMAction.php'; --- > require_once 'Zend/Search/Lucene/FSMAction.php'; 26c26 < require_once $CFG->dirroot.'/search/Zend/Search/Exception.php'; --- > require_once 'Zend/Search/Exception.php'; 40c40 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) Index: Zend/Search/Lucene/FSMAction.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/FSMAction.php,v retrieving revision 1.1 diff -r1.1 FSMAction.php 17c17 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 28c28 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) Index: Zend/Search/Lucene/Field.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Field.php,v retrieving revision 1.3 diff -r1.3 Field.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 34c34 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) Index: Zend/Search/Lucene/Interface.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Interface.php,v retrieving revision 1.1 diff -r1.1 Interface.php 17c17 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 25c25 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 89a90,107 > * Set result set limit. > * > * 0 (default) means no limit > * > * @param integer $limit > */ > public static function setResultSetLimit($limit); > > /** > * Set result set limit. > * > * 0 means no limit > * > * @return integer > */ > public static function getResultSetLimit(); > > /** 308a327,363 > > /** > * Reset terms stream. > */ > public function resetTermsStream(); > > /** > * Skip terms stream up to specified term preffix. > * > * Prefix contains fully specified field info and portion of searched term > * > * @param Zend_Search_Lucene_Index_Term $prefix > */ > public function skipTo(Zend_Search_Lucene_Index_Term $prefix); > > /** > * Scans terms dictionary and returns next term > * > * @return Zend_Search_Lucene_Index_Term|null > */ > public function nextTerm(); > > /** > * Returns term in current position > * > * @return Zend_Search_Lucene_Index_Term|null > */ > public function currentTerm(); > > /** > * Close terms stream > * > * Should be used for resources clean up if stream is not read up to the end > */ > public function closeTermsStream(); > > Index: Zend/Search/Lucene/PriorityQueue.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/PriorityQueue.php,v retrieving revision 1.1 diff -r1.1 PriorityQueue.php 17c17 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 34c34 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) Index: Zend/Search/Lucene/Proxy.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Proxy.php,v retrieving revision 1.1 diff -r1.1 Proxy.php 17c17 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 22c22 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Interface.php'; --- > require_once 'Zend/Search/Lucene/Interface.php'; 32c32 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 147a148,171 > * Set result set limit. > * > * 0 (default) means no limit > * > * @param integer $limit > */ > public static function setResultSetLimit($limit) > { > Zend_Search_Lucene::setResultSetLimit($limit); > } > > /** > * Set result set limit. > * > * 0 means no limit > * > * @return integer > */ > public static function getResultSetLimit() > { > return Zend_Search_Lucene::getResultSetLimit(); > } > > /** 438a463,514 > > /** > * Reset terms stream. > */ > public function resetTermsStream() > { > $this->_index->resetTermsStream(); > } > > /** > * Skip terms stream up to specified term preffix. > * > * Prefix contains fully specified field info and portion of searched term > * > * @param Zend_Search_Lucene_Index_Term $prefix > */ > public function skipTo(Zend_Search_Lucene_Index_Term $prefix) > { > return $this->_index->skipTo($prefix); > } > > /** > * Scans terms dictionary and returns next term > * > * @return Zend_Search_Lucene_Index_Term|null > */ > public function nextTerm() > { > return $this->_index->nextTerm(); > } > > /** > * Returns term in current position > * > * @return Zend_Search_Lucene_Index_Term|null > */ > public function currentTerm() > { > return $this->_index->currentTerm(); > } > > /** > * Close terms stream > * > * Should be used for resources clean up if stream is not read up to the end > */ > public function closeTermsStream() > { > $this->_index->closeTermsStream(); > } > > cvs diff: Diffing Zend/Search/Lucene/Analysis Index: Zend/Search/Lucene/Analysis/Analyzer.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer.php,v retrieving revision 1.2.2.1 diff -r1.2.2.1 Analyzer.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Token.php'; --- > require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Token.php"; 27c27,30 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php'; --- > require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php"; > > /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive */ > require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8/CaseInsensitive.php"; 30c33,36 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php'; --- > require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php"; > > /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive */ > require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num/CaseInsensitive.php"; 33c39 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php'; --- > require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php"; 36c42 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php'; --- > require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php"; 39c45 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php'; --- > require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php"; 42c48 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php'; --- > require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php"; 45c51 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php'; --- > require_once 'Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php'; 48c54 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php'; --- > require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php"; 64c70 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 100c106 < public function tokenize($data, $encoding = 'UTF-8') --- > public function tokenize($data, $encoding = '') 102a109 > 163c170 < self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8(); --- > self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive(); Index: Zend/Search/Lucene/Analysis/Token.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Token.php,v retrieving revision 1.2 diff -r1.2 Token.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 27c27 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) Index: Zend/Search/Lucene/Analysis/TokenFilter.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/TokenFilter.php,v retrieving revision 1.2 diff -r1.2 TokenFilter.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Token.php'; --- > require_once 'Zend/Search/Lucene/Analysis/Token.php'; 33c33 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) cvs diff: Diffing Zend/Search/Lucene/Analysis/Analyzer Index: Zend/Search/Lucene/Analysis/Analyzer/Common.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php,v retrieving revision 1.2 diff -r1.2 Common.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer.php'; --- > require_once 'Zend/Search/Lucene/Analysis/Analyzer.php'; 37c37 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 71c71 < // resulting token can be null if the filter removed it --- > // resulting token can be null if the filter removes it cvs diff: Diffing Zend/Search/Lucene/Analysis/Analyzer/Common Index: Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php,v retrieving revision 1.2 diff -r1.2 Text.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php'; --- > require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php'; 31c31 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 56c56,58 < $this->_input = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input); --- > //$this->_input = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input); > $this->_input = mb_convert_encoding($this->_input, 'ASCII', 'auto'); > 78c80 < return null; --- > return null; Index: Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php,v retrieving revision 1.1 diff -r1.1 TextNum.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php'; --- > require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php'; 31c31 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 77c77 < return null; --- > return null; Index: Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php,v retrieving revision 1.1 diff -r1.1 Utf8.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php'; --- > require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php'; 31c31 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 50c50 < --- > 52c52 < * Stream length --- > * Object constructor 54c54 < * @var integer --- > * @throws Zend_Search_Lucene_Exception 56c56,63 < private $_streamLength; --- > public function __construct() > { > if (@preg_match('/\pL/u', 'a') != 1) { > // PCRE unicode support is turned off > require_once 'Zend/Search/Lucene/Exception.php'; > throw new Zend_Search_Lucene_Exception('Utf8 analyzer needs PCRE unicode support to be enabled.'); > } > } 69c76 < $this->_input = iconv($this->_encoding, 'UTF-8', $this->_input); --- > $this->_input = @iconv($this->_encoding, 'UTF-8', $this->_input); 72,118d78 < < // Get UTF-8 string length. < // It also checks if it's a correct utf-8 string < $this->_streamLength = iconv_strlen($this->_input, 'UTF-8'); < } < < /** < * Check, that character is a letter < * < * @param string $char < * @return boolean < */ < private static function _isAlpha($char) < { < if (strlen($char) > 1) { < // It's an UTF-8 character < return true; < } < < return ctype_alpha($char); < } < < /** < * Get next UTF-8 char < * < * @param string $char < * @return boolean < */ < private function _nextChar() < { < $char = $this->_input[$this->_bytePosition++]; < < if (( ord($char) & 0xC0 ) == 0xC0) { < $addBytes = 1; < if (ord($char) & 0x20 ) { < $addBytes++; < if (ord($char) & 0x10 ) { < $addBytes++; < } < } < $char .= substr($this->_input, $this->_bytePosition, $addBytes); < $this->_bytePosition += $addBytes; < } < < $this->_position++; < < return $char; 134,138c94,98 < while ($this->_position < $this->_streamLength) { < // skip white space < while ($this->_position < $this->_streamLength && < !self::_isAlpha($char = $this->_nextChar())) { < $char = ''; --- > do { > if (! preg_match('/[\p{L}]+/u', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_bytePosition)) { > // It covers both cases a) there are no matches (preg_match(...) === 0) > // b) error occured (preg_match(...) === FALSE) > return null; 141,148c101,114 < $termStartPosition = $this->_position - 1; < $termText = $char; < < // read token < while ($this->_position < $this->_streamLength && < self::_isAlpha($char = $this->_nextChar())) { < $termText .= $char; < } --- > // matched string > $matchedWord = $match[0][0]; > > // binary position of the matched word in the input stream > $binStartPos = $match[0][1]; > > // character position of the matched word in the input stream > $startPos = $this->_position + > iconv_strlen(substr($this->_input, > $this->_bytePosition, > $binStartPos - $this->_bytePosition), > 'UTF-8'); > // character postion of the end of matched word in the input stream > $endPos = $startPos + iconv_strlen($matchedWord, 'UTF-8'); 150,153c116,117 < // Empty token, end of stream. < if ($termText == '') { < return null; < } --- > $this->_bytePosition = $binStartPos + strlen($matchedWord); > $this->_position = $endPos; 155,164c119,120 < $token = new Zend_Search_Lucene_Analysis_Token( < $termText, < $termStartPosition, < $this->_position - 1); < $token = $this->normalize($token); < if ($token !== null) { < return $token; < } < // Continue if token is skipped < } --- > $token = $this->normalize(new Zend_Search_Lucene_Analysis_Token($matchedWord, $startPos, $endPos)); > } while ($token === null); // try again if token is skipped 166c122 < return null; --- > return $token; Index: Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php,v retrieving revision 1.1 diff -r1.1 Utf8Num.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php'; --- > require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php'; 31c31 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 52c52 < * Stream length --- > * Object constructor 54c54 < * @var integer --- > * @throws Zend_Search_Lucene_Exception 56c56,63 < private $_streamLength; --- > public function __construct() > { > if (@preg_match('/\pL/u', 'a') != 1) { > // PCRE unicode support is turned off > require_once 'Zend/Search/Lucene/Exception.php'; > throw new Zend_Search_Lucene_Exception('Utf8Num analyzer needs PCRE unicode support to be enabled.'); > } > } 72,118d78 < < // Get UTF-8 string length. < // It also checks if it's a correct utf-8 string < $this->_streamLength = iconv_strlen($this->_input, 'UTF-8'); < } < < /** < * Check, that character is a letter < * < * @param string $char < * @return boolean < */ < private static function _isAlNum($char) < { < if (strlen($char) > 1) { < // It's an UTF-8 character < return true; < } < < return ctype_alnum($char); < } < < /** < * Get next UTF-8 char < * < * @param string $char < * @return boolean < */ < private function _nextChar() < { < $char = $this->_input[$this->_bytePosition++]; < < if (( ord($char) & 0xC0 ) == 0xC0) { < $addBytes = 1; < if (ord($char) & 0x20 ) { < $addBytes++; < if (ord($char) & 0x10 ) { < $addBytes++; < } < } < $char .= substr($this->_input, $this->_bytePosition, $addBytes); < $this->_bytePosition += $addBytes; < } < < $this->_position++; < < return $char; 134,138c94,98 < while ($this->_position < $this->_streamLength) { < // skip white space < while ($this->_position < $this->_streamLength && < !self::_isAlNum($char = $this->_nextChar())) { < $char = ''; --- > do { > if (! preg_match('/[\p{L}\p{N}]+/u', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_bytePosition)) { > // It covers both cases a) there are no matches (preg_match(...) === 0) > // b) error occured (preg_match(...) === FALSE) > return null; 141,142c101,114 < $termStartPosition = $this->_position - 1; < $termText = $char; --- > // matched string > $matchedWord = $match[0][0]; > > // binary position of the matched word in the input stream > $binStartPos = $match[0][1]; > > // character position of the matched word in the input stream > $startPos = $this->_position + > iconv_strlen(substr($this->_input, > $this->_bytePosition, > $binStartPos - $this->_bytePosition), > 'UTF-8'); > // character postion of the end of matched word in the input stream > $endPos = $startPos + iconv_strlen($matchedWord, 'UTF-8'); 144,148c116,117 < // read token < while ($this->_position < $this->_streamLength && < self::_isAlNum($char = $this->_nextChar())) { < $termText .= $char; < } --- > $this->_bytePosition = $binStartPos + strlen($matchedWord); > $this->_position = $endPos; 150,164c119,120 < // Empty token, end of stream. < if ($termText == '') { < return null; < } < < $token = new Zend_Search_Lucene_Analysis_Token( < $termText, < $termStartPosition, < $this->_position - 1); < $token = $this->normalize($token); < if ($token !== null) { < return $token; < } < // Continue if token is skipped < } --- > $token = $this->normalize(new Zend_Search_Lucene_Analysis_Token($matchedWord, $startPos, $endPos)); > } while ($token === null); // try again if token is skipped 166c122 < return null; --- > return $token; cvs diff: Diffing Zend/Search/Lucene/Analysis/Analyzer/Common/Text Index: Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php,v retrieving revision 1.2 diff -r1.2 CaseInsensitive.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php'; --- > require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php'; 27c27 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php'; --- > require_once 'Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php'; 34c34 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) cvs diff: Diffing Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum Index: Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php,v retrieving revision 1.1 diff -r1.1 CaseInsensitive.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php'; --- > require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php'; 27c27 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php'; --- > require_once 'Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php'; 34c34 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) cvs diff: Diffing Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8 cvs diff: Diffing Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num cvs diff: Diffing Zend/Search/Lucene/Analysis/TokenFilter Index: Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php,v retrieving revision 1.2 diff -r1.2 LowerCase.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter.php'; --- > require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php'; 33c33 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) Index: Zend/Search/Lucene/Analysis/TokenFilter/LowerCaseUtf8.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCaseUtf8.php,v retrieving revision 1.1.4.2 diff -r1.1.4.2 LowerCaseUtf8.php 24c24 < require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php'; --- > require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/TokenFilter.php"; 43a44 > global $CFG; 46c47 < require_once 'Zend/Search/Lucene/Exception.php'; --- > require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Exception.php"; Index: Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php,v retrieving revision 1.1 diff -r1.1 ShortWords.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter.php'; --- > require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php'; 33c33 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) Index: Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php,v retrieving revision 1.1 diff -r1.1 StopWords.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24,25c24,25 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter.php'; < require_once $CFG->dirroot.'/search/Zend/Search/Exception.php'; --- > require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php'; > require_once 'Zend/Search/Exception.php'; 37c37 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 44c44 < * Minimum allowed term length --- > * Stop Words 66d65 < $t = $srcToken->getTermText(); cvs diff: Diffing Zend/Search/Lucene/Document Index: Zend/Search/Lucene/Document/Html.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Document/Html.php,v retrieving revision 1.1 diff -r1.1 Html.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document.php'; --- > require_once 'Zend/Search/Lucene/Document.php'; 33c33 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 293c293 < $matchedNodes = $xpath->query("/html/body/*"); --- > $matchedNodes = $xpath->query("/html/body"); cvs diff: Diffing Zend/Search/Lucene/Index Index: Zend/Search/Lucene/Index/DictionaryLoader.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/DictionaryLoader.php,v retrieving revision 1.1 diff -r1.1 DictionaryLoader.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php'; --- > require_once 'Zend/Search/Lucene/Exception.php'; 38c38 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 64c64,65 < if ($tiVersion != (int)0xFFFFFFFE) { --- > if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ && > $tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) { 68c69 < // $indexTermCount = = $tiiFile->readLong(); --- > // $indexTermCount = $tiiFile->readLong(); 103a105,109 > if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) { > /* Skip MaxSkipLevels value */ > $pos += 4; > } > Index: Zend/Search/Lucene/Index/FieldInfo.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/FieldInfo.php,v retrieving revision 1.2 diff -r1.2 FieldInfo.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 27c27 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) Index: Zend/Search/Lucene/Index/SegmentInfo.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/SegmentInfo.php,v retrieving revision 1.3 diff -r1.3 SegmentInfo.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 23c23 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/DictionaryLoader.php'; --- > require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php'; 27c27,30 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php'; --- > require_once 'Zend/Search/Lucene/Exception.php'; > > /** Zend_Search_Lucene_LockManager */ > require_once 'Zend/Search/Lucene/LockManager.php'; 34c37 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 112a116,144 > /** > * Delete file generation number > * > * -1 means 'there is no delete file' > * 0 means pre-2.1 format delete file > * X specifies used delete file > * > * @var integer > */ > private $_delGen; > > /** > * Segment has single norms file > * > * If true then one .nrm file is used for all fields > * Otherwise .fN files are used > * > * @var boolean > */ > private $_hasSingleNormFile; > > /** > * Use compound segment file (*.cfs) to collect all other segment files > * (excluding .del files) > * > * @var boolean > */ > private $_isCompound; > 139c171 < private $_deleted; --- > private $_deleted = null; 150,151c182 < * Zend_Search_Lucene_Index_SegmentInfo constructor needs Segmentname, < * Documents count and Directory as a parameter. --- > * Zend_Search_Lucene_Index_SegmentInfo constructor 152a184 > * @param Zend_Search_Lucene_Storage_Directory $directory 155c187,188 < * @param Zend_Search_Lucene_Storage_Directory $directory --- > * @param integer $delGen > * @param boolean $isCompound 157c190 < public function __construct($name, $docCount, $directory) --- > public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $hasSingleNormFile = false, $isCompound = null) 159,160d191 < $this->_name = $name; < $this->_docCount = $docCount; 162c193,217 < $this->_termDictionary = null; --- > $this->_name = $name; > $this->_docCount = $docCount; > $this->_hasSingleNormFile = $hasSingleNormFile; > $this->_delGen = $delGen; > $this->_termDictionary = null; > > if (!is_null($isCompound)) { > $this->_isCompound = $isCompound; > } else { > // It's a pre-2.1 segment > // detect if it uses compond file > $this->_isCompound = true; > > try { > // Try to open compound file > $this->_directory->getFileObject($name . '.cfs'); > } catch (Zend_Search_Lucene_Exception $e) { > if (strpos($e->getMessage(), 'is not readable') !== false) { > // Compound file is not found or is not readable > $this->_isCompound = false; > } else { > throw $e; > } > } > } 165c220 < if ($this->_directory->fileExists($name . '.cfs')) { --- > if ($this->_isCompound) { 205,210c260,279 < try { < $delFile = $this->openCompoundFile('.del'); < < $byteCount = $delFile->readInt(); < $byteCount = ceil($byteCount/8); < $bitCount = $delFile->readInt(); --- > if ($this->_delGen == -1) { > // There is no delete file for this segment > // Do nothing > } else if ($this->_delGen == 0) { > // It's a segment with pre-2.1 format delete file > // Try to find delete file > try { > // '.del' files always stored in a separate file > // Segment compound is not used > $delFile = $this->_directory->getFileObject($this->_name . '.del'); > > $byteCount = $delFile->readInt(); > $byteCount = ceil($byteCount/8); > $bitCount = $delFile->readInt(); > > if ($bitCount == 0) { > $delBytes = ''; > } else { > $delBytes = $delFile->readBytes($byteCount); > } 212,215c281,299 < if ($bitCount == 0) { < $delBytes = ''; < } else { < $delBytes = $delFile->readBytes($byteCount); --- > if (extension_loaded('bitset')) { > $this->_deleted = $delBytes; > } else { > $this->_deleted = array(); > for ($count = 0; $count < $byteCount; $count++) { > $byte = ord($delBytes{$count}); > for ($bit = 0; $bit < 8; $bit++) { > if ($byte & (1<<$bit)) { > $this->_deleted[$count*8 + $bit] = 1; > } > } > } > } > } catch(Zend_Search_Exception $e) { > if (strpos($e->getMessage(), 'is not readable') === false ) { > throw $e; > } > // There is no delete file > // Do nothing 216a301,305 > } else { > // It's 2.1+ format delete file > $delFile = $this->_directory->getFileObject($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del'); > > $format = $delFile->readInt(); 218,219c307,312 < if (extension_loaded('bitset')) { < $this->_deleted = $delBytes; --- > if ($format == (int)0xFFFFFFFF) { > /** > * @todo Implement support of DGaps delete file format. > * See Lucene file format for details - http://lucene.apache.org/java/docs/fileformats.html#Deleted%20Documents > */ > throw new Zend_Search_Lucene_Exception('DGaps delete file format is not supported. Optimize index to use it with Zend_Search_Lucene'); 221,226c314,333 < $this->_deleted = array(); < for ($count = 0; $count < $byteCount; $count++) { < $byte = ord($delBytes{$count}); < for ($bit = 0; $bit < 8; $bit++) { < if ($byte & (1<<$bit)) { < $this->_deleted[$count*8 + $bit] = 1; --- > // $format is actually byte count > $byteCount = ceil($format/8); > $bitCount = $delFile->readInt(); > > if ($bitCount == 0) { > $delBytes = ''; > } else { > $delBytes = $delFile->readBytes($byteCount); > } > > if (extension_loaded('bitset')) { > $this->_deleted = $delBytes; > } else { > $this->_deleted = array(); > for ($count = 0; $count < $byteCount; $count++) { > $byte = ord($delBytes{$count}); > for ($bit = 0; $bit < 8; $bit++) { > if ($byte & (1<<$bit)) { > $this->_deleted[$count*8 + $bit] = 1; > } 231,236d337 < } catch(Zend_Search_Exception $e) { < if (strpos($e->getMessage(), 'compound file doesn\'t contain') !== false ) { < $this->_deleted = null; < } else { < throw $e; < } 252,253c353 < // Try to open common file first < if ($this->_directory->fileExists($filename)) { --- > if (!$this->_isCompound) { 258c358 < throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain ' --- > throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain ' 346a447,456 > * Returns actual deletions file generation number. > * > * @return integer > */ > public function getDelGen() > { > return $this->_delGen; > } > > /** 434a545,579 > * Load terms dictionary index > * > * @throws Zend_Search_Lucene_Exception > */ > private function _loadDictionaryIndex() > { > // Check, if index is already serialized > if ($this->_directory->fileExists($this->_name . '.sti')) { > // Load serialized dictionary index data > $stiFile = $this->_directory->getFileObject($this->_name . '.sti'); > $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti')); > > // Load dictionary index data > if (($unserializedData = @unserialize($stiFileData)) !== false) { > list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData; > return; > } > } > > // Load data from .tii file and generate .sti file > > // Prefetch dictionary index data > $tiiFile = $this->openCompoundFile('.tii'); > $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii')); > > // Load dictionary index data > list($this->_termDictionary, $this->_termDictionaryInfos) = > Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData); > > $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos)); > $stiFile = $this->_directory->createFile($this->_name . '.sti'); > $stiFile->writeBytes($stiFileData); > } > > /** 455,476c600 < // Check, if index is already serialized < if ($this->_directory->fileExists($this->_name . '.sti')) { < // Prefetch dictionary index data < $stiFile = $this->_directory->getFileObject($this->_name . '.sti'); < $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti')); < < // Load dictionary index data < list($this->_termDictionary, $this->_termDictionaryInfos) = unserialize($stiFileData); < } else { < // Prefetch dictionary index data < $tiiFile = $this->openCompoundFile('.tii'); < $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii')); < < // Load dictionary index data < list($this->_termDictionary, $this->_termDictionaryInfos) = < Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData); < < $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos)); < $stiFile = $this->_directory->createFile($this->_name . '.sti'); < $stiFile->writeBytes($stiFileData); < } < --- > $this->_loadDictionaryIndex(); 479,480d602 < < 529c651,652 < if ($tiVersion != (int)0xFFFFFFFE) { --- > if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ && > $tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) { 535a659,661 > if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) { > $maxSkipLevels = $tisFile->readInt(); > } 537c663 < $tisFile->seek($prevTermInfo[4] /* indexPointer */ - 20 /* header size*/, SEEK_CUR); --- > $tisFile->seek($prevTermInfo[4] /* indexPointer */ - (($tiVersion == (int)0xFFFFFFFD)? 24 : 20) /* header size*/, SEEK_CUR); 668a795 > * @throws Zend_Search_Lucene_Exception 672,673c799,817 < $fFile = $this->openCompoundFile('.f' . $fieldNum); < $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount); --- > if ($this->_hasSingleNormFile) { > $normfFile = $this->openCompoundFile('.nrm'); > > $header = $normfFile->readBytes(3); > $headerFormatVersion = $normfFile->readByte(); > > if ($header != 'NRM' || $headerFormatVersion != (int)0xFF) { > throw new Zend_Search_Lucene_Exception('Wrong norms file format.'); > } > > foreach ($this->_fields as $fieldNum => $fieldInfo) { > if ($fieldInfo->isIndexed) { > $this->_norms[$fieldNum] = $normfFile->readBytes($this->_docCount); > } > } > } else { > $fFile = $this->openCompoundFile('.f' . $fieldNum); > $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount); > } 734a879,898 > * Returns true if segment has single norms file. > * > * @return boolean > */ > public function hasSingleNormFile() > { > return $this->_hasSingleNormFile ? 1 : 0; > } > > /** > * Returns true if segment is stored using compound segment file. > * > * @return boolean > */ > public function isCompound() > { > return $this->_isCompound ? 1 : 0; > } > > /** 806c970,998 < $delFile = $this->_directory->createFile($this->_name . '.del'); --- > // Get new generation number > Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory); > > $delFileList = array(); > foreach ($this->_directory->fileList() as $file) { > if ($file == $this->_name . '.del') { > // Matches .del file name > $delFileList[] = 0; > } else if (preg_match('/^' . $this->_name . '_([a-zA-Z0-9]+)\.del$/i', $file, $matches)) { > // Matches _NNN.del file names > $delFileList[] = (int)base_convert($matches[1], 36, 10); > } > } > > if (count($delFileList) == 0) { > // There is no deletions file for current segment in the directory > // Set detetions file generation number to 1 > $this->_delGen = 1; > } else { > // There are some deletions files for current segment in the directory > // Set detetions file generation number to the highest + 1 > $this->_delGen = max($delFileList) + 1; > } > > $delFile = $this->_directory->createFile($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del'); > > Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory); > > 823a1016,1022 > * Actual offset of the .tis file data > * > * @var integer > */ > private $_tisFileOffset; > > /** 831c1030 < * Offset of the .frq file in the compound file --- > * Actual offset of the .frq file data 845c1044 < * Offset of the .prx file in the compound file --- > * Actual offset of the .prx file in the compound file 853c1052 < * Number of terms in term stream --- > * Actual number of terms in term stream 859a1059,1072 > * Overall number of terms in term stream > * > * @var integer > */ > private $_termNum = 0; > > /** > * Segment index interval > * > * @var integer > */ > private $_indexInterval; > > /** 894c1107,1109 < * @var array --- > * Is set to null if term positions loading has to be skipped > * > * @var array|null 897a1113,1132 > > /** > * Terms scan mode > * > * Values: > * > * self::SM_TERMS_ONLY - terms are scanned, no additional info is retrieved > * self::SM_MERGE_INFO - terms are scanned, frequency and position info is retrieved > * document numbers are compacted (shifted if segment has deleted documents) > * > * @var integer > */ > private $_termsScanMode; > > /** Scan modes */ > const SM_TERMS_ONLY = 0; // terms are scanned, no additional info is retrieved > const SM_FULL_INFO = 1; // terms are scanned, frequency and position info is retrieved > const SM_MERGE_INFO = 2; // terms are scanned, frequency and position info is retrieved > // document numbers are compacted (shifted if segment contains deleted documents) > 907c1142 < * @param boolean $compact --- > * @param integer $mode 911c1146 < public function reset($startId = 0, $compact = false) --- > public function reset($startId = 0, $mode = self::SM_TERMS_ONLY) 917a1153,1154 > $this->_tisFileOffset = $this->_tisFile->tell(); > 919c1156,1157 < if ($tiVersion != (int)0xFFFFFFFE) { --- > if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ && > $tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) { 923,925c1161,1167 < $this->_termCount = $this->_tisFile->readLong(); < $this->_tisFile->readInt(); // Read Index interval < $this->_skipInterval = $this->_tisFile->readInt(); // Read skip interval --- > $this->_termCount = > $this->_termNum = $this->_tisFile->readLong(); // Read terms count > $this->_indexInterval = $this->_tisFile->readInt(); // Read Index interval > $this->_skipInterval = $this->_tisFile->readInt(); // Read skip interval > if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) { > $maxSkipLevels = $this->_tisFile->readInt(); > } 930,932d1171 < $this->_frqFile = $this->openCompoundFile('.frq', false); < $this->_frqFileOffset = $this->_frqFile->tell(); < 936,937c1175 < $this->_prxFile = $this->openCompoundFile('.prx', false); < $this->_prxFileOffset = $this->_prxFile->tell(); --- > $this->_docMap = array(); 939,940c1177,1186 < $this->_lastTerm = new Zend_Search_Lucene_Index_Term('', -1); < $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0); --- > $this->_lastTerm = new Zend_Search_Lucene_Index_Term('', -1); > $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0); > $this->_lastTermPositions = null; > > $this->_termsScanMode = $mode; > > switch ($mode) { > case self::SM_TERMS_ONLY: > // Do nothing > break; 942,946c1188,1206 < $this->_docMap = array(); < for ($count = 0; $count < $this->_docCount; $count++) { < if (!$this->isDeleted($count)) { < $this->_docMap[$count] = $startId + ($compact ? count($this->_docMap) : $count); < } --- > case self::SM_FULL_INFO: > // break intentionally omitted > case self::SM_MERGE_INFO: > $this->_frqFile = $this->openCompoundFile('.frq', false); > $this->_frqFileOffset = $this->_frqFile->tell(); > > $this->_prxFile = $this->openCompoundFile('.prx', false); > $this->_prxFileOffset = $this->_prxFile->tell(); > > for ($count = 0; $count < $this->_docCount; $count++) { > if (!$this->isDeleted($count)) { > $this->_docMap[$count] = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $count); > } > } > break; > > default: > throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.'); > break; 948a1209 > 950c1211,1358 < return $startId + ($compact ? count($this->_docMap) : $this->_docCount); --- > return $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $this->_docCount); > } > > > /** > * Skip terms stream up to specified term preffix. > * > * Prefix contains fully specified field info and portion of searched term > * > * @param Zend_Search_Lucene_Index_Term $prefix > * @throws Zend_Search_Lucene_Exception > */ > public function skipTo(Zend_Search_Lucene_Index_Term $prefix) > { > if ($this->_termDictionary === null) { > $this->_loadDictionaryIndex(); > } > > $searchField = $this->getFieldNum($prefix->field); > > if ($searchField == -1) { > /** > * Field is not presented in this segment > * Go to the end of dictionary > */ > $this->_tisFile = null; > $this->_frqFile = null; > $this->_prxFile = null; > > $this->_lastTerm = null; > $this->_lastTermInfo = null; > $this->_lastTermPositions = null; > > return; > } > $searchDicField = $this->_getFieldPosition($searchField); > > // search for appropriate value in dictionary > $lowIndex = 0; > $highIndex = count($this->_termDictionary)-1; > while ($highIndex >= $lowIndex) { > // $mid = ($highIndex - $lowIndex)/2; > $mid = ($highIndex + $lowIndex) >> 1; > $midTerm = $this->_termDictionary[$mid]; > > $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */); > $delta = $searchDicField - $fieldNum; > if ($delta == 0) { > $delta = strcmp($prefix->text, $midTerm[1] /* text */); > } > > if ($delta < 0) { > $highIndex = $mid-1; > } elseif ($delta > 0) { > $lowIndex = $mid+1; > } else { > // We have reached term we are looking for > break; > } > } > > if ($highIndex == -1) { > // Term is out of the dictionary range > $this->_tisFile = null; > $this->_frqFile = null; > $this->_prxFile = null; > > $this->_lastTerm = null; > $this->_lastTermInfo = null; > $this->_lastTermPositions = null; > > return; > } > > $prevPosition = $highIndex; > $prevTerm = $this->_termDictionary[$prevPosition]; > $prevTermInfo = $this->_termDictionaryInfos[$prevPosition]; > > if ($this->_tisFile === null) { > // The end of terms stream is reached and terms dictionary file is closed > // Perform mini-reset operation > $this->_tisFile = $this->openCompoundFile('.tis', false); > > if ($this->_termsScanMode == self::SM_FULL_INFO || $this->_termsScanMode == self::SM_MERGE_INFO) { > $this->_frqFile = $this->openCompoundFile('.frq', false); > $this->_prxFile = $this->openCompoundFile('.prx', false); > } > } > $this->_tisFile->seek($this->_tisFileOffset + $prevTermInfo[4], SEEK_SET); > > $this->_lastTerm = new Zend_Search_Lucene_Index_Term($prevTerm[1] /* text */, > ($prevTerm[0] == -1) ? '' : $this->_fields[$prevTerm[0] /* field */]->name); > $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($prevTermInfo[0] /* docFreq */, > $prevTermInfo[1] /* freqPointer */, > $prevTermInfo[2] /* proxPointer */, > $prevTermInfo[3] /* skipOffset */); > $this->_termCount = $this->_termNum - $prevPosition*$this->_indexInterval; > > if ($highIndex == 0) { > // skip start entry > $this->nextTerm(); > } else if ($prefix->field == $this->_lastTerm->field && $prefix->text == $this->_lastTerm->text) { > // We got exact match in the dictionary index > > if ($this->_termsScanMode == self::SM_FULL_INFO || $this->_termsScanMode == self::SM_MERGE_INFO) { > $this->_lastTermPositions = array(); > > $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET); > $freqs = array(); $docId = 0; > for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) { > $docDelta = $this->_frqFile->readVInt(); > if( $docDelta % 2 == 1 ) { > $docId += ($docDelta-1)/2; > $freqs[ $docId ] = 1; > } else { > $docId += $docDelta/2; > $freqs[ $docId ] = $this->_frqFile->readVInt(); > } > } > > $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET); > foreach ($freqs as $docId => $freq) { > $termPosition = 0; $positions = array(); > > for ($count = 0; $count < $freq; $count++ ) { > $termPosition += $this->_prxFile->readVInt(); > $positions[] = $termPosition; > } > > if (isset($this->_docMap[$docId])) { > $this->_lastTermPositions[$this->_docMap[$docId]] = $positions; > } > } > } > > return; > } > > // Search term matching specified prefix > while ($this->_lastTerm !== null) { > if ( strcmp($this->_lastTerm->field, $prefix->field) > 0 || > ($prefix->field == $this->_lastTerm->field && strcmp($this->_lastTerm->text, $prefix->text) >= 0) ) { > // Current term matches or greate than the pattern > return; > } > > $this->nextTerm(); > } 962,963c1370,1373 < $this->_lastTerm = null; < $this->_lastTermInfo = null; --- > $this->_lastTerm = null; > $this->_lastTermInfo = null; > $this->_lastTermPositions = null; > $this->_docMap = null; 992c1402,1403 < $this->_lastTermPositions = array(); --- > if ($this->_termsScanMode == self::SM_FULL_INFO || $this->_termsScanMode == self::SM_MERGE_INFO) { > $this->_lastTermPositions = array(); 994,1003c1405,1415 < $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET); < $freqs = array(); $docId = 0; < for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) { < $docDelta = $this->_frqFile->readVInt(); < if( $docDelta % 2 == 1 ) { < $docId += ($docDelta-1)/2; < $freqs[ $docId ] = 1; < } else { < $docId += $docDelta/2; < $freqs[ $docId ] = $this->_frqFile->readVInt(); --- > $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET); > $freqs = array(); $docId = 0; > for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) { > $docDelta = $this->_frqFile->readVInt(); > if( $docDelta % 2 == 1 ) { > $docId += ($docDelta-1)/2; > $freqs[ $docId ] = 1; > } else { > $docId += $docDelta/2; > $freqs[ $docId ] = $this->_frqFile->readVInt(); > } 1005d1416 < } 1007,1014c1418,1425 < $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET); < foreach ($freqs as $docId => $freq) { < $termPosition = 0; $positions = array(); < < for ($count = 0; $count < $freq; $count++ ) { < $termPosition += $this->_prxFile->readVInt(); < $positions[] = $termPosition; < } --- > $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET); > foreach ($freqs as $docId => $freq) { > $termPosition = 0; $positions = array(); > > for ($count = 0; $count < $freq; $count++ ) { > $termPosition += $this->_prxFile->readVInt(); > $positions[] = $termPosition; > } 1016,1017c1427,1429 < if (isset($this->_docMap[$docId])) { < $this->_lastTermPositions[$this->_docMap[$docId]] = $positions; --- > if (isset($this->_docMap[$docId])) { > $this->_lastTermPositions[$this->_docMap[$docId]] = $positions; > } 1021d1432 < 1031a1443,1460 > /** > * Close terms stream > * > * Should be used for resources clean up if stream is not read up to the end > */ > public function closeTermsStream() > { > $this->_tisFile = null; > $this->_frqFile = null; > $this->_prxFile = null; > > $this->_lastTerm = null; > $this->_lastTermInfo = null; > $this->_lastTermPositions = null; > > $this->_docMap = null; > } > 1036d1464 < * @param Zend_Search_Lucene_Index_Term $term Index: Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php,v retrieving revision 1.1 diff -r1.1 SegmentInfoPriorityQueue.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php'; --- > require_once 'Zend/Search/Lucene/Exception.php'; 27c27 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/PriorityQueue.php'; --- > require_once 'Zend/Search/Lucene/PriorityQueue.php'; 34c34 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) Index: Zend/Search/Lucene/Index/SegmentMerger.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/SegmentMerger.php,v retrieving revision 1.1 diff -r1.1 SegmentMerger.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php'; --- > require_once 'Zend/Search/Lucene/Exception.php'; 27c27 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php'; --- > require_once 'Zend/Search/Lucene/Index/SegmentInfo.php'; 30c30 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php'; --- > require_once 'Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php'; 33c33 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php'; --- > require_once 'Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php'; 40c40 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 235c235 < $segmentStartId = $segmentInfo->reset($segmentStartId, true); --- > $segmentStartId = $segmentInfo->reset($segmentStartId, Zend_Search_Lucene_Index_SegmentInfo::SM_MERGE_INFO); Index: Zend/Search/Lucene/Index/SegmentWriter.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/SegmentWriter.php,v retrieving revision 1.2 diff -r1.2 SegmentWriter.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 24c24 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php'; --- > require_once 'Zend/Search/Lucene/Exception.php'; 27c27 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php'; --- > require_once 'Zend/Search/Lucene/Index/SegmentInfo.php'; 34c34 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 50c50,51 < /** Expert: The fraction of TermDocs entries stored in skip tables. --- > /** > * Expert: The fraction of TermDocs entries stored in skip tables. 56,57c57,59 < * 0x0x7FFFFFFF indicates that we don't use skip data < * Default value is 16 --- > * 0x7FFFFFFF indicates that we don't use skip data > * > * Note: not used in current implementation 63a66,77 > * Expert: The maximum number of skip levels. Smaller values result in > * slightly smaller indexes, but slower skipping in big posting lists. > * > * 0 indicates that we don't use skip data > * > * Note: not used in current implementation > * > * @var integer > */ > public static $maxSkipLevels = 0; > > /** 248a263,272 > * Return segment name > * > * @return string > */ > public function getName() > { > return $this->_name; > } > > /** 255a280,285 > $nrmFile = $this->_directory->createFile($this->_name . '.nrm'); > // Write header > $nrmFile->writeBytes('NRM'); > // Write format specifier > $nrmFile->writeByte((int)0xFF); > 265,268c295,301 < $normFileName = $this->_name . '.f' . $field->number; < $fFile = $this->_directory->createFile($normFileName); < $fFile->writeBytes($this->_norms[$field->name]); < $this->_files[] = $normFileName; --- > // pre-2.1 index mode (not used now) > // $normFileName = $this->_name . '.f' . $field->number; > // $fFile = $this->_directory->createFile($normFileName); > // $fFile->writeBytes($this->_norms[$field->name]); > // $this->_files[] = $normFileName; > > $nrmFile->writeBytes($this->_norms[$field->name]); 272a306 > $this->_files[] = $this->_name . '.nrm'; 354c388 < $this->_tisFile->writeInt((int)0xFFFFFFFE); --- > $this->_tisFile->writeInt((int)0xFFFFFFFD); 357a392 > $this->_tisFile->writeInt(self::$maxSkipLevels); 360c395 < $this->_tiiFile->writeInt((int)0xFFFFFFFE); --- > $this->_tiiFile->writeInt((int)0xFFFFFFFD); 363a399 > $this->_tiiFile->writeInt(self::$maxSkipLevels); 373c409 < $this->_tiiFile->writeVInt(20); // IndexDelta --- > $this->_tiiFile->writeVInt(24); // IndexDelta 387c423 < $this->_lastIndexPosition = 20; --- > $this->_lastIndexPosition = 24; Index: Zend/Search/Lucene/Index/Term.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/Term.php,v retrieving revision 1.2 diff -r1.2 Term.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 34c34 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 76a77 > * @param string $str 106a108,141 > > /** > * Get UTF-8 string length > * > * @param string $str > * @return string > */ > public static function getLength($str) > { > $bytes = 0; > $chars = 0; > while ($bytes < strlen($str)) { > $charBytes = 1; > if ((ord($str[$bytes]) & 0xC0) == 0xC0) { > $charBytes++; > if (ord($str[$bytes]) & 0x20 ) { > $charBytes++; > if (ord($str[$bytes]) & 0x10 ) { > $charBytes++; > } > } > } > > if ($bytes + $charBytes > strlen($str)) { > // wrong character > break; > } > > $chars++; > $bytes += $charBytes; > } > > return $chars; > } Index: Zend/Search/Lucene/Index/TermInfo.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/TermInfo.php,v retrieving revision 1.2 diff -r1.2 TermInfo.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 29c29 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) Index: Zend/Search/Lucene/Index/Writer.php =================================================================== RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/Writer.php,v retrieving revision 1.2 diff -r1.2 Writer.php 18c18 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 23,24c23,24 < /** Zend_Search_Lucene_Index_SegmentWriter_ */ < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php'; --- > /** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */ > require_once 'Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php'; 27c27 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php'; --- > require_once 'Zend/Search/Lucene/Index/SegmentInfo.php'; 30c30,33 < require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentMerger.php'; --- > require_once 'Zend/Search/Lucene/Index/SegmentMerger.php'; > > /** Zend_Search_Lucene_LockManager */ > require_once 'Zend/Search/Lucene/LockManager.php'; 38c41 < * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) --- > * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) 156a160 > 158,163c162 < * Opens the index for writing < * < * IndexWriter constructor needs Directory as a parameter. It should be < * a string with a path to the index folder or a Directory object. < * Second constructor parameter create is optional - true to create the < * index or overwrite the existing one. --- > * Create empty index 166,167c165,166 < * @param array $segmentInfos < * @param boolean $create --- > * @param integer $generation > * @param int