Index: add.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/add.php,v
retrieving revision 1.5.2.3
diff -r1.5.2.3 add.php
2,22c2,28
< /**
< * Global Search Engine for Moodle
< *
< * @package search
< * @category core
< * @subpackage search_engine
< * @author Michael Champanis (mchampan) [cynnical@gmail.com], Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8
< * @date 2008/03/31
< * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
< *
< * Asynchronous adder for new indexable contents
< *
< * Major chages in this review is passing the xxxx_db_names return to
< * multiple arity to handle multiple document types modules
< */
<
< /**
< * includes and requires
< */
< require_once('../config.php');
< require_once("$CFG->dirroot/search/lib.php");
---
> /**
> * Global Search Engine for Moodle
> *
> * @package search
> * @category core
> * @subpackage search_engine
> * @author Michael Champanis (mchampan) [cynnical@gmail.com], Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8
> * @date 2008/03/31
> * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
> *
> * Asynchronous adder for new indexable contents
> *
> * The major change in this revision is that the xxxx_db_names return value
> * has multiple arity, so that modules with multiple document types are handled
> */
>
> /**
> * includes and requires
> */
> require_once('../config.php');
>
> /// makes inclusions of the Zend Engine more reliable
> $separator = (array_key_exists('WINDIR', $_SERVER)) ? ';' : ':' ;
> ini_set('include_path', $CFG->dirroot.'/search'.$separator.ini_get('include_path'));
>
> require_once("$CFG->dirroot/search/lib.php");
> require_once("$CFG->dirroot/search/indexlib.php");
32c38
< if (!isadmin()) {
---
> if (!has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) {
38,46c44,49
< if (!search_check_php5()) {
< $phpversion = phpversion();
< mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version ".phpversion().")");
< exit(0);
< }
<
< require_once("$CFG->dirroot/search/indexlib.php");
<
< $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH);
---
> try {
> $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH);
> } catch(Zend_Search_Lucene_Exception $e) { // NOTE(review): this is the exception class the bundled Zend code throws; no LuceneException class is visible, so the old handler never matched - confirm
> mtrace("Could not construct a valid index. Maybe the first indexation was never made, or files might be corrupted. Run complete indexation again.");
> return;
> }
91c94
< doctype = '{$mod->name}'
---
> doctype = ?
94c97
< $docIds = get_records_sql_menu($query);
---
> $docIds = $DB->get_records_sql_menu($query, array($mod->name));
158c161
< ?>
---
> ?>
\ No newline at end of file
Index: cron.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/cron.php,v
retrieving revision 1.10.2.2
diff -r1.10.2.2 cron.php
21,25c21
< if (!search_check_php5()) {
< $phpversion = phpversion();
< mtrace("Sorry, cannot cron global search as it requires PHP 5.0.0 or later (currently using version $phpversion)");
< }
< else if (empty($CFG->enableglobalsearch)) {
---
> if (empty($CFG->enableglobalsearch)) {
Index: delete.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/delete.php,v
retrieving revision 1.6.2.3
diff -r1.6.2.3 delete.php
2,22c2,25
< /**
< * Global Search Engine for Moodle
< *
< * @package search
< * @category core
< * @subpackage search_engine
< * @author Michael Champanis (mchampan) [cynnical@gmail.com], Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8
< * @date 2008/03/31
< * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
< *
< * Asynchronous index cleaner
< *
< * Major chages in this review is passing the xxxx_db_names return to
< * multiple arity to handle multiple document types modules
< */
<
< /**
< * includes and requires
< */
< require_once('../config.php');
< require_once("$CFG->dirroot/search/lib.php");
---
> /**
> * Global Search Engine for Moodle
> *
> * @package search
> * @category core
> * @subpackage search_engine
> * @author Michael Champanis (mchampan) [cynnical@gmail.com], Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8
> * @date 2008/03/31
> * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
> *
> * Asynchronous index cleaner
> *
> * The major change in this revision is that the xxxx_db_names return value
> * has multiple arity, so that modules with multiple document types are handled
> */
>
> /**
> * includes and requires
> */
> require_once('../config.php');
>
> /// makes inclusions of the Zend Engine more reliable
> $separator = (array_key_exists('WINDIR', $_SERVER)) ? ';' : ':' ;
> ini_set('include_path', $CFG->dirroot.'/search'.$separator.ini_get('include_path'));
23a27,31
> require_once("$CFG->dirroot/search/lib.php");
> require_once("$CFG->dirroot/search/indexlib.php");
>
>
> /// checks global search activation
26c34
<
---
>
31c39
< if (!isadmin()) {
---
> if (!has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) {
35,39c43,47
< //check for php5 (lib.php)
< if (!search_check_php5()) {
< $phpversion = phpversion();
< mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version ".phpversion().")");
< exit(0);
---
> try {
> $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH);
> } catch(Zend_Search_Lucene_Exception $e) { // NOTE(review): this is the exception class the bundled Zend code throws; no LuceneException class is visible, so the old handler never matched - confirm
> mtrace("Could not construct a valid index. Maybe the first indexation was never made, or files might be corrupted. Run complete indexation again.");
> return;
41,44d48
<
< require_once("$CFG->dirroot/search/indexlib.php");
<
< $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH);
51a56
> /// check all modules
142c147
< ?>
---
> ?>
\ No newline at end of file
Index: indexer.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/indexer.php,v
retrieving revision 1.13.2.4
diff -r1.13.2.4 indexer.php
37a38,41
> //require_once("debugging.php");
> /// makes inclusions of the Zend Engine more reliable; the directory separator must be '/' so the entry resolves on Unix as well as Windows
> $separator = (array_key_exists('WINDIR', $_SERVER)) ? ';' : ':' ;
> ini_set('include_path', $CFG->dirroot.'/search'.$separator.ini_get('include_path'));
47c51
< if (!isadmin()) {
---
> if (!has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) {
64,69d67
< if (!search_check_php5()) {
< $phpversion = phpversion();
< mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version ".phpversion().")");
< exit(0);
< }
<
97c95,96
< } else {
---
> }
> else {
100c99,100
< } else {
---
> }
> else {
103a104
> Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8());
127,129d127
< $searchables = array();
<
< /// collects modules
131,146c129,131
< if ($mods = get_records('modules', '', '', '', 'id,name')) {
< $searchables = array_merge($searchables, $mods);
< }
< mtrace(count($searchables).' modules found.');
<
< // collects blocks as indexable information may be found in blocks either
< if ($blocks = get_records('block', '', '', '', 'id,name')) {
< // prepend the "block_" prefix to discriminate document type plugins
< foreach(array_keys($blocks) as $aBlockId){
< $blocks[$aBlockId]->name = 'block_'.$blocks[$aBlockId]->name;
< }
< $searchables = array_merge($searchables, $blocks);
< mtrace(count($blocks).' blocks found.');
< }
<
< /// add virtual modules onto the back of the array
---
> $searchables = search_collect_searchables();
>
> /// start indexation
148d132
< $searchables = array_merge($searchables, search_get_additional_modules());
151c135,158
< $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
---
>
> $key = 'search_in_'.$mod->name;
> if (isset($CFG->$key) && !$CFG->$key) {
> mtrace("module $key has been administratively disabled. Skipping...\n");
> continue;
> }
>
> if ($mod->location == 'internal'){
> $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
> } else {
> $class_file = $CFG->dirroot.'/'.$mod->location.'/'.$mod->name.'/search_document.php';
> }
>
> /*
> if (!file_exists($class_file)){
> if (defined("PATH_FOR_SEARCH_TYPE_{$mod->name}")){
> eval("\$pluginpath = PATH_FOR_SEARCH_TYPE_{$mod->name}");
> $class_file = "{$CFG->dirroot}/{$pluginpath}/searchlib.php";
> } else {
> mtrace ("No search document found for plugin {$mod->name}. Ignoring.");
> continue;
> }
> }
> */
197a205,206
> } else {
> mtrace ("No search document found for plugin {$mod->name}. Ignoring.");
Index: indexersplash.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/indexersplash.php,v
retrieving revision 1.9.2.4
diff -r1.9.2.4 indexersplash.php
21c21,25
< require_once("$CFG->dirroot/search/lib.php");
---
> require_once("{$CFG->dirroot}/search/lib.php");
>
> /// makes inclusions of the Zend Engine more reliable ('/' resolves on every platform, a backslash only on Windows)
> $separator = (array_key_exists('WINDIR', $_SERVER)) ? ';' : ':' ;
> ini_set('include_path', $CFG->dirroot.'/search'.$separator.ini_get('include_path'));
31c35
< if (!isadmin()) {
---
> if (!has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) {
35,42d38
< /// check for php5 (lib.php)
<
< if (!search_check_php5()) {
< $phpversion = phpversion();
< mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version ".phpversion().")");
< exit(0);
< }
<
Index: indexlib.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/indexlib.php,v
retrieving revision 1.7.2.3
diff -r1.7.2.3 indexlib.php
81c81,82
< $types = search_get_document_types();
---
> // $types = search_get_document_types();
> $types = search_collect_searchables(true, false);
Index: lib.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/lib.php,v
retrieving revision 1.13.2.2
diff -r1.13.2.2 lib.php
20a21
> function search_collect_searchables($namelist=false, $verbose=true){
21a23
> function search_get_document_types($prefix = 'X_SEARCH_TYPE_') {
25d26
< function search_check_php5($feedback = false) {
33,50c34,35
< //document types that can be searched
< //define('SEARCH_TYPE_NONE', 'none');
< define('SEARCH_TYPE_WIKI', 'wiki');
< define('PATH_FOR_SEARCH_TYPE_WIKI', 'mod/wiki');
< define('SEARCH_TYPE_FORUM', 'forum');
< define('PATH_FOR_SEARCH_TYPE_FORUM', 'mod/forum');
< define('SEARCH_TYPE_GLOSSARY', 'glossary');
< define('PATH_FOR_SEARCH_TYPE_GLOSSARY', 'mod/glossary');
< define('SEARCH_TYPE_RESOURCE', 'resource');
< define('PATH_FOR_SEARCH_TYPE_RESOURCE', 'mod/resource');
< define('SEARCH_TYPE_TECHPROJECT', 'techproject');
< define('PATH_FOR_SEARCH_TYPE_TECHPROJECT', 'mod/techproject');
< define('SEARCH_TYPE_DATA', 'data');
< define('PATH_FOR_SEARCH_TYPE_DATA', 'mod/data');
< define('SEARCH_TYPE_CHAT', 'chat');
< define('PATH_FOR_SEARCH_TYPE_CHAT', 'mod/chat');
< define('SEARCH_TYPE_LESSON', 'lesson');
< define('PATH_FOR_SEARCH_TYPE_LESSON', 'mod/lesson');
---
> // get document types
> include "{$CFG->dirroot}/search/searchtypes.php";
53c38,109
< * returns all the document type constants
---
> * collects all searchable items identities
> * @param boolean $namelist if true, only returns list of names of searchable items
> * @param boolean $verbose if true, prints a discovering status
> * @return an array of names or an array of type descriptors
> */
> function search_collect_searchables($namelist=false, $verbose=true){
> global $CFG;
>
> $searchables = array();
> $searchables_names = array();
>
> // core document types; fetched before either pass because the block pass reads them even when no module record exists
> $searchabletypes = array_values(search_get_document_types());
>
> /// get all installed modules: a module is searchable if it is a core type or ships its own search_document.php
> if ($mods = get_records('modules', '', '', 'name', 'id,name')){
> foreach($mods as $mod){
> if (in_array($mod->name, $searchabletypes)){
> $mod->location = 'internal';
> $searchables[$mod->name] = $mod;
> $searchables_names[] = $mod->name;
> } else {
> $documentfile = $CFG->dirroot."/mod/{$mod->name}/search_document.php";
> $mod->location = 'mod';
> if (file_exists($documentfile)){
> $searchables[$mod->name] = $mod;
> $searchables_names[] = $mod->name;
> }
> }
> }
> if ($verbose) mtrace(count($searchables).' modules to search in / '.count($mods).' modules found.');
> }
>
> /// collects blocks, as indexable information may be found in blocks too
> if ($blocks = get_records('block', '', '', 'name', 'id,name')) {
> $blocks_searchables = array();
> // prepend the "block_" prefix to discriminate document type plugins
> foreach($blocks as $block){
> $block->dirname = $block->name;
> $block->name = 'block_'.$block->name;
> if (in_array('SEARCH_TYPE_'.strtoupper($block->name), $searchabletypes)){ // NOTE(review): modules are matched by bare name above, blocks by 'SEARCH_TYPE_BLOCK_X' - confirm which form search_get_document_types() returns
> $block->location = 'internal'; // was written to $mod, which overwrote the last module's location and left the block without one
> $blocks_searchables[$block->name] = $block; // keyed by name like every other entry, so lookups by doctype find it
> $searchables_names[] = $block->name;
> } else {
> $documentfile = $CFG->dirroot."/blocks/{$block->dirname}/search_document.php";
> if (file_exists($documentfile)){
> $block->location = 'blocks'; // was written to $mod as well
> $blocks_searchables[$block->name] = $block;
> $searchables_names[] = $block->name;
> }
> }
> }
> if ($verbose) mtrace(count($blocks_searchables).' blocks to search in / '.count($blocks).' blocks found.');
> $searchables = array_merge($searchables, $blocks_searchables);
> }
>
> /// add virtual modules onto the back of the array
> $additional = search_get_additional_modules();
> if (!empty($additional)){
> if ($verbose) mtrace(count($additional).' additional to search in.');
> $searchables = array_merge($searchables, $additional);
> }
>
> if ($namelist) {
> return $searchables_names;
> }
> return $searchables;
> }
>
> /**
> * returns all the document type constants that are known in core implementation
66c122
< } //search_get_document_types
---
> }
77a134,137
> if (defined('SEARCH_EXTRAS')){
> $extras = explode(',', SEARCH_EXTRAS);
> }
>
81c141,142
< $ret[] = clone($temp);
---
> $temp->location = 'internal';
> $ret[$temp->name] = clone($temp);
117,133d177
< * get a real php 5 version number, using 5.0.0 arbitrarily
< * @param feedback if true, prints a feedback message to output.
< * @return true if version of PHP is high enough
< */
< function search_check_php5($feedback = false) {
< if (!check_php_version("5.0.0")) {
< if ($feedback) {
< print_heading(get_string('versiontoolow', 'search'));
< }
< return false;
< }
< else {
< return true;
< }
< } //search_check_php5
<
< /**
Index: query.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/query.php,v
retrieving revision 1.16.2.5
diff -r1.16.2.5 query.php
53d52
< if ($check = search_check_php5()) {
59a59,67
>
> /**
> * discard harmful searches
> */
> if (preg_match("/^[\*\?]+$/", $query_string)){
> $query_string = '';
> $error = get_string('fullwildcardquery','search');
> }
>
64,65c72
< }
< else if ($advanced) {
---
> } elseif ($advanced) {
88c95
< // get all available module types
---
> // get all available module types adding third party modules
89a97
> $module_types = array_merge($module_types, array_values(search_get_document_types('X_SEARCH_TYPE')));
136c144,145
< //run the query against the index
---
> //run the query against the index ensuring internal coding works in UTF-8
> Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8());
138d146
< }
159,164c167,169
< //keep things pretty, even if php5 isn't available
< if (!$check) {
< print_heading(search_check_php5(true));
< print_footer();
< exit(0);
< }
---
> if (!empty($error)){
> notice ($error);
> }
181d185
<
290c294
< if (!$sq->is_valid_index() and isadmin()) {
---
> if (!$sq->is_valid_index() and has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) {
299c303,304
< // prints all the results in a box
---
> /// prints all the results in a box
>
326a332,334
>
> $searchables = search_collect_searchables(false, false);
>
327a336,339
>
> $iconpath = $CFG->modpixpath.'/'.$listing->doctype.'/icon.gif';
> $coursename = get_field('course', 'fullname', 'id', $listing->courseid);
> $courseword = mb_convert_case(get_string('course', 'moodle'), MB_CASE_LOWER, 'UTF-8');
332c344,349
< require_once "{$CFG->dirroot}/search/documents/{$listing->doctype}_document.php";
---
> $searchable_instance = $searchables[$listing->doctype];
> if ($searchable_instance->location == 'internal'){
> require_once "{$CFG->dirroot}/search/documents/{$listing->doctype}_document.php";
> } else {
> require_once "{$CFG->dirroot}/{$searchable_instance->location}/{$listing->doctype}/search_document.php";
> }
337,339c354,360
< print "
url)."'>$listing->title
\n"
< ."".search_shorten_url($listing->url, 70)."
\n"
< ."{$typestr}: ".$listing->doctype.", {$scorestr}: ".round($listing->score, 3);
---
> echo "url)
> ."'>
."\" class=\"activityicon\" alt=\"\"/> $listing->title (".$courseword.": '".$coursename."')
\n";
> // print "url)."'>$listing->title
\n"
> // ."".search_shorten_url($listing->url, 70)."
\n"
> echo "{$typestr}: " . $listing->doctype . ", {$scorestr}: " . round($listing->score, 3);
341c362
< print ", {$authorstr}: ".$listing->author."\n"
---
> echo ", {$authorstr}: ".$listing->author."\n"
345,346c366,367
< print "";
< print $page_links;
---
> echo "";
> echo $page_links;
Index: querylib.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/querylib.php,v
retrieving revision 1.7.2.3
diff -r1.7.2.3 querylib.php
29c29,30
< $number;
---
> $number,
> $courseid;
223c224,225
<
---
> $page = optional_param('page', 1, PARAM_INT);
>
236c238,242
< $totalpages = ceil($hitcount/$this->results_per_page);
---
> $resultdoc = new SearchResult();
> $resultdocs = array();
> $searchables = search_collect_searchables(false, false);
>
> $realindex = 0;
237a244
> /**
239c246
< if ($hitcount < $this->results_per_page) {
---
> if ($finalresults < $this->results_per_page) {
241c248
< } else if ($this->pagenumber > $totalpages) {
---
> } elseif ($this->pagenumber > $totalpages) {
248,249c255,256
< if ($end > $hitcount) {
< $end = $hitcount;
---
> if ($end > $finalresults) {
> $end = $finalresults;
253,257c260,261
< $end = $hitcount;
< }
<
< $resultdoc = new SearchResult();
< $resultdocs = array();
---
> $end = $finalresults;
> } */
259c263
< for ($i = $start; $i < $end; $i++) {
---
> for ($i = 0; $i < min($hitcount, ($page) * $this->results_per_page); $i++) {
263,272c267,280
< if ($this->can_display($USER, $hit->docid, $hit->doctype, $hit->course_id, $hit->group_id, $hit->path, $hit->itemtype, $hit->context_id )) {
< $resultdoc->number = $i;
< $resultdoc->url = $hit->url;
< $resultdoc->title = $hit->title;
< $resultdoc->score = $hit->score;
< $resultdoc->doctype = $hit->doctype;
< $resultdoc->author = $hit->author;
<
< //and store it
< $resultdocs[] = clone($resultdoc);
---
> if ($this->can_display($USER, $hit->docid, $hit->doctype, $hit->course_id, $hit->group_id, $hit->path, $hit->itemtype, $hit->context_id, $searchables )) {
> if ($i >= ($page - 1) * $this->results_per_page){
> $resultdoc->number = $realindex;
> $resultdoc->url = $hit->url;
> $resultdoc->title = $hit->title;
> $resultdoc->score = $hit->score;
> $resultdoc->doctype = $hit->doctype;
> $resultdoc->author = $hit->author;
> $resultdoc->courseid = $hit->course_id;
>
> //and store it
> $resultdocs[] = clone($resultdoc);
> }
> $realindex++;
278a287,289
> $totalpages = ceil($this->total_results/$this->results_per_page);
>
>
301c312
< //print "Caching disabled!";
---
> // print "Caching disabled!";
373c384
< private function can_display(&$user, $this_id, $doctype, $course_id, $group_id, $path, $item_type, $context_id) {
---
> private function can_display(&$user, $this_id, $doctype, $course_id, $group_id, $path, $item_type, $context_id, &$searchables) {
380c391
< if (isadmin()){
---
> if (has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))){
384,420c395,400
< // first check course compatibility against user : enrolled users to that course can see.
< $myCourses = get_my_courses($user->id);
< $unenroled = !in_array($course_id, array_keys($myCourses));
<
< // if guests are allowed, logged guest can see
< $isallowedguest = (isguest()) ? get_field('course', 'guest', 'id', $course_id) : false ;
<
< if ($unenroled && !$isallowedguest){
< return false;
< }
<
< // if user is enrolled or is allowed user and course is hidden, can he see it ?
< $visibility = get_field('course', 'visible', 'id', $course_id);
< if ($visibility <= 0){
< if (!has_capability('moodle/course:viewhiddencourses', get_context_instance(CONTEXT_COURSE, $course_id))){
< return false;
< }
< }
<
< /**
< * prerecorded capabilities
< */
< // get context caching information and tries to discard unwanted records here
<
<
< /**
< * final checks
< */
< // then give back indexing data to the module for local check
< include_once "{$CFG->dirroot}/search/documents/{$doctype}_document.php";
< $access_check_function = "{$doctype}_check_text_access";
<
< if (function_exists($access_check_function)){
< $modulecheck = $access_check_function($path, $item_type, $this_id, $user, $group_id, $context_id);
< // echo "module said $modulecheck for item $doctype/$item_type/$this_id";
< return($modulecheck);
< }
---
> // first check course compatibility against user : enrolled users to that course can see.
> $myCourses = get_my_courses($user->id);
> $unenroled = !in_array($course_id, array_keys($myCourses));
>
> // if guests are allowed, logged guest can see
> $isallowedguest = (isguest()) ? get_field('course', 'guest', 'id', $course_id) : false ;
422c402,438
< return true;
---
> if ($unenroled && !$isallowedguest){
> return false;
> }
>
> // if user is enrolled or is allowed user and course is hidden, can he see it ?
> $visibility = get_field('course', 'visible', 'id', $course_id);
> if ($visibility <= 0){
> if (!has_capability('moodle/course:viewhiddencourses', get_context_instance(CONTEXT_COURSE, $course_id))){
> return false;
> }
> }
>
> /**
> * prerecorded capabilities
> */
> // get context caching information and tries to discard unwanted records here
>
>
> /**
> * final checks
> */
> // then give back indexing data to the module for local check
> $searchable_instance = $searchables[$doctype];
> if ($searchable_instance->location == 'internal'){
> include_once "{$CFG->dirroot}/search/documents/{$doctype}_document.php";
> } else {
> include_once "{$CFG->dirroot}/{$searchable_instance->location}/$doctype/search_document.php";
> }
> $access_check_function = "{$doctype}_check_text_access";
>
> if (function_exists($access_check_function)){
> $modulecheck = $access_check_function($path, $item_type, $this_id, $user, $group_id, $context_id);
> // echo "module said $modulecheck for item $doctype/$item_type/$this_id";
> return($modulecheck);
> }
>
> return true;
Index: stats.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/stats.php,v
retrieving revision 1.11.2.3
diff -r1.11.2.3 stats.php
35,36c35
< if ($check = search_check_php5()) {
< require_once("{$CFG->dirroot}/search/indexlib.php");
---
> require_once("{$CFG->dirroot}/search/indexlib.php");
38,39c37
< $indexinfo = new IndexInfo();
< }
---
> $indexinfo = new IndexInfo();
61,66d58
< if (!$check) {
< print_heading(search_check_php5(true));
< print_footer();
< exit(0);
< }
<
80c72
< if (isadmin()) {
---
> if (has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) {
150c142
< if (isadmin()) {
---
> if (has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) {
Index: update.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/update.php,v
retrieving revision 1.5.2.3
diff -r1.5.2.3 update.php
2,22c2,28
< /**
< * Global Search Engine for Moodle
< *
< * @package search
< * @category core
< * @subpackage search_engine
< * @author Michael Champanis (mchampan) [cynnical@gmail.com], Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8
< * @date 2008/03/31
< * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
< *
< * Index asynchronous updator
< *
< * Major chages in this review is passing the xxxx_db_names return to
< * multiple arity to handle multiple document types modules
< */
<
< /**
< * includes and requires
< */
< require_once('../config.php');
< require_once("$CFG->dirroot/search/lib.php");
---
> /**
> * Global Search Engine for Moodle
> *
> * @package search
> * @category core
> * @subpackage search_engine
> * @author Michael Champanis (mchampan) [cynnical@gmail.com], Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8
> * @date 2008/03/31
> * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
> *
> * Index asynchronous updator
> *
> * The major change in this revision is that the xxxx_db_names return value
> * has multiple arity, so that modules with multiple document types are handled
> */
>
> /**
> * includes and requires
> */
> require_once('../config.php');
>
> /// puts the search directory first on the include path so the Zend Engine files resolve, then enforces login
> $separator = (array_key_exists('WINDIR', $_SERVER)) ? ';' : ':' ;
> ini_set('include_path', $CFG->dirroot.'/search'.$separator.ini_get('include_path'));
> require_login();
> require_once("$CFG->dirroot/search/lib.php");
> require_once("$CFG->dirroot/search/indexlib.php");
32c38
< if (!isadmin()) {
---
> if (!has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) {
36,46c42,47
< /// check for php5 (lib.php)
<
< if (!search_check_php5()) {
< $phpversion = phpversion();
< mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version ".phpversion().")");
< exit(0);
< }
<
< require_once("$CFG->dirroot/search/indexlib.php");
<
< $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH);
---
> try {
> $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH);
> } catch(Zend_Search_Lucene_Exception $e) { // NOTE(review): this is the exception class the bundled Zend code throws; no LuceneException class is visible, so the old handler never matched - confirm
> mtrace("Could not construct a valid index. Maybe the first indexation was never made, or files might be corrupted. Run complete indexation again.");
> return;
> }
88c89
< doctype = '{$mod->name}'
---
> doctype = ?
91c92
< $docIds = get_records_sql_menu($query);
---
> $docIds = $DB->get_records_sql_menu($query, array($mod->name));
cvs diff: Diffing Zend
cvs diff: Diffing Zend/Search
Index: Zend/Search/Exception.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Exception.php,v
retrieving revision 1.2
diff -r1.2 Exception.php
17c17
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
25c25
< require_once $CFG->dirroot.'/search/Zend/Exception.php';
---
> require_once "Zend/Exception.php";
31c31
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
Index: Zend/Search/Lucene.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene.php,v
retrieving revision 1.2
diff -r1.2 Lucene.php
17c17
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
21d20
<
23c22
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
---
> require_once "Zend/Search/Lucene/Exception.php";
26c25
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document.php';
---
> require_once "Zend/Search/Lucene/Document.php";
29c28
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document/Html.php';
---
> require_once "Zend/Search/Lucene/Document/Html.php";
31,32c30,31
< /** Zend_Search_Lucene_Storage_Directory */
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/Directory/Filesystem.php';
---
> /** Zend_Search_Lucene_Storage_Directory_Filesystem */
> require_once "Zend/Search/Lucene/Storage/Directory/Filesystem.php";
35c34
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/File/Memory.php';
---
> require_once "Zend/Search/Lucene/Storage/File/Memory.php";
38c37
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
---
> require_once "Zend/Search/Lucene/Index/Term.php";
41c40
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/TermInfo.php';
---
> require_once "Zend/Search/Lucene/Index/TermInfo.php";
44c43
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php';
---
> require_once "Zend/Search/Lucene/Index/SegmentInfo.php";
47c46
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/FieldInfo.php';
---
> require_once "Zend/Search/Lucene/Index/FieldInfo.php";
50c49
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Writer.php';
---
> require_once "Zend/Search/Lucene/Index/Writer.php";
53c52
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParser.php';
---
> require_once "Zend/Search/Lucene/Search/QueryParser.php";
56c55
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryHit.php';
---
> require_once "Zend/Search/Lucene/Search/QueryHit.php";
59c58
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Similarity.php';
---
> require_once "Zend/Search/Lucene/Search/Similarity.php";
62c61,65
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';
---
> require_once "Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php";
>
> /** Zend_Search_Lucene_LockManager */
> require_once "Zend/Search/Lucene/LockManager.php";
>
66c69
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Interface.php';
---
> require_once "Zend/Search/Lucene/Interface.php";
69c72
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Proxy.php';
---
> require_once "Zend/Search/Lucene/Proxy.php";
75c78
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
89a93,101
> * Result set limit
> *
> * 0 means no limit
> *
> * @var integer
> */
> private static $_resultSetLimit = 0;
>
> /**
133,139d144
< * Index lock object
< *
< * @var Zend_Search_Lucene_Storage_File
< */
< private $_lock;
<
< /**
152a158,163
> /**
> * Current segment generation
> *
> * @var integer
> */
> private $_generation;
153a165
>
175a188,193
> /** Generation retrieving counter */
> const GENERATION_RETRIEVE_COUNT = 10;
>
> /** Pause between generation retrieving attempts in milliseconds */
> const GENERATION_RETRIEVE_PAUSE = 50;
>
177c195
< * Opens the index.
---
> * Get current generation number
179,180c197,199
< * IndexReader constructor needs Directory as a parameter. It should be
< * a string with a path to the index folder or a Directory object.
---
> * Returns generation number
> * 0 means pre-2.1 index format
> * -1 means there are no segments files.
182c201,202
< * @param mixed $directory
---
> * @param Zend_Search_Lucene_Storage_Directory $directory
> * @return integer
185c205
< public function __construct($directory = null, $create = false)
---
> public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
187,197c207,226
< if ($directory === null) {
< throw new Zend_Search_Exception('No index directory specified');
< }
<
< if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
< $this->_directory = $directory;
< $this->_closeDirOnExit = false;
< } else {
< $this->_directory = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
< $this->_closeDirOnExit = true;
< }
---
> /**
> * Zend_Search_Lucene uses segments.gen file to retrieve current generation number
> *
> * Apache Lucene index format documentation mentions this method only as a fallback method
> *
> * Nevertheless we use it according to the performance considerations
> *
> * @todo check if we can use some modification of Apache Lucene generation determination algorithm
> * without performance problems
> */
>
> try {
> for ($count = 0; $count < self::GENERATION_RETRIEVE_COUNT; $count++) {
> // Try to get generation file
> $genFile = $directory->getFileObject('segments.gen', false);
>
> $format = $genFile->readInt();
> if ($format != (int)0xFFFFFFFE) {
> throw new Zend_Search_Lucene_Exception('Wrong segments.gen file format');
> }
198a228,229
> $gen1 = $genFile->readLong();
> $gen2 = $genFile->readLong();
200,201c231,233
< // Get a shared lock to the index
< $this->_lock = $this->_directory->createFile('index.lock');
---
> if ($gen1 == $gen2) {
> return $gen1;
> }
203c235,236
< $this->_segmentInfos = array();
---
> usleep(self::GENERATION_RETRIEVE_PAUSE * 1000);
> }
205,208c238,256
< if ($create) {
< // Throw an exception if index is under processing now
< if (!$this->_lock->lock(LOCK_EX, true)) {
< throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
---
> // All attempts failed
> throw new Zend_Search_Lucene_Exception('Index is under processing now');
> } catch (Zend_Search_Lucene_Exception $e) {
> if (strpos($e->getMessage(), 'is not readable') !== false) {
> try {
> // Try to open old style segments file
> $segmentsFile = $directory->getFileObject('segments', false);
>
> // It's pre-2.1 index
> return 0;
> } catch (Zend_Search_Lucene_Exception $e) {
> if (strpos($e->getMessage(), 'is not readable') !== false) {
> return -1;
> } else {
> throw $e;
> }
> }
> } else {
> throw $e;
209a258
> }
211,212c260,261
< // Writer will create segments file for empty segments list
< $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos, true);
---
> return -1;
> }
214,222c263,272
< if (!$this->_lock->lock(LOCK_SH)) {
< throw new Zend_Search_Lucene_Exception('Can\'t reduce lock level from Exclusive to Shared');
< }
< } else {
< // Wait if index is under switching from one set of segments to another (Index_Writer::_updateSegments())
< if (!$this->_lock->lock(LOCK_SH)) {
< throw new Zend_Search_Lucene_Exception('Can\'t obtain shared index lock');
< }
< $this->_writer = null;
---
> /**
> * Get segments file name
> *
> * @param integer $generation
> * @return string
> */
> public static function getSegmentFileName($generation)
> {
> if ($generation == 0) {
> return 'segments';
224a275,276
> return 'segments_' . base_convert($generation, 10, 36);
> }
225a278,282
> /**
> * Read segments file for pre-2.1 Lucene index format
> */
> private function _readPre21SegmentsFile()
> {
251,252c308,375
< $this->_segmentInfos[] =
< new Zend_Search_Lucene_Index_SegmentInfo($segName,
---
> $this->_segmentInfos[$segName] =
> new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
> $segName,
> $segSize);
> }
> }
>
> /**
> * Read segments file
> *
> * @throws Zend_Search_Lucene_Exception
> */
> private function _readSegmentsFile()
> {
> $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
>
> $format = $segmentsFile->readInt();
>
> if ($format != (int)0xFFFFFFFD) {
> throw new Zend_Search_Lucene_Exception('Wrong segments file format');
> }
>
> // read version
> // $segmentsFile->readLong();
> $segmentsFile->readInt(); $segmentsFile->readInt();
>
> // read segment name counter
> $segmentsFile->readInt();
>
> $segments = $segmentsFile->readInt();
>
> $this->_docCount = 0;
>
> // read segmentInfos
> for ($count = 0; $count < $segments; $count++) {
> $segName = $segmentsFile->readString();
> $segSize = $segmentsFile->readInt();
>
> // 2.1+ specific properties
> //$delGen = $segmentsFile->readLong();
> $delGenHigh = $segmentsFile->readInt();
> $delGenLow = $segmentsFile->readInt();
> if ($delGenHigh == (int)0xFFFFFFFF && $delGenLow == (int)0xFFFFFFFF) {
> $delGen = -1; // There are no deletes
> } else {
> $delGen = ($delGenHigh << 32) | $delGenLow;
> }
>
> $hasSingleNormFile = $segmentsFile->readByte();
> $numField = $segmentsFile->readInt();
>
> $normGens = array();
> if ($numField != (int)0xFFFFFFFF) {
> for ($count1 = 0; $count1 < $numField; $count1++) {
> $normGens[] = $segmentsFile->readLong();
> }
>
> throw new Zend_Search_Lucene_Exception('Separate norm files are not supported. Optimize index to use it with Zend_Search_Lucene.');
> }
>
> $isCompound = $segmentsFile->readByte();
>
>
> $this->_docCount += $segSize;
>
> $this->_segmentInfos[$segName] =
> new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
> $segName,
254c377,403
< $this->_directory);
---
> $delGen,
> $hasSingleNormFile,
> $isCompound);
> }
> }
>
> /**
> * Opens the index.
> *
> * IndexReader constructor needs Directory as a parameter. It should be
> * a string with a path to the index folder or a Directory object.
> *
> * @param mixed $directory
> * @throws Zend_Search_Lucene_Exception
> */
> public function __construct($directory = null, $create = false)
> {
> if ($directory === null) {
> throw new Zend_Search_Exception('No index directory specified');
> }
>
> if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
> $this->_directory = $directory;
> $this->_closeDirOnExit = false;
> } else {
> $this->_directory = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
> $this->_closeDirOnExit = true;
255a405,455
>
> $this->_segmentInfos = array();
>
> // Mark index as "under processing" to prevent other processes from premature index cleaning
> Zend_Search_Lucene_LockManager::obtainReadLock($this->_directory);
>
> // Escalate read lock to prevent current generation index files from being deleted while the opening process is not done
> // Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory);
>
>
> $this->_generation = self::getActualGeneration($this->_directory);
>
> if ($create) {
> try {
> Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
> } catch (Zend_Search_Lucene_Exception $e) {
> if (strpos($e->getMessage(), 'Can\'t obtain exclusive index lock') === false) {
> throw $e;
> } else {
> throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
> }
> }
>
> if ($this->_generation == -1) {
> // Directory doesn't contain existing index, start from 1
> $this->_generation = 1;
> $nameCounter = 0;
> } else {
> // Directory contains existing index
> $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
> $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
>
> $nameCounter = $segmentsFile->readInt();
> $this->_generation++;
> }
>
> Zend_Search_Lucene_Index_Writer::createIndex($this->_directory, $this->_generation, $nameCounter);
>
> Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
> }
>
> if ($this->_generation == -1) {
> throw new Zend_Search_Lucene_Exception('Index doesn\'t exists in the specified directory.');
> } else if ($this->_generation == 0) {
> $this->_readPre21SegmentsFile();
> } else {
> $this->_readSegmentsFile();
> }
>
> // De-escalate read lock to prevent current generation index files from being deleted while the opening process is not done
> // Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory);
270,272c470,472
< // Free shared lock
< $this->_lock->unlock();
<
---
> // Release "under processing" flag
> Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);
>
434a635,658
> * Set result set limit.
> *
> * 0 (default) means no limit
> *
> * @param integer $limit
> */
> public static function setResultSetLimit($limit)
> {
> self::$_resultSetLimit = $limit;
> }
>
> /**
> * Get result set limit.
> *
> * 0 means no limit
> *
> * @return integer
> */
> public static function getResultSetLimit()
> {
> return self::$_resultSetLimit;
> }
>
> /**
587a812,815
>
> if (self::$_resultSetLimit != 0 && count($hits) >= self::$_resultSetLimit) {
> break;
> }
592c820
< return array();
---
> return array();
979a1208,1209
>
> $this->_hasChanges = true;
1005,1011c1235,1237
<
< $this->_hasChanges = false;
< }
<
< if ($this->_writer !== null) {
< $this->_writer->commit();
<
---
>
> $this->getIndexWriter()->commit();
>
1012a1239,1240
>
> $this->_hasChanges = false;
1062,1064c1290
< $segmentInfo->nextTerm();
< // check, if segment dictionary is finished
< if ($segmentInfo->currentTerm() !== null) {
---
> if ($segmentInfo->nextTerm() !== null) {
1073a1300,1418
> /**
> * Terms stream queue
> *
> * @var Zend_Search_Lucene_Index_SegmentInfoPriorityQueue
> */
> private $_termsStreamQueue = null;
>
> /**
> * Last Term in a terms stream
> *
> * @var Zend_Search_Lucene_Index_Term
> */
> private $_lastTerm = null;
>
> /**
> * Reset terms stream.
> */
> public function resetTermsStream()
> {
> $this->_termsStreamQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();
>
> foreach ($this->_segmentInfos as $segmentInfo) {
> $segmentInfo->reset();
>
> // Skip "empty" segments
> if ($segmentInfo->currentTerm() !== null) {
> $this->_termsStreamQueue->put($segmentInfo);
> }
> }
>
> $this->nextTerm();
> }
>
> /**
> * Skip terms stream up to specified term prefix.
> *
> * Prefix contains fully specified field info and portion of searched term
> *
> * @param Zend_Search_Lucene_Index_Term $prefix
> */
> public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
> {
> $segments = array();
>
> while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) {
> $segments[] = $segmentInfo;
> }
>
> foreach ($segments as $segmentInfo) {
> $segmentInfo->skipTo($prefix);
>
> if ($segmentInfo->currentTerm() !== null) {
> $this->_termsStreamQueue->put($segmentInfo);
> }
> }
>
> $this->nextTerm();
> }
>
> /**
> * Scans terms dictionary and returns next term
> *
> * @return Zend_Search_Lucene_Index_Term|null
> */
> public function nextTerm()
> {
> while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) {
> if ($this->_termsStreamQueue->top() === null ||
> $this->_termsStreamQueue->top()->currentTerm()->key() !=
> $segmentInfo->currentTerm()->key()) {
> // We got new term
> $this->_lastTerm = $segmentInfo->currentTerm();
>
> if ($segmentInfo->nextTerm() !== null) {
> // Put segment back into the priority queue
> $this->_termsStreamQueue->put($segmentInfo);
> }
>
> return $this->_lastTerm;
> }
>
> if ($segmentInfo->nextTerm() !== null) {
> // Put segment back into the priority queue
> $this->_termsStreamQueue->put($segmentInfo);
> }
> }
>
> // End of stream
> $this->_lastTerm = null;
>
> return null;
> }
>
> /**
> * Returns term in current position
> *
> * @return Zend_Search_Lucene_Index_Term|null
> */
> public function currentTerm()
> {
> return $this->_lastTerm;
> }
>
> /**
> * Close terms stream
> *
> * Should be used for resources clean up if stream is not read up to the end
> */
> public function closeTermsStream()
> {
> while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) {
> $segmentInfo->closeTermsStream();
> }
>
> $this->_termsStreamQueue = null;
> $this->_lastTerm = null;
> }
>
>
cvs diff: Diffing Zend/Search/Lucene
Index: Zend/Search/Lucene/Document.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Document.php,v
retrieving revision 1.2
diff -r1.2 Document.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Field.php';
---
> require_once "Zend/Search/Lucene/Field.php";
33c33
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
57,60c57,60
< public function __get($offset)
< {
< return $this->getFieldValue($offset);
< }
---
> public function __get($offset)
> {
> return $this->getFieldValue($offset);
> }
81c81
< return array_keys($this->_fields);
---
> return array_keys($this->_fields);
108c108
< return $this->getField($fieldName)->value;
---
> return $this->getField($fieldName)->value;
119c119
< return $this->getField($fieldName)->getUtf8Value();
---
> return $this->getField($fieldName)->getUtf8Value();
Index: Zend/Search/Lucene/Exception.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Exception.php,v
retrieving revision 1.2
diff -r1.2 Exception.php
17c17
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
25c25
< require_once $CFG->dirroot.'/search/Zend/Search/Exception.php';
---
> require_once "Zend/Search/Exception.php";
31c31
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
Index: Zend/Search/Lucene/FSM.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/FSM.php,v
retrieving revision 1.1
diff -r1.1 FSM.php
17c17
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
23c23
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/FSMAction.php';
---
> require_once 'Zend/Search/Lucene/FSMAction.php';
26c26
< require_once $CFG->dirroot.'/search/Zend/Search/Exception.php';
---
> require_once 'Zend/Search/Exception.php';
40c40
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
Index: Zend/Search/Lucene/FSMAction.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/FSMAction.php,v
retrieving revision 1.1
diff -r1.1 FSMAction.php
17c17
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
28c28
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
Index: Zend/Search/Lucene/Field.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Field.php,v
retrieving revision 1.3
diff -r1.3 Field.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
34c34
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
Index: Zend/Search/Lucene/Interface.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Interface.php,v
retrieving revision 1.1
diff -r1.1 Interface.php
17c17
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
25c25
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
89a90,107
> * Set result set limit.
> *
> * 0 (default) means no limit
> *
> * @param integer $limit
> */
> public static function setResultSetLimit($limit);
>
> /**
> * Get result set limit.
> *
> * 0 means no limit
> *
> * @return integer
> */
> public static function getResultSetLimit();
>
> /**
308a327,363
>
> /**
> * Reset terms stream.
> */
> public function resetTermsStream();
>
> /**
> * Skip terms stream up to specified term prefix.
> *
> * Prefix contains fully specified field info and portion of searched term
> *
> * @param Zend_Search_Lucene_Index_Term $prefix
> */
> public function skipTo(Zend_Search_Lucene_Index_Term $prefix);
>
> /**
> * Scans terms dictionary and returns next term
> *
> * @return Zend_Search_Lucene_Index_Term|null
> */
> public function nextTerm();
>
> /**
> * Returns term in current position
> *
> * @return Zend_Search_Lucene_Index_Term|null
> */
> public function currentTerm();
>
> /**
> * Close terms stream
> *
> * Should be used for resources clean up if stream is not read up to the end
> */
> public function closeTermsStream();
>
>
Index: Zend/Search/Lucene/PriorityQueue.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/PriorityQueue.php,v
retrieving revision 1.1
diff -r1.1 PriorityQueue.php
17c17
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
34c34
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
Index: Zend/Search/Lucene/Proxy.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Proxy.php,v
retrieving revision 1.1
diff -r1.1 Proxy.php
17c17
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
22c22
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Interface.php';
---
> require_once 'Zend/Search/Lucene/Interface.php';
32c32
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
147a148,171
> * Set result set limit.
> *
> * 0 (default) means no limit
> *
> * @param integer $limit
> */
> public static function setResultSetLimit($limit)
> {
> Zend_Search_Lucene::setResultSetLimit($limit);
> }
>
> /**
> * Get result set limit.
> *
> * 0 means no limit
> *
> * @return integer
> */
> public static function getResultSetLimit()
> {
> return Zend_Search_Lucene::getResultSetLimit();
> }
>
> /**
438a463,514
>
> /**
> * Reset terms stream.
> */
> public function resetTermsStream()
> {
> $this->_index->resetTermsStream();
> }
>
> /**
> * Skip terms stream up to specified term prefix.
> *
> * Prefix contains fully specified field info and portion of searched term
> *
> * @param Zend_Search_Lucene_Index_Term $prefix
> */
> public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
> {
> return $this->_index->skipTo($prefix);
> }
>
> /**
> * Scans terms dictionary and returns next term
> *
> * @return Zend_Search_Lucene_Index_Term|null
> */
> public function nextTerm()
> {
> return $this->_index->nextTerm();
> }
>
> /**
> * Returns term in current position
> *
> * @return Zend_Search_Lucene_Index_Term|null
> */
> public function currentTerm()
> {
> return $this->_index->currentTerm();
> }
>
> /**
> * Close terms stream
> *
> * Should be used for resources clean up if stream is not read up to the end
> */
> public function closeTermsStream()
> {
> $this->_index->closeTermsStream();
> }
>
>
cvs diff: Diffing Zend/Search/Lucene/Analysis
Index: Zend/Search/Lucene/Analysis/Analyzer.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer.php,v
retrieving revision 1.2.2.1
diff -r1.2.2.1 Analyzer.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Token.php';
---
> require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Token.php";
27c27,30
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php';
---
> require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php";
>
> /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive */
> require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8/CaseInsensitive.php";
30c33,36
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php';
---
> require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php";
>
> /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive */
> require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num/CaseInsensitive.php";
33c39
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
---
> require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php";
36c42
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php';
---
> require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php";
39c45
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php';
---
> require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php";
42c48
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php';
---
> require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php";
45c51
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php';
---
> require_once 'Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php';
48c54
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php';
---
> require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php";
64c70
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
100c106
< public function tokenize($data, $encoding = 'UTF-8')
---
> public function tokenize($data, $encoding = '')
102a109
>
163c170
< self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8();
---
> self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive();
Index: Zend/Search/Lucene/Analysis/Token.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Token.php,v
retrieving revision 1.2
diff -r1.2 Token.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
27c27
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
Index: Zend/Search/Lucene/Analysis/TokenFilter.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/TokenFilter.php,v
retrieving revision 1.2
diff -r1.2 TokenFilter.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Token.php';
---
> require_once 'Zend/Search/Lucene/Analysis/Token.php';
33c33
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
cvs diff: Diffing Zend/Search/Lucene/Analysis/Analyzer
Index: Zend/Search/Lucene/Analysis/Analyzer/Common.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php,v
retrieving revision 1.2
diff -r1.2 Common.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer.php';
---
> require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
37c37
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
71c71
< // resulting token can be null if the filter removed it
---
> // resulting token can be null if the filter removes it
cvs diff: Diffing Zend/Search/Lucene/Analysis/Analyzer/Common
Index: Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php,v
retrieving revision 1.2
diff -r1.2 Text.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php';
---
> require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
31c31
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
56c56,58
< $this->_input = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input);
---
> //$this->_input = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input);
> $this->_input = mb_convert_encoding($this->_input, 'ASCII', 'auto');
>
78c80
< return null;
---
> return null;
Index: Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php,v
retrieving revision 1.1
diff -r1.1 TextNum.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php';
---
> require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
31c31
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
77c77
< return null;
---
> return null;
Index: Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php,v
retrieving revision 1.1
diff -r1.1 Utf8.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php';
---
> require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
31c31
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
50c50
<
---
>
52c52
< * Stream length
---
> * Object constructor
54c54
< * @var integer
---
> * @throws Zend_Search_Lucene_Exception
56c56,63
< private $_streamLength;
---
> public function __construct()
> {
> if (@preg_match('/\pL/u', 'a') != 1) {
> // PCRE unicode support is turned off
> require_once 'Zend/Search/Lucene/Exception.php';
> throw new Zend_Search_Lucene_Exception('Utf8 analyzer needs PCRE unicode support to be enabled.');
> }
> }
69c76
< $this->_input = iconv($this->_encoding, 'UTF-8', $this->_input);
---
> $this->_input = @iconv($this->_encoding, 'UTF-8', $this->_input);
72,118d78
<
< // Get UTF-8 string length.
< // It also checks if it's a correct utf-8 string
< $this->_streamLength = iconv_strlen($this->_input, 'UTF-8');
< }
<
< /**
< * Check, that character is a letter
< *
< * @param string $char
< * @return boolean
< */
< private static function _isAlpha($char)
< {
< if (strlen($char) > 1) {
< // It's an UTF-8 character
< return true;
< }
<
< return ctype_alpha($char);
< }
<
< /**
< * Get next UTF-8 char
< *
< * @param string $char
< * @return boolean
< */
< private function _nextChar()
< {
< $char = $this->_input[$this->_bytePosition++];
<
< if (( ord($char) & 0xC0 ) == 0xC0) {
< $addBytes = 1;
< if (ord($char) & 0x20 ) {
< $addBytes++;
< if (ord($char) & 0x10 ) {
< $addBytes++;
< }
< }
< $char .= substr($this->_input, $this->_bytePosition, $addBytes);
< $this->_bytePosition += $addBytes;
< }
<
< $this->_position++;
<
< return $char;
134,138c94,98
< while ($this->_position < $this->_streamLength) {
< // skip white space
< while ($this->_position < $this->_streamLength &&
< !self::_isAlpha($char = $this->_nextChar())) {
< $char = '';
---
> do {
> if (! preg_match('/[\p{L}]+/u', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_bytePosition)) {
> // It covers both cases a) there are no matches (preg_match(...) === 0)
> // b) error occurred (preg_match(...) === FALSE)
> return null;
141,148c101,114
< $termStartPosition = $this->_position - 1;
< $termText = $char;
<
< // read token
< while ($this->_position < $this->_streamLength &&
< self::_isAlpha($char = $this->_nextChar())) {
< $termText .= $char;
< }
---
> // matched string
> $matchedWord = $match[0][0];
>
> // binary position of the matched word in the input stream
> $binStartPos = $match[0][1];
>
> // character position of the matched word in the input stream
> $startPos = $this->_position +
> iconv_strlen(substr($this->_input,
> $this->_bytePosition,
> $binStartPos - $this->_bytePosition),
> 'UTF-8');
> // character position of the end of matched word in the input stream
> $endPos = $startPos + iconv_strlen($matchedWord, 'UTF-8');
150,153c116,117
< // Empty token, end of stream.
< if ($termText == '') {
< return null;
< }
---
> $this->_bytePosition = $binStartPos + strlen($matchedWord);
> $this->_position = $endPos;
155,164c119,120
< $token = new Zend_Search_Lucene_Analysis_Token(
< $termText,
< $termStartPosition,
< $this->_position - 1);
< $token = $this->normalize($token);
< if ($token !== null) {
< return $token;
< }
< // Continue if token is skipped
< }
---
> $token = $this->normalize(new Zend_Search_Lucene_Analysis_Token($matchedWord, $startPos, $endPos));
> } while ($token === null); // try again if token is skipped
166c122
< return null;
---
> return $token;
Index: Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php,v
retrieving revision 1.1
diff -r1.1 Utf8Num.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php';
---
> require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
31c31
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
52c52
< * Stream length
---
> * Object constructor
54c54
< * @var integer
---
> * @throws Zend_Search_Lucene_Exception
56c56,63
< private $_streamLength;
---
> public function __construct()
> {
> if (@preg_match('/\pL/u', 'a') != 1) {
> // PCRE unicode support is turned off
> require_once 'Zend/Search/Lucene/Exception.php';
> throw new Zend_Search_Lucene_Exception('Utf8Num analyzer needs PCRE unicode support to be enabled.');
> }
> }
72,118d78
<
< // Get UTF-8 string length.
< // It also checks if it's a correct utf-8 string
< $this->_streamLength = iconv_strlen($this->_input, 'UTF-8');
< }
<
< /**
< * Check, that character is a letter
< *
< * @param string $char
< * @return boolean
< */
< private static function _isAlNum($char)
< {
< if (strlen($char) > 1) {
< // It's an UTF-8 character
< return true;
< }
<
< return ctype_alnum($char);
< }
<
< /**
< * Get next UTF-8 char
< *
< * @param string $char
< * @return boolean
< */
< private function _nextChar()
< {
< $char = $this->_input[$this->_bytePosition++];
<
< if (( ord($char) & 0xC0 ) == 0xC0) {
< $addBytes = 1;
< if (ord($char) & 0x20 ) {
< $addBytes++;
< if (ord($char) & 0x10 ) {
< $addBytes++;
< }
< }
< $char .= substr($this->_input, $this->_bytePosition, $addBytes);
< $this->_bytePosition += $addBytes;
< }
<
< $this->_position++;
<
< return $char;
134,138c94,98
< while ($this->_position < $this->_streamLength) {
< // skip white space
< while ($this->_position < $this->_streamLength &&
< !self::_isAlNum($char = $this->_nextChar())) {
< $char = '';
---
> do {
> if (! preg_match('/[\p{L}\p{N}]+/u', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_bytePosition)) {
> // It covers both cases a) there are no matches (preg_match(...) === 0)
> // b) error occurred (preg_match(...) === FALSE)
> return null;
141,142c101,114
< $termStartPosition = $this->_position - 1;
< $termText = $char;
---
> // matched string
> $matchedWord = $match[0][0];
>
> // binary position of the matched word in the input stream
> $binStartPos = $match[0][1];
>
> // character position of the matched word in the input stream
> $startPos = $this->_position +
> iconv_strlen(substr($this->_input,
> $this->_bytePosition,
> $binStartPos - $this->_bytePosition),
> 'UTF-8');
> // character position of the end of matched word in the input stream
> $endPos = $startPos + iconv_strlen($matchedWord, 'UTF-8');
144,148c116,117
< // read token
< while ($this->_position < $this->_streamLength &&
< self::_isAlNum($char = $this->_nextChar())) {
< $termText .= $char;
< }
---
> $this->_bytePosition = $binStartPos + strlen($matchedWord);
> $this->_position = $endPos;
150,164c119,120
< // Empty token, end of stream.
< if ($termText == '') {
< return null;
< }
<
< $token = new Zend_Search_Lucene_Analysis_Token(
< $termText,
< $termStartPosition,
< $this->_position - 1);
< $token = $this->normalize($token);
< if ($token !== null) {
< return $token;
< }
< // Continue if token is skipped
< }
---
> $token = $this->normalize(new Zend_Search_Lucene_Analysis_Token($matchedWord, $startPos, $endPos));
> } while ($token === null); // try again if token is skipped
166c122
< return null;
---
> return $token;
cvs diff: Diffing Zend/Search/Lucene/Analysis/Analyzer/Common/Text
Index: Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php,v
retrieving revision 1.2
diff -r1.2 CaseInsensitive.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
---
> require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
27c27
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php';
---
> require_once 'Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php';
34c34
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
cvs diff: Diffing Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum
Index: Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php,v
retrieving revision 1.1
diff -r1.1 CaseInsensitive.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php';
---
> require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php';
27c27
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php';
---
> require_once 'Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php';
34c34
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
cvs diff: Diffing Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8
cvs diff: Diffing Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num
cvs diff: Diffing Zend/Search/Lucene/Analysis/TokenFilter
Index: Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php,v
retrieving revision 1.2
diff -r1.2 LowerCase.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter.php';
---
> require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
33c33
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
Index: Zend/Search/Lucene/Analysis/TokenFilter/LowerCaseUtf8.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCaseUtf8.php,v
retrieving revision 1.1.4.2
diff -r1.1.4.2 LowerCaseUtf8.php
24c24
< require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
---
> require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/TokenFilter.php";
43a44
> global $CFG;
46c47
< require_once 'Zend/Search/Lucene/Exception.php';
---
> require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Exception.php";
Index: Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php,v
retrieving revision 1.1
diff -r1.1 ShortWords.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter.php';
---
> require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
33c33
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
Index: Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php,v
retrieving revision 1.1
diff -r1.1 StopWords.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24,25c24,25
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter.php';
< require_once $CFG->dirroot.'/search/Zend/Search/Exception.php';
---
> require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
> require_once 'Zend/Search/Exception.php';
37c37
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
44c44
< * Minimum allowed term length
---
> * Stop Words
66d65
< $t = $srcToken->getTermText();
cvs diff: Diffing Zend/Search/Lucene/Document
Index: Zend/Search/Lucene/Document/Html.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Document/Html.php,v
retrieving revision 1.1
diff -r1.1 Html.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document.php';
---
> require_once 'Zend/Search/Lucene/Document.php';
33c33
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
293c293
< $matchedNodes = $xpath->query("/html/body/*");
---
> $matchedNodes = $xpath->query("/html/body");
cvs diff: Diffing Zend/Search/Lucene/Index
Index: Zend/Search/Lucene/Index/DictionaryLoader.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/DictionaryLoader.php,v
retrieving revision 1.1
diff -r1.1 DictionaryLoader.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
---
> require_once 'Zend/Search/Lucene/Exception.php';
38c38
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
64c64,65
< if ($tiVersion != (int)0xFFFFFFFE) {
---
> if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
> $tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
68c69
< // $indexTermCount = = $tiiFile->readLong();
---
> // $indexTermCount = $tiiFile->readLong();
103a105,109
> if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
> /* Skip MaxSkipLevels value */
> $pos += 4;
> }
>
Index: Zend/Search/Lucene/Index/FieldInfo.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/FieldInfo.php,v
retrieving revision 1.2
diff -r1.2 FieldInfo.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
27c27
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
Index: Zend/Search/Lucene/Index/SegmentInfo.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/SegmentInfo.php,v
retrieving revision 1.3
diff -r1.3 SegmentInfo.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
23c23
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/DictionaryLoader.php';
---
> require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';
27c27,30
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
---
> require_once 'Zend/Search/Lucene/Exception.php';
>
> /** Zend_Search_Lucene_LockManager */
> require_once 'Zend/Search/Lucene/LockManager.php';
34c37
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
112a116,144
> /**
> * Delete file generation number
> *
> * -1 means 'there is no delete file'
> * 0 means pre-2.1 format delete file
> * X specifies used delete file
> *
> * @var integer
> */
> private $_delGen;
>
> /**
> * Segment has single norms file
> *
> * If true then one .nrm file is used for all fields
> * Otherwise .fN files are used
> *
> * @var boolean
> */
> private $_hasSingleNormFile;
>
> /**
> * Use compound segment file (*.cfs) to collect all other segment files
> * (excluding .del files)
> *
> * @var boolean
> */
> private $_isCompound;
>
139c171
< private $_deleted;
---
> private $_deleted = null;
150,151c182
< * Zend_Search_Lucene_Index_SegmentInfo constructor needs Segmentname,
< * Documents count and Directory as a parameter.
---
> * Zend_Search_Lucene_Index_SegmentInfo constructor
152a184
> * @param Zend_Search_Lucene_Storage_Directory $directory
155c187,188
< * @param Zend_Search_Lucene_Storage_Directory $directory
---
> * @param integer $delGen
> * @param boolean $isCompound
157c190
< public function __construct($name, $docCount, $directory)
---
> public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $hasSingleNormFile = false, $isCompound = null)
159,160d191
< $this->_name = $name;
< $this->_docCount = $docCount;
162c193,217
< $this->_termDictionary = null;
---
> $this->_name = $name;
> $this->_docCount = $docCount;
> $this->_hasSingleNormFile = $hasSingleNormFile;
> $this->_delGen = $delGen;
> $this->_termDictionary = null;
>
> if (!is_null($isCompound)) {
> $this->_isCompound = $isCompound;
> } else {
> // It's a pre-2.1 segment
> // detect if it uses compound file
> $this->_isCompound = true;
>
> try {
> // Try to open compound file
> $this->_directory->getFileObject($name . '.cfs');
> } catch (Zend_Search_Lucene_Exception $e) {
> if (strpos($e->getMessage(), 'is not readable') !== false) {
> // Compound file is not found or is not readable
> $this->_isCompound = false;
> } else {
> throw $e;
> }
> }
> }
165c220
< if ($this->_directory->fileExists($name . '.cfs')) {
---
> if ($this->_isCompound) {
205,210c260,279
< try {
< $delFile = $this->openCompoundFile('.del');
<
< $byteCount = $delFile->readInt();
< $byteCount = ceil($byteCount/8);
< $bitCount = $delFile->readInt();
---
> if ($this->_delGen == -1) {
> // There is no delete file for this segment
> // Do nothing
> } else if ($this->_delGen == 0) {
> // It's a segment with pre-2.1 format delete file
> // Try to find delete file
> try {
> // '.del' files always stored in a separate file
> // Segment compound is not used
> $delFile = $this->_directory->getFileObject($this->_name . '.del');
>
> $byteCount = $delFile->readInt();
> $byteCount = ceil($byteCount/8);
> $bitCount = $delFile->readInt();
>
> if ($bitCount == 0) {
> $delBytes = '';
> } else {
> $delBytes = $delFile->readBytes($byteCount);
> }
212,215c281,299
< if ($bitCount == 0) {
< $delBytes = '';
< } else {
< $delBytes = $delFile->readBytes($byteCount);
---
> if (extension_loaded('bitset')) {
> $this->_deleted = $delBytes;
> } else {
> $this->_deleted = array();
> for ($count = 0; $count < $byteCount; $count++) {
> $byte = ord($delBytes{$count});
> for ($bit = 0; $bit < 8; $bit++) {
> if ($byte & (1<<$bit)) {
> $this->_deleted[$count*8 + $bit] = 1;
> }
> }
> }
> }
> } catch(Zend_Search_Exception $e) {
> if (strpos($e->getMessage(), 'is not readable') === false ) {
> throw $e;
> }
> // There is no delete file
> // Do nothing
216a301,305
> } else {
> // It's 2.1+ format delete file
> $delFile = $this->_directory->getFileObject($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
>
> $format = $delFile->readInt();
218,219c307,312
< if (extension_loaded('bitset')) {
< $this->_deleted = $delBytes;
---
> if ($format == (int)0xFFFFFFFF) {
> /**
> * @todo Implement support of DGaps delete file format.
> * See Lucene file format for details - http://lucene.apache.org/java/docs/fileformats.html#Deleted%20Documents
> */
> throw new Zend_Search_Lucene_Exception('DGaps delete file format is not supported. Optimize index to use it with Zend_Search_Lucene');
221,226c314,333
< $this->_deleted = array();
< for ($count = 0; $count < $byteCount; $count++) {
< $byte = ord($delBytes{$count});
< for ($bit = 0; $bit < 8; $bit++) {
< if ($byte & (1<<$bit)) {
< $this->_deleted[$count*8 + $bit] = 1;
---
> // $format is actually byte count
> $byteCount = ceil($format/8);
> $bitCount = $delFile->readInt();
>
> if ($bitCount == 0) {
> $delBytes = '';
> } else {
> $delBytes = $delFile->readBytes($byteCount);
> }
>
> if (extension_loaded('bitset')) {
> $this->_deleted = $delBytes;
> } else {
> $this->_deleted = array();
> for ($count = 0; $count < $byteCount; $count++) {
> $byte = ord($delBytes{$count});
> for ($bit = 0; $bit < 8; $bit++) {
> if ($byte & (1<<$bit)) {
> $this->_deleted[$count*8 + $bit] = 1;
> }
231,236d337
< } catch(Zend_Search_Exception $e) {
< if (strpos($e->getMessage(), 'compound file doesn\'t contain') !== false ) {
< $this->_deleted = null;
< } else {
< throw $e;
< }
252,253c353
< // Try to open common file first
< if ($this->_directory->fileExists($filename)) {
---
> if (!$this->_isCompound) {
258c358
< throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
---
> throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain '
346a447,456
> * Returns actual deletions file generation number.
> *
> * @return integer
> */
> public function getDelGen()
> {
> return $this->_delGen;
> }
>
> /**
434a545,579
> * Load terms dictionary index
> *
> * @throws Zend_Search_Lucene_Exception
> */
> private function _loadDictionaryIndex()
> {
> // Check, if index is already serialized
> if ($this->_directory->fileExists($this->_name . '.sti')) {
> // Load serialized dictionary index data
> $stiFile = $this->_directory->getFileObject($this->_name . '.sti');
> $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti'));
>
> // Load dictionary index data
> if (($unserializedData = @unserialize($stiFileData)) !== false) {
> list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
> return;
> }
> }
>
> // Load data from .tii file and generate .sti file
>
> // Prefetch dictionary index data
> $tiiFile = $this->openCompoundFile('.tii');
> $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));
>
> // Load dictionary index data
> list($this->_termDictionary, $this->_termDictionaryInfos) =
> Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);
>
> $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
> $stiFile = $this->_directory->createFile($this->_name . '.sti');
> $stiFile->writeBytes($stiFileData);
> }
>
> /**
455,476c600
< // Check, if index is already serialized
< if ($this->_directory->fileExists($this->_name . '.sti')) {
< // Prefetch dictionary index data
< $stiFile = $this->_directory->getFileObject($this->_name . '.sti');
< $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti'));
<
< // Load dictionary index data
< list($this->_termDictionary, $this->_termDictionaryInfos) = unserialize($stiFileData);
< } else {
< // Prefetch dictionary index data
< $tiiFile = $this->openCompoundFile('.tii');
< $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));
<
< // Load dictionary index data
< list($this->_termDictionary, $this->_termDictionaryInfos) =
< Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);
<
< $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
< $stiFile = $this->_directory->createFile($this->_name . '.sti');
< $stiFile->writeBytes($stiFileData);
< }
<
---
> $this->_loadDictionaryIndex();
479,480d602
<
<
529c651,652
< if ($tiVersion != (int)0xFFFFFFFE) {
---
> if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
> $tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
535a659,661
> if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
> $maxSkipLevels = $tisFile->readInt();
> }
537c663
< $tisFile->seek($prevTermInfo[4] /* indexPointer */ - 20 /* header size*/, SEEK_CUR);
---
> $tisFile->seek($prevTermInfo[4] /* indexPointer */ - (($tiVersion == (int)0xFFFFFFFD)? 24 : 20) /* header size*/, SEEK_CUR);
668a795
> * @throws Zend_Search_Lucene_Exception
672,673c799,817
< $fFile = $this->openCompoundFile('.f' . $fieldNum);
< $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
---
> if ($this->_hasSingleNormFile) {
> $normfFile = $this->openCompoundFile('.nrm');
>
> $header = $normfFile->readBytes(3);
> $headerFormatVersion = $normfFile->readByte();
>
> if ($header != 'NRM' || $headerFormatVersion != (int)0xFF) {
> throw new Zend_Search_Lucene_Exception('Wrong norms file format.');
> }
>
> foreach ($this->_fields as $fieldNum => $fieldInfo) {
> if ($fieldInfo->isIndexed) {
> $this->_norms[$fieldNum] = $normfFile->readBytes($this->_docCount);
> }
> }
> } else {
> $fFile = $this->openCompoundFile('.f' . $fieldNum);
> $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
> }
734a879,898
> * Returns true if segment has single norms file.
> *
> * @return boolean
> */
> public function hasSingleNormFile()
> {
> return $this->_hasSingleNormFile ? 1 : 0;
> }
>
> /**
> * Returns true if segment is stored using compound segment file.
> *
> * @return boolean
> */
> public function isCompound()
> {
> return $this->_isCompound ? 1 : 0;
> }
>
> /**
806c970,998
< $delFile = $this->_directory->createFile($this->_name . '.del');
---
> // Get new generation number
> Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
>
> $delFileList = array();
> foreach ($this->_directory->fileList() as $file) {
> if ($file == $this->_name . '.del') {
> // Matches .del file name
> $delFileList[] = 0;
> } else if (preg_match('/^' . $this->_name . '_([a-zA-Z0-9]+)\.del$/i', $file, $matches)) {
> // Matches _NNN.del file names
> $delFileList[] = (int)base_convert($matches[1], 36, 10);
> }
> }
>
> if (count($delFileList) == 0) {
> // There is no deletions file for current segment in the directory
> // Set deletions file generation number to 1
> $this->_delGen = 1;
> } else {
> // There are some deletions files for current segment in the directory
> // Set deletions file generation number to the highest + 1
> $this->_delGen = max($delFileList) + 1;
> }
>
> $delFile = $this->_directory->createFile($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
>
> Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
>
>
823a1016,1022
> * Actual offset of the .tis file data
> *
> * @var integer
> */
> private $_tisFileOffset;
>
> /**
831c1030
< * Offset of the .frq file in the compound file
---
> * Actual offset of the .frq file data
845c1044
< * Offset of the .prx file in the compound file
---
> * Actual offset of the .prx file in the compound file
853c1052
< * Number of terms in term stream
---
> * Actual number of terms in term stream
859a1059,1072
> * Overall number of terms in term stream
> *
> * @var integer
> */
> private $_termNum = 0;
>
> /**
> * Segment index interval
> *
> * @var integer
> */
> private $_indexInterval;
>
> /**
894c1107,1109
< * @var array
---
> * Is set to null if term positions loading has to be skipped
> *
> * @var array|null
897a1113,1132
>
> /**
> * Terms scan mode
> *
> * Values:
> *
> * self::SM_TERMS_ONLY - terms are scanned, no additional info is retrieved
> * self::SM_MERGE_INFO - terms are scanned, frequency and position info is retrieved
> * document numbers are compacted (shifted if segment has deleted documents)
> *
> * @var integer
> */
> private $_termsScanMode;
>
> /** Scan modes */
> const SM_TERMS_ONLY = 0; // terms are scanned, no additional info is retrieved
> const SM_FULL_INFO = 1; // terms are scanned, frequency and position info is retrieved
> const SM_MERGE_INFO = 2; // terms are scanned, frequency and position info is retrieved
> // document numbers are compacted (shifted if segment contains deleted documents)
>
907c1142
< * @param boolean $compact
---
> * @param integer $mode
911c1146
< public function reset($startId = 0, $compact = false)
---
> public function reset($startId = 0, $mode = self::SM_TERMS_ONLY)
917a1153,1154
> $this->_tisFileOffset = $this->_tisFile->tell();
>
919c1156,1157
< if ($tiVersion != (int)0xFFFFFFFE) {
---
> if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
> $tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
923,925c1161,1167
< $this->_termCount = $this->_tisFile->readLong();
< $this->_tisFile->readInt(); // Read Index interval
< $this->_skipInterval = $this->_tisFile->readInt(); // Read skip interval
---
> $this->_termCount =
> $this->_termNum = $this->_tisFile->readLong(); // Read terms count
> $this->_indexInterval = $this->_tisFile->readInt(); // Read Index interval
> $this->_skipInterval = $this->_tisFile->readInt(); // Read skip interval
> if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
> $maxSkipLevels = $this->_tisFile->readInt();
> }
930,932d1171
< $this->_frqFile = $this->openCompoundFile('.frq', false);
< $this->_frqFileOffset = $this->_frqFile->tell();
<
936,937c1175
< $this->_prxFile = $this->openCompoundFile('.prx', false);
< $this->_prxFileOffset = $this->_prxFile->tell();
---
> $this->_docMap = array();
939,940c1177,1186
< $this->_lastTerm = new Zend_Search_Lucene_Index_Term('', -1);
< $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);
---
> $this->_lastTerm = new Zend_Search_Lucene_Index_Term('', -1);
> $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);
> $this->_lastTermPositions = null;
>
> $this->_termsScanMode = $mode;
>
> switch ($mode) {
> case self::SM_TERMS_ONLY:
> // Do nothing
> break;
942,946c1188,1206
< $this->_docMap = array();
< for ($count = 0; $count < $this->_docCount; $count++) {
< if (!$this->isDeleted($count)) {
< $this->_docMap[$count] = $startId + ($compact ? count($this->_docMap) : $count);
< }
---
> case self::SM_FULL_INFO:
> // break intentionally omitted
> case self::SM_MERGE_INFO:
> $this->_frqFile = $this->openCompoundFile('.frq', false);
> $this->_frqFileOffset = $this->_frqFile->tell();
>
> $this->_prxFile = $this->openCompoundFile('.prx', false);
> $this->_prxFileOffset = $this->_prxFile->tell();
>
> for ($count = 0; $count < $this->_docCount; $count++) {
> if (!$this->isDeleted($count)) {
> $this->_docMap[$count] = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $count);
> }
> }
> break;
>
> default:
> throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.');
> break;
948a1209
>
950c1211,1358
< return $startId + ($compact ? count($this->_docMap) : $this->_docCount);
---
> return $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $this->_docCount);
> }
>
>
> /**
> * Skip terms stream up to specified term prefix.
> *
> * Prefix contains fully specified field info and portion of searched term
> *
> * @param Zend_Search_Lucene_Index_Term $prefix
> * @throws Zend_Search_Lucene_Exception
> */
> public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
> {
> if ($this->_termDictionary === null) {
> $this->_loadDictionaryIndex();
> }
>
> $searchField = $this->getFieldNum($prefix->field);
>
> if ($searchField == -1) {
> /**
> * Field is not presented in this segment
> * Go to the end of dictionary
> */
> $this->_tisFile = null;
> $this->_frqFile = null;
> $this->_prxFile = null;
>
> $this->_lastTerm = null;
> $this->_lastTermInfo = null;
> $this->_lastTermPositions = null;
>
> return;
> }
> $searchDicField = $this->_getFieldPosition($searchField);
>
> // search for appropriate value in dictionary
> $lowIndex = 0;
> $highIndex = count($this->_termDictionary)-1;
> while ($highIndex >= $lowIndex) {
> // $mid = ($highIndex - $lowIndex)/2;
> $mid = ($highIndex + $lowIndex) >> 1;
> $midTerm = $this->_termDictionary[$mid];
>
> $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
> $delta = $searchDicField - $fieldNum;
> if ($delta == 0) {
> $delta = strcmp($prefix->text, $midTerm[1] /* text */);
> }
>
> if ($delta < 0) {
> $highIndex = $mid-1;
> } elseif ($delta > 0) {
> $lowIndex = $mid+1;
> } else {
> // We have reached term we are looking for
> break;
> }
> }
>
> if ($highIndex == -1) {
> // Term is out of the dictionary range
> $this->_tisFile = null;
> $this->_frqFile = null;
> $this->_prxFile = null;
>
> $this->_lastTerm = null;
> $this->_lastTermInfo = null;
> $this->_lastTermPositions = null;
>
> return;
> }
>
> $prevPosition = $highIndex;
> $prevTerm = $this->_termDictionary[$prevPosition];
> $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];
>
> if ($this->_tisFile === null) {
> // The end of terms stream is reached and terms dictionary file is closed
> // Perform mini-reset operation
> $this->_tisFile = $this->openCompoundFile('.tis', false);
>
> if ($this->_termsScanMode == self::SM_FULL_INFO || $this->_termsScanMode == self::SM_MERGE_INFO) {
> $this->_frqFile = $this->openCompoundFile('.frq', false);
> $this->_prxFile = $this->openCompoundFile('.prx', false);
> }
> }
> $this->_tisFile->seek($this->_tisFileOffset + $prevTermInfo[4], SEEK_SET);
>
> $this->_lastTerm = new Zend_Search_Lucene_Index_Term($prevTerm[1] /* text */,
> ($prevTerm[0] == -1) ? '' : $this->_fields[$prevTerm[0] /* field */]->name);
> $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($prevTermInfo[0] /* docFreq */,
> $prevTermInfo[1] /* freqPointer */,
> $prevTermInfo[2] /* proxPointer */,
> $prevTermInfo[3] /* skipOffset */);
> $this->_termCount = $this->_termNum - $prevPosition*$this->_indexInterval;
>
> if ($highIndex == 0) {
> // skip start entry
> $this->nextTerm();
> } else if ($prefix->field == $this->_lastTerm->field && $prefix->text == $this->_lastTerm->text) {
> // We got exact match in the dictionary index
>
> if ($this->_termsScanMode == self::SM_FULL_INFO || $this->_termsScanMode == self::SM_MERGE_INFO) {
> $this->_lastTermPositions = array();
>
> $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
> $freqs = array(); $docId = 0;
> for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
> $docDelta = $this->_frqFile->readVInt();
> if( $docDelta % 2 == 1 ) {
> $docId += ($docDelta-1)/2;
> $freqs[ $docId ] = 1;
> } else {
> $docId += $docDelta/2;
> $freqs[ $docId ] = $this->_frqFile->readVInt();
> }
> }
>
> $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
> foreach ($freqs as $docId => $freq) {
> $termPosition = 0; $positions = array();
>
> for ($count = 0; $count < $freq; $count++ ) {
> $termPosition += $this->_prxFile->readVInt();
> $positions[] = $termPosition;
> }
>
> if (isset($this->_docMap[$docId])) {
> $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
> }
> }
> }
>
> return;
> }
>
> // Search term matching specified prefix
> while ($this->_lastTerm !== null) {
> if ( strcmp($this->_lastTerm->field, $prefix->field) > 0 ||
> ($prefix->field == $this->_lastTerm->field && strcmp($this->_lastTerm->text, $prefix->text) >= 0) ) {
> // Current term matches or is greater than the pattern
> return;
> }
>
> $this->nextTerm();
> }
962,963c1370,1373
< $this->_lastTerm = null;
< $this->_lastTermInfo = null;
---
> $this->_lastTerm = null;
> $this->_lastTermInfo = null;
> $this->_lastTermPositions = null;
> $this->_docMap = null;
992c1402,1403
< $this->_lastTermPositions = array();
---
> if ($this->_termsScanMode == self::SM_FULL_INFO || $this->_termsScanMode == self::SM_MERGE_INFO) {
> $this->_lastTermPositions = array();
994,1003c1405,1415
< $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
< $freqs = array(); $docId = 0;
< for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
< $docDelta = $this->_frqFile->readVInt();
< if( $docDelta % 2 == 1 ) {
< $docId += ($docDelta-1)/2;
< $freqs[ $docId ] = 1;
< } else {
< $docId += $docDelta/2;
< $freqs[ $docId ] = $this->_frqFile->readVInt();
---
> $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
> $freqs = array(); $docId = 0;
> for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
> $docDelta = $this->_frqFile->readVInt();
> if( $docDelta % 2 == 1 ) {
> $docId += ($docDelta-1)/2;
> $freqs[ $docId ] = 1;
> } else {
> $docId += $docDelta/2;
> $freqs[ $docId ] = $this->_frqFile->readVInt();
> }
1005d1416
< }
1007,1014c1418,1425
< $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
< foreach ($freqs as $docId => $freq) {
< $termPosition = 0; $positions = array();
<
< for ($count = 0; $count < $freq; $count++ ) {
< $termPosition += $this->_prxFile->readVInt();
< $positions[] = $termPosition;
< }
---
> $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
> foreach ($freqs as $docId => $freq) {
> $termPosition = 0; $positions = array();
>
> for ($count = 0; $count < $freq; $count++ ) {
> $termPosition += $this->_prxFile->readVInt();
> $positions[] = $termPosition;
> }
1016,1017c1427,1429
< if (isset($this->_docMap[$docId])) {
< $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
---
> if (isset($this->_docMap[$docId])) {
> $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
> }
1021d1432
<
1031a1443,1460
> /**
> * Close terms stream
> *
> * Should be used for resource cleanup if the stream is not read to the end
> */
> public function closeTermsStream()
> {
> $this->_tisFile = null;
> $this->_frqFile = null;
> $this->_prxFile = null;
>
> $this->_lastTerm = null;
> $this->_lastTermInfo = null;
> $this->_lastTermPositions = null;
>
> $this->_docMap = null;
> }
>
1036d1464
< * @param Zend_Search_Lucene_Index_Term $term
Index: Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php,v
retrieving revision 1.1
diff -r1.1 SegmentInfoPriorityQueue.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
---
> require_once 'Zend/Search/Lucene/Exception.php';
27c27
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/PriorityQueue.php';
---
> require_once 'Zend/Search/Lucene/PriorityQueue.php';
34c34
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
Index: Zend/Search/Lucene/Index/SegmentMerger.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/SegmentMerger.php,v
retrieving revision 1.1
diff -r1.1 SegmentMerger.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
---
> require_once 'Zend/Search/Lucene/Exception.php';
27c27
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php';
---
> require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
30c30
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';
---
> require_once 'Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';
33c33
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';
---
> require_once 'Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';
40c40
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
235c235
< $segmentStartId = $segmentInfo->reset($segmentStartId, true);
---
> $segmentStartId = $segmentInfo->reset($segmentStartId, Zend_Search_Lucene_Index_SegmentInfo::SM_MERGE_INFO);
Index: Zend/Search/Lucene/Index/SegmentWriter.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/SegmentWriter.php,v
retrieving revision 1.2
diff -r1.2 SegmentWriter.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
24c24
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
---
> require_once 'Zend/Search/Lucene/Exception.php';
27c27
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php';
---
> require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
34c34
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
50c50,51
< /** Expert: The fraction of TermDocs entries stored in skip tables.
---
> /**
> * Expert: The fraction of TermDocs entries stored in skip tables.
56,57c57,59
< * 0x0x7FFFFFFF indicates that we don't use skip data
< * Default value is 16
---
> * 0x7FFFFFFF indicates that we don't use skip data
> *
> * Note: not used in current implementation
63a66,77
> * Expert: The maximum number of skip levels. Smaller values result in
> * slightly smaller indexes, but slower skipping in big posting lists.
> *
> * 0 indicates that we don't use skip data
> *
> * Note: not used in current implementation
> *
> * @var integer
> */
> public static $maxSkipLevels = 0;
>
> /**
248a263,272
> * Return segment name
> *
> * @return string
> */
> public function getName()
> {
> return $this->_name;
> }
>
> /**
255a280,285
> $nrmFile = $this->_directory->createFile($this->_name . '.nrm');
> // Write header
> $nrmFile->writeBytes('NRM');
> // Write format specifier
> $nrmFile->writeByte((int)0xFF);
>
265,268c295,301
< $normFileName = $this->_name . '.f' . $field->number;
< $fFile = $this->_directory->createFile($normFileName);
< $fFile->writeBytes($this->_norms[$field->name]);
< $this->_files[] = $normFileName;
---
> // pre-2.1 index mode (not used now)
> // $normFileName = $this->_name . '.f' . $field->number;
> // $fFile = $this->_directory->createFile($normFileName);
> // $fFile->writeBytes($this->_norms[$field->name]);
> // $this->_files[] = $normFileName;
>
> $nrmFile->writeBytes($this->_norms[$field->name]);
272a306
> $this->_files[] = $this->_name . '.nrm';
354c388
< $this->_tisFile->writeInt((int)0xFFFFFFFE);
---
> $this->_tisFile->writeInt((int)0xFFFFFFFD);
357a392
> $this->_tisFile->writeInt(self::$maxSkipLevels);
360c395
< $this->_tiiFile->writeInt((int)0xFFFFFFFE);
---
> $this->_tiiFile->writeInt((int)0xFFFFFFFD);
363a399
> $this->_tiiFile->writeInt(self::$maxSkipLevels);
373c409
< $this->_tiiFile->writeVInt(20); // IndexDelta
---
> $this->_tiiFile->writeVInt(24); // IndexDelta
387c423
< $this->_lastIndexPosition = 20;
---
> $this->_lastIndexPosition = 24;
Index: Zend/Search/Lucene/Index/Term.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/Term.php,v
retrieving revision 1.2
diff -r1.2 Term.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
34c34
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
76a77
> * @param string $str
106a108,141
>
> /**
> * Get UTF-8 string length
> *
> * @param string $str
> * @return integer
> */
> public static function getLength($str)
> {
> $bytes = 0;
> $chars = 0;
> while ($bytes < strlen($str)) {
> $charBytes = 1;
> if ((ord($str[$bytes]) & 0xC0) == 0xC0) {
> $charBytes++;
> if (ord($str[$bytes]) & 0x20 ) {
> $charBytes++;
> if (ord($str[$bytes]) & 0x10 ) {
> $charBytes++;
> }
> }
> }
>
> if ($bytes + $charBytes > strlen($str)) {
> // wrong character
> break;
> }
>
> $chars++;
> $bytes += $charBytes;
> }
>
> return $chars;
> }
Index: Zend/Search/Lucene/Index/TermInfo.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/TermInfo.php,v
retrieving revision 1.2
diff -r1.2 TermInfo.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
29c29
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
Index: Zend/Search/Lucene/Index/Writer.php
===================================================================
RCS file: /cvsroot/moodle/moodle/search/Zend/Search/Lucene/Index/Writer.php,v
retrieving revision 1.2
diff -r1.2 Writer.php
18c18
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
23,24c23,24
< /** Zend_Search_Lucene_Index_SegmentWriter_ */
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';
---
> /** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
> require_once 'Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';
27c27
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php';
---
> require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
30c30,33
< require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentMerger.php';
---
> require_once 'Zend/Search/Lucene/Index/SegmentMerger.php';
>
> /** Zend_Search_Lucene_LockManager */
> require_once 'Zend/Search/Lucene/LockManager.php';
38c41
< * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
---
> * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
156a160
>
158,163c162
< * Opens the index for writing
< *
< * IndexWriter constructor needs Directory as a parameter. It should be
< * a string with a path to the index folder or a Directory object.
< * Second constructor parameter create is optional - true to create the
< * index or overwrite the existing one.
---
> * Create empty index
166,167c165,166
< * @param array $segmentInfos
< * @param boolean $create
---
> * @param integer $generation
> * @param int