19 set_include_path(get_include_path().PATH_SEPARATOR.BASE.
'wcmf/3rdparty/zend');
21 require_once BASE.
'wcmf/3rdparty/zend/Zend/Search/Lucene.php';
22 require_once BASE.
'wcmf/3rdparty/zend/Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php';
23 require_once BASE.
'wcmf/lib/util/class.InifileParser.php';
24 require_once BASE.
'wcmf/lib/util/class.StringUtil.php';
// Provides the Lucene index kept in self::$index.
// NOTE(review): this is a partial listing (the embedded numbers skip original
// lines), so the return statement and the bodies of some branches are not
// visible here.
// $create defaults to true; a truthy value forces the setup below even when an
// index is already cached.
52 public static function getIndex($create =
true)
// Guard on the activation flag; the guarded branch is not part of this excerpt.
54 if (!self::isActivated()) {
// Run the setup when no index is cached yet or the caller passed a truthy $create.
58 if (!self::$index || $create)
// Build a stop word filter from self::getStopWords() and attach it to $analyzer
// (the creation of $analyzer is not visible in this excerpt).
65 $stopWords = self::getStopWords();
66 $stopWordsFilter =
new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($stopWords);
67 $analyzer->addFilter($stopWordsFilter);
// Static Zend Lucene settings: default analyzer, wildcard minimum prefix length
// of 0, UTF-8 as query parser encoding and AND as the default boolean operator.
69 Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
70 Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength(0);
71 Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding(
'UTF-8');
72 Zend_Search_Lucene_Search_QueryParser::setDefaultOperator(Zend_Search_Lucene_Search_QueryParser::B_AND);
// Open the index located at $indexPath (its assignment is not visible here).
75 self::$index = Zend_Search_Lucene::open(
$indexPath);
// On a Zend_Search_Lucene_Exception the cached index is replaced by the result
// of self::resetIndex(); presumably this guards the open() call above - confirm.
79 catch (Zend_Search_Lucene_Exception $ex) {
80 self::$index = self::resetIndex();
91 if (!self::isActivated()) {
// Body fragment that builds a Lucene document for $obj and adds it to the index.
// NOTE(review): the method header is not visible in this excerpt; judging from
// the member list at the end of this listing it is presumably
// indexInSearch(&$obj) - confirm.
// Guard on the activation flag; the guarded branch is not part of this excerpt.
105 if (!self::isActivated()) {
// Only objects whose isIndexInSearch() result is truthy are processed.
108 if ($obj->isIndexInSearch()) {
109 $index = self::getIndex();
111 $doc =
new Zend_Search_Lucene_Document();
// The object id is added through Zend_Search_Lucene_Field::unIndexed().
// NOTE(review): the same 'oid' field is looked up by term further down
// (termDocs); verify that a field created with unIndexed() can be matched
// that way, otherwise earlier documents of this object are never found.
115 $doc->addField(Zend_Search_Lucene_Field::unIndexed(
'oid', $obj->getOID(),
'UTF-8'));
// The type is added as a keyword field with isStored switched off.
116 $typeField = Zend_Search_Lucene_Field::keyword(
'type', $obj->getType(),
'UTF-8');
117 $typeField->isStored =
false;
118 $doc->addField($typeField);
// One field per value name ($valueNames is assigned on a line not visible here).
120 foreach ($valueNames as $curValueName) {
121 $properties = $obj->getValueProperties($curValueName);
122 $inputType = $properties[
'input_type'];
123 $value = self::encodeValue($obj->getValue($curValueName,
DATATYPE_ATTRIBUTE), $inputType);
// Input types starting with "text", "ckeditor" or "fckeditor": tags are
// stripped and the value is added as an unStored field.
124 if (preg_match(
'/^text|^f?ckeditor/', $inputType)) {
125 $value = strip_tags($value);
126 $doc->addField(Zend_Search_Lucene_Field::unStored($curValueName, $value,
'UTF-8'));
// Keyword field with isStored switched off; presumably the else branch for all
// other input types (the branch header is not visible) - confirm.
129 $field = Zend_Search_Lucene_Field::keyword($curValueName, $value,
'UTF-8');
130 $field->isStored =
false;
131 $doc->addField($field);
// Look up the documents already indexed for this OID; the loop body is not
// visible here (presumably it removes them before re-adding) - confirm.
135 $term =
new Zend_Search_Lucene_Index_Term($obj->getOID(),
'oid');
136 $docIds =
$index->termDocs($term);
137 foreach ($docIds as $id) {
// Add the new document and flag the index as modified.
141 $index->addDocument($doc);
142 self::$indexIsDirty =
true;
// Fragment, presumably from encodeValue($value, $inputType) as listed in the
// member list at the end of this listing (header not visible) - confirm.
// For input types starting with "ckeditor" or "fckeditor" the HTML entities in
// $value are decoded (ENT_QUOTES, UTF-8).
148 if (preg_match(
'/^f?ckeditor/', $inputType)) {
149 $value = html_entity_decode($value, ENT_QUOTES,
'UTF-8');
// Fragment, presumably from deleteFromSearch(&$obj) as listed in the member
// list at the end of this listing (header not visible) - confirm.
// Guard on the activation flag; the guarded branch is not part of this excerpt.
160 if (!self::isActivated()) {
// Only objects whose isIndexInSearch() result is truthy are processed.
163 if ($obj->isIndexInSearch())
165 $index = self::getIndex();
// Look up the indexed documents by the object's OID term; the loop body is not
// visible here (presumably it deletes each document) - confirm.
167 $term =
new Zend_Search_Lucene_Index_Term($obj->getOID(),
'oid');
168 $docIds =
$index->termDocs($term);
169 foreach ($docIds as $id)
// Flag the index as modified.
173 self::$indexIsDirty =
true;
// Fragment, presumably from commitIndex($optimize=true) as listed in the member
// list at the end of this listing (header not visible) - confirm.
// Guard on the activation flag; the guarded branch is not part of this excerpt.
184 if (!self::isActivated()) {
// Work is only done when the index was flagged as modified; the index is then
// fetched with $create = false.
187 if (self::$indexIsDirty)
189 $index = self::getIndex(
false);
204 if (!self::isActivated()) {
207 $index = self::getIndex(
false);
// Fragment that resolves and returns self::$indexPath (method header not
// visible in this excerpt). The path is only resolved while self::$indexPath
// is still empty.
219 if (!self::$indexPath)
// Read the configured path from $parser (its creation is not visible here);
// a return value of false means the key is not available.
222 if (($path = $parser->getValue(self::INI_INDEX_PATH, self::INI_SECTION)) !==
false)
// The configured value is appended to BASE . 'application/'.
224 self::$indexPath = BASE .
'application/' . $path;
// Handling of a missing directory is not visible here (presumably it is
// created) - confirm.
226 if (!file_exists(self::$indexPath)) {
// A non-writeable location is reported through Log::error.
230 if (!is_writeable(self::$indexPath)) {
231 Log::error(
"Index path '".self::$indexPath.
"' is not writeable.", __CLASS__);
234 Log::debug(
"Lucene index location: ".self::$indexPath, __CLASS__);
// Logs the parser's error message; presumably the branch taken when the
// configuration lookup above returned false - confirm.
238 Log::error($parser->getErrorMsg(), __CLASS__);
241 return self::$indexPath;
// Fragment that computes the activation flag once and caches it in
// self::$isActivated (method header not visible; presumably isActivated()).
// The flag is true when $parser returns anything but false for the index path
// key ($parser is created on a line not visible here).
250 if (self::$isActivated === null) {
252 self::$isActivated = $parser->getValue(self::INI_INDEX_PATH, self::INI_SECTION) !==
false;
254 return self::$isActivated;
// Fragment (method header not visible; presumably getStopWords()): returns the
// global STOP_WORDS string split at newline characters.
263 return explode(
"\n",
$GLOBALS[
'STOP_WORDS']);
// Searches the Lucene index for $searchTerm.
// $pagingInfo is passed by reference; when it is not null and its page size is
// greater than 0, it receives the total hit count and the hits are reduced to
// the current page.
// NOTE(review): partial listing - several original lines (e.g. loading of $obj,
// $valueNames, $oid and $summary) are not visible here.
273 public static function find($searchTerm, &$pagingInfo)
// Guard on the activation flag; the guarded branch is not part of this excerpt.
276 if (!self::isActivated()) {
// Fetch the index with $create = false.
279 $index = self::getIndex(
false);
// Parse the search term as a UTF-8 query and run it against the index.
282 $query = Zend_Search_Lucene_Search_QueryParser::parse($searchTerm,
'UTF-8');
284 $hits =
$index->find($query);
// Paging: report the total number of hits, then keep only the requested slice.
// Note that the null check uses the loose != comparison.
285 if ($pagingInfo != null && $pagingInfo->getPageSize() > 0) {
286 $pagingInfo->setTotalCount(
sizeof($hits));
287 $hits = array_slice($hits, $pagingInfo->getIndex(), $pagingInfo->getPageSize());
289 foreach($hits as $hit) {
// Pattern for (possibly nested) <b style="color:black;background-color:#rrggbb">
// markup; group 3 captures the enclosed text. It is applied to the output of
// htmlFragmentHighlightMatches() below.
294 $highlightedRegex =
'/((<b style="color:black;background-color:#[0-9a-f]{6}">)+)([^<]+?)((<\/b>)+)/';
// Check every value of the object for highlighted matches.
297 foreach ($valueNames as $curValueName) {
298 $properties = $obj->getValueProperties($curValueName);
299 $inputType = $properties[
'input_type'];
300 $value = self::encodeValue($obj->getValue($curValueName,
DATATYPE_ATTRIBUTE), $inputType);
// Empty values are skipped; otherwise matches are highlighted in the
// tag-stripped value.
301 if (strlen($value) > 0) {
302 $highlighted = $query->htmlFragmentHighlightMatches(strip_tags($value),
'UTF-8');
// When highlight markup is present, it is replaced by
// <em class="highlighted">...</em> around the captured text.
304 if (preg_match($highlightedRegex, $highlighted, $matches)) {
306 $highlighted = preg_replace($highlightedRegex,
' <em class="highlighted">$3</em> ', $highlighted);
307 $highlighted = trim(preg_replace(
'/ |[\n\r\t]/',
' ', $highlighted));
// One result entry per hit, keyed by $oid; the visible keys are 'score' (taken
// from the hit) and 'summary' (original line 315 is not part of this excerpt).
314 $results[$oid] = array(
316 'score' => $hit->score,
317 'summary' => $summary
// Generic exception handler; its body is not visible in this excerpt.
321 catch (Exception $ex) {
// Analyzer built on Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive.
329 class Analyzer extends Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive {
// Forwards $data to the parent with the encoding fixed to 'UTF-8' (the header
// of the enclosing method is not visible here; presumably setInput()).
335 parent::setInput($data,
'UTF-8');
static commitIndex($optimize=true)
error($message, $category)
static find($searchTerm, &$pagingInfo)
static getIndex($create=true)
static encodeValue($value, $inputType)
debug($message, $category)
static deleteFromSearch(&$obj)
setInput($data, $encoding= '')
static indexInSearch(&$obj)
This class provides access to the search based on Zend_Search_Lucene. The search index is stored in the ...
excerpt($text, $phrase, $radius=100)