SPTSearchEngine.php
Go to the documentation of this file.
<?PHP
#
#   FILE:  SearchEngine.php
#
#   FUNCTIONS PROVIDED:
#       SPTSearchEngine->SPTSearchEngine()
#           - constructor
#       (see Scout--SearchEngine.php for other public methods)
#
#   AUTHOR:  Edward Almasy
#
#   Part of the Scout Portal Toolkit
#   Copyright 2002-2004 Internet Scout Project
#   http://scout.wisc.edu
#

# Search engine specialization that registers every metadata schema field
# with the generic SearchEngine parent class and overrides the parent's
# content-retrieval, phrase-search, comparison-search, and sorting hooks
# so that they operate on resource records.
class SPTSearchEngine extends SearchEngine {

    # Object constructor.  Creates a database handle, initializes the
    # parent search engine for the "Resources" table keyed on "ResourceId",
    # and adds each schema field to the engine with a search field type
    # derived from the field's metadata type.  (MDFTYPE_POINT fields are
    # skipped; any unrecognized type terminates the script.)
    function SPTSearchEngine()
    {
        # create a database handle
        $DB = new SPTDatabase();

        # pass database handle and config values to real search engine object
        $this->SearchEngine($DB, "Resources", "ResourceId");

        # for each field defined in schema
        $this->Schema = new MetadataSchema();
        $Fields = $this->Schema->GetFields();
        foreach ($Fields as $Field)
        {
            # determine field type for searching
            switch ($Field->Type())
            {
                case MetadataSchema::MDFTYPE_TEXT:
                case MetadataSchema::MDFTYPE_PARAGRAPH:
                case MetadataSchema::MDFTYPE_USER:
                case MetadataSchema::MDFTYPE_TREE:
                case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                case MetadataSchema::MDFTYPE_OPTION:
                case MetadataSchema::MDFTYPE_IMAGE:
                case MetadataSchema::MDFTYPE_FILE:
                case MetadataSchema::MDFTYPE_URL:
                    $FieldType = SEARCHFIELD_TEXT;
                    break;

                case MetadataSchema::MDFTYPE_NUMBER:
                case MetadataSchema::MDFTYPE_FLAG:
                    $FieldType = SEARCHFIELD_NUMERIC;
                    break;

                case MetadataSchema::MDFTYPE_DATE:
                    $FieldType = SEARCHFIELD_DATERANGE;
                    break;

                case MetadataSchema::MDFTYPE_TIMESTAMP:
                    $FieldType = SEARCHFIELD_DATE;
                    break;

                case MetadataSchema::MDFTYPE_POINT:
                    # (NULL marks the field as one that is not added to the engine)
                    $FieldType = NULL;
                    break;

                default:
                    exit("ERROR: unknown field type in SPT--SearchEngine.php");
                    break;
            }

            if ($FieldType !== NULL)
            {
                # add field to search engine
                $this->AddField($Field->Name(), $Field->DBFieldName(), $FieldType,
                        $Field->SearchWeight(), $Field->IncludeInKeywordSearch());
            }
        }
    }

    # overloaded version of method to retrieve text from DB
    #   $ItemId - ID of resource to load
    #   $FieldName - name of metadata field to retrieve
    # returns whatever Resource::Get() returns for the field
    function GetFieldContent($ItemId, $FieldName)
    {
        # get resource object
        $Resource = new Resource($ItemId);

        # retrieve text (including variants) from resource object and return to caller
        return $Resource->Get($FieldName, FALSE, TRUE);
    }

    # overloaded version of method to retrieve resource/phrase match list
    #   $FieldName - name of metadata field to search
    #   $Phrase - phrase to look for (matched case-insensitively as a substring)
    # returns array of IDs of resources that matched (empty array if the
    #   field is unknown, the field type is not searchable by phrase, or
    #   no query could be built)
    function SearchFieldForPhrases($FieldName, $Phrase)
    {
        # normalize and escape search phrase for use in SQL query
        $SearchPhrase = strtolower(addslashes($Phrase));

        # bail out with no matches if field is not in schema
        # (mirrors the NULL check in SearchFieldsForComparisonMatches() and
        #   avoids a fatal error from calling Type() on a non-object)
        $Field = $this->Schema->GetFieldByName($FieldName);
        if ($Field == NULL) {  return array();  }

        # query DB for matching list based on field type
        switch ($Field->Type())
        {
            case MetadataSchema::MDFTYPE_TEXT:
            case MetadataSchema::MDFTYPE_PARAGRAPH:
            case MetadataSchema::MDFTYPE_FILE:
            case MetadataSchema::MDFTYPE_URL:
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."'"
                            ." IN LOWER(`".$Field->DBFieldName()."`)) ";
                break;

            case MetadataSchema::MDFTYPE_IMAGE:
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."'"
                            ." IN LOWER(`".$Field->DBFieldName()."AltText`)) ";
                break;

            case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                # NOTE(review): $NameTableSize is computed but never used in
                #   this method -- kept so the set of queries issued is unchanged
                $NameTableSize = $this->DB->Query("SELECT COUNT(*) AS NameCount"
                        ." FROM ControlledNames", "NameCount");
                $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                # (second query looks for the phrase in variant names)
                $SecondQueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames, VariantNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(VariantName)) "
                        ."AND VariantNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_OPTION:
                $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_TREE:
                $QueryString = "SELECT DISTINCT ResourceClassInts.ResourceId "
                        ."FROM ResourceClassInts, Classifications "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ClassificationName)) "
                        ."AND Classifications.ClassificationId = ResourceClassInts.ClassificationId "
                        ."AND Classifications.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_USER:
                # look up a user whose login or real name contains the phrase
                $UserId = $this->DB->Query("SELECT UserId FROM APUsers "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(UserName)) "
                        ."OR POSITION('".$SearchPhrase."' IN LOWER(RealName))", "UserId");
                if ($UserId != NULL)
                {
                    $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                            ."WHERE `".$Field->DBFieldName()."` = ".$UserId;
                }
                break;

            case MetadataSchema::MDFTYPE_NUMBER:
                if ($SearchPhrase > 0)
                {
                    $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                            ."WHERE `".$Field->DBFieldName()."` = ".(int)$SearchPhrase;
                }
                break;

            case MetadataSchema::MDFTYPE_FLAG:
            case MetadataSchema::MDFTYPE_DATE:
            case MetadataSchema::MDFTYPE_TIMESTAMP:
                # (these types not yet handled by search engine for phrases)
                break;
        }

        # build match list based on results returned from DB
        if (isset($QueryString))
        {
            if ($this->DebugLevel > 7) {  print("SE:   performing phrase search query"
                    ." (<i>".$QueryString."</i>)<br>\n");  }
            if ($this->DebugLevel > 9) {  $StartTime = microtime(TRUE);  }
            $this->DB->Query($QueryString);
            if ($this->DebugLevel > 9)
            {
                $EndTime = microtime(TRUE);
                # report slow queries (elapsed time is end minus start)
                if (($EndTime - $StartTime) > 0.1)
                {
                    printf("SE:   query took %.2f seconds<br>\n",
                            ($EndTime - $StartTime));
                }
            }
            $MatchList = $this->DB->FetchColumn("ResourceId");
            if (isset($SecondQueryString))
            {
                if ($this->DebugLevel > 7) {  print("SE:   performing second phrase search query"
                        ." (<i>".$SecondQueryString."</i>)<br>\n");  }
                if ($this->DebugLevel > 9) {  $StartTime = microtime(TRUE);  }
                $this->DB->Query($SecondQueryString);
                if ($this->DebugLevel > 9)
                {
                    $EndTime = microtime(TRUE);
                    # report slow queries (elapsed time is end minus start)
                    if (($EndTime - $StartTime) > 0.1)
                    {
                        printf("SE:   query took %.2f seconds<br>\n",
                                ($EndTime - $StartTime));
                    }
                }

                # combine both ID lists, dropping duplicates
                # (merge rather than "+": the array union operator keeps only
                #   right-hand entries whose keys are missing on the left, so
                #   on numerically-indexed lists it discards second-query
                #   matches -- assumes FetchColumn() returns a 0-based list,
                #   TODO confirm against the database class)
                $MatchList = array_unique(array_merge(
                        $MatchList, $this->DB->FetchColumn("ResourceId")));
            }
        }
        else
        {
            $MatchList = array();
        }

        # return list of matching resources to caller
        return $MatchList;
    }

    # search field for records that meet comparison
    #   $FieldNames - array of metadata field names
    #   $Operators - array of SQL comparison operators, indexed like $FieldNames
    #   $Values - array of values to compare against, indexed like $FieldNames
    # returns array of IDs of resources that satisfy the comparisons, combined
    #   with AND or OR according to the current default search logic
    # NOTE(review): operators are inserted into the SQL unescaped; this relies
    #   on the caller supplying only valid operators -- confirm in the parent class
    function SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values)
    {
        # use SQL keyword appropriate to current search logic for combining operations
        $CombineWord = ($this->DefaultSearchLogic == SEARCHLOGIC_AND) ? " AND " : " OR ";

        # for each comparison
        foreach ($FieldNames as $Index => $FieldName)
        {
            $Operator = $Operators[$Index];
            $Value = $Values[$Index];

            # determine query based on field type
            $Field = $this->Schema->GetFieldByName($FieldName);
            if ($Field != NULL)
            {
                switch ($Field->Type())
                {
                    case MetadataSchema::MDFTYPE_TEXT:
                    case MetadataSchema::MDFTYPE_PARAGRAPH:
                    case MetadataSchema::MDFTYPE_NUMBER:
                    case MetadataSchema::MDFTYPE_FLAG:
                    case MetadataSchema::MDFTYPE_USER:
                    case MetadataSchema::MDFTYPE_URL:
                        if (isset($Queries["Resources"]))
                        {
                            $Queries["Resources"] .= $CombineWord;
                        }
                        else
                        {
                            $Queries["Resources"] = "SELECT DISTINCT ResourceId FROM Resources WHERE ";
                        }
                        if ($Field->Type() == MetadataSchema::MDFTYPE_USER)
                        {
                            # (user fields are compared by user ID rather than by supplied value)
                            $User = new SPTUser($Value);
                            $Value = $User->Id();
                        }
                        $Queries["Resources"] .= "`".$Field->DBFieldName()."` ".$Operator." '".addslashes($Value)."' ";
                        break;

                    case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                        # (two-part query: part A checks controlled names,
                        #   part B checks variant names)
                        $QueryIndex = "ResourceNameInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]["A"]))
                        {
                            $Queries[$QueryIndex]["A"] =
                                    "SELECT DISTINCT ResourceId"
                                    ." FROM ResourceNameInts, ControlledNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex]["A"] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex]["A"] .= $CombineWord;
                        }
                        $Queries[$QueryIndex]["A"] .=
                                "((ResourceNameInts.ControlledNameId"
                                        ." = ControlledNames.ControlledNameId"
                                ." AND ControlledName "
                                        .$Operator." '".addslashes($Value)."'))";
                        if (!isset($Queries[$QueryIndex]["B"]))
                        {
                            $Queries[$QueryIndex]["B"] =
                                    "SELECT DISTINCT ResourceId"
                                    . " FROM ResourceNameInts, ControlledNames,"
                                            ." VariantNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex]["B"] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex]["B"] .= $CombineWord;
                        }
                        $Queries[$QueryIndex]["B"] .=
                                "((ResourceNameInts.ControlledNameId"
                                        ." = ControlledNames.ControlledNameId"
                                ." AND ResourceNameInts.ControlledNameId"
                                        ." = VariantNames.ControlledNameId"
                                ." AND VariantName "
                                        .$Operator." '".addslashes($Value)."'))";
                        break;

                    case MetadataSchema::MDFTYPE_OPTION:
                        $QueryIndex = "ResourceNameInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]))
                        {
                            $Queries[$QueryIndex] =
                                    "SELECT DISTINCT ResourceId FROM ResourceNameInts, ControlledNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex] .= $CombineWord;
                        }
                        $Queries[$QueryIndex] .= "(ResourceNameInts.ControlledNameId = ControlledNames.ControlledNameId"
                                ." AND ControlledName ".$Operator." '".addslashes($Value)."')";
                        break;

                    case MetadataSchema::MDFTYPE_TREE:
                        $QueryIndex = "ResourceClassInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]))
                        {
                            $Queries[$QueryIndex] = "SELECT DISTINCT ResourceId FROM ResourceClassInts, Classifications "
                                    ." WHERE ResourceClassInts.ClassificationId = Classifications.ClassificationId"
                                    ." AND Classifications.FieldId = ".$Field->Id()." AND ( ";
                            $CloseQuery[$QueryIndex] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex] .= $CombineWord;
                        }
                        $Queries[$QueryIndex] .= " ClassificationName ".$Operator." '".addslashes($Value)."'";
                        break;

                    case MetadataSchema::MDFTYPE_TIMESTAMP:
                        # if value appears to have time component or text description
                        if (strpos($Value, ":")
                                || strstr($Value, "day")
                                || strstr($Value, "week")
                                || strstr($Value, "month")
                                || strstr($Value, "year")
                                || strstr($Value, "hour")
                                || strstr($Value, "minute"))
                        {
                            if (isset($Queries["Resources"]))
                            {
                                $Queries["Resources"] .= $CombineWord;
                            }
                            else
                            {
                                $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                        ." FROM Resources WHERE ";
                            }

                            # flip operator if necessary
                            # (for values containing "ago" the direction of
                            #   the comparison is reversed)
                            if (strstr($Value, "ago"))
                            {
                                $OperatorFlipMap = array(
                                        "<" => ">=",
                                        ">" => "<=",
                                        "<=" => ">",
                                        ">=" => "<",
                                        );
                                $Operator = isset($OperatorFlipMap[$Operator])
                                        ? $OperatorFlipMap[$Operator] : $Operator;
                            }

                            # use strtotime method to build condition
                            $TimestampValue = strtotime($Value);
                            if (($TimestampValue !== FALSE) && ($TimestampValue != -1))
                            {
                                # (a date-only value compared with "<=" is
                                #   extended to the last second of that day)
                                if ((date("H:i:s", $TimestampValue) == "00:00:00")
                                        && (strpos($Value, "00:00") === FALSE)
                                        && ($Operator == "<="))
                                {
                                    $NormalizedValue =
                                            date("Y-m-d", $TimestampValue)." 23:59:59";
                                }
                                else
                                {
                                    $NormalizedValue = date("Y-m-d H:i:s", $TimestampValue);
                                }
                            }
                            else
                            {
                                # (value could not be parsed, so pass it through escaped)
                                $NormalizedValue = addslashes($Value);
                            }
                            $Queries["Resources"] .=
                                    " ( `".$Field->DBFieldName()."` "
                                    .$Operator
                                    ." '".$NormalizedValue."' ) ";
                        }
                        else
                        {
                            # use Date object method to build condition
                            $Date = new Date($Value);
                            if ($Date->Precision())
                            {
                                if (isset($Queries["Resources"]))
                                {
                                    $Queries["Resources"] .= $CombineWord;
                                }
                                else
                                {
                                    $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                            ." FROM Resources WHERE ";
                                }
                                $Queries["Resources"] .= " ( ".$Date->SqlCondition(
                                        $Field->DBFieldName(), NULL, $Operator)." ) ";
                            }
                        }
                        break;

                    case MetadataSchema::MDFTYPE_DATE:
                        $Date = new Date($Value);
                        if ($Date->Precision())
                        {
                            if (isset($Queries["Resources"]))
                            {
                                $Queries["Resources"] .= $CombineWord;
                            }
                            else
                            {
                                $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                        ." FROM Resources WHERE ";
                            }
                            $Queries["Resources"] .= " ( ".$Date->SqlCondition(
                                    $Field->DBFieldName()."Begin",
                                    $Field->DBFieldName()."End", $Operator)." ) ";
                        }
                        break;

                    case MetadataSchema::MDFTYPE_IMAGE:
                    case MetadataSchema::MDFTYPE_FILE:
                        # (these types not yet handled by search engine for comparisons)
                        break;
                }
            }
        }

        # if queries found
        if (isset($Queries))
        {
            # for each assembled query
            foreach ($Queries as $QueryIndex => $Query)
            {
                # if query has multiple parts
                if (is_array($Query))
                {
                    # for each part of query
                    $ResourceIds = array();
                    foreach ($Query as $PartIndex => $PartQuery)
                    {
                        # add closing paren if query was flagged to be closed
                        if (isset($CloseQuery[$QueryIndex])) {  $PartQuery .= " ) ";  }

                        # perform query and retrieve IDs
                        if ($this->DebugLevel > 5) {  print("SE:  "
                                ." performing comparison query (<i>".$PartQuery
                                ."</i>)<br>\n");  }
                        $this->DB->Query($PartQuery);

                        # accumulate IDs from all parts, dropping duplicates
                        # (merge rather than "+": the array union operator
                        #   would discard entries from later parts whose
                        #   numeric keys already exist -- assumes FetchColumn()
                        #   returns a 0-based list, TODO confirm)
                        $ResourceIds = array_unique(array_merge(
                                $ResourceIds, $this->DB->FetchColumn("ResourceId")));
                        if ($this->DebugLevel > 5) {  print("SE:  "
                                ." comparison query produced <i>"
                                .count($ResourceIds)."</i> results<br>\n");  }
                    }
                }
                else
                {
                    # add closing paren if query was flagged to be closed
                    if (isset($CloseQuery[$QueryIndex])) {  $Query .= " ) ";  }

                    # perform query and retrieve IDs
                    if ($this->DebugLevel > 5) {  print("SE:  "
                            ." performing comparison query (<i>".$Query
                            ."</i>)<br>\n");  }
                    $this->DB->Query($Query);
                    $ResourceIds = $this->DB->FetchColumn("ResourceId");
                    if ($this->DebugLevel > 5) {  print("SE:  "
                            ." comparison query produced <i>"
                            .count($ResourceIds)."</i> results<br>\n");  }
                }

                # if we already have some results
                if (isset($Results))
                {
                    # if search logic is set to AND
                    if ($this->DefaultSearchLogic == SEARCHLOGIC_AND)
                    {
                        # remove anything from results that was not returned from query
                        $Results = array_intersect($Results, $ResourceIds);
                    }
                    else
                    {
                        # add values returned from query to results
                        $Results = array_unique(array_merge($Results, $ResourceIds));
                    }
                }
                else
                {
                    # set results to values returned from query
                    $Results = $ResourceIds;
                }
            }
        }
        else
        {
            # initialize results to empty list
            $Results = array();
        }

        # return results to caller
        return $Results;
    }

    # retrieve resource IDs sorted by specified field
    #   $FieldName - name of field to sort by
    #   $SortDescending - TRUE to sort in descending order
    # returns result of ResourceFactory::GetResourceIdsSortedBy()
    # (the flag is inverted when passed along -- presumably the factory
    #   method expects an "ascending" flag; verify against ResourceFactory)
    function GetItemIdsSortedByField($FieldName, $SortDescending)
    {
        $RFactory = new ResourceFactory();
        return $RFactory->GetResourceIdsSortedBy($FieldName, !$SortDescending);
    }

    # queue a background task to update search data for specified item
    #   $ItemId - ID of item to update (converted to integer before queueing)
    #   $Priority - task priority (defaults to ApplicationFramework::PRIORITY_LOW)
    function QueueUpdateForItem($ItemId,
            $Priority = ApplicationFramework::PRIORITY_LOW)
    {
        global $AF;
        $AF->QueueUniqueTask(array(__CLASS__, "RunUpdateForItem"),
                array(intval($ItemId)), $Priority);
    }

    # task callback queued by QueueUpdateForItem() that updates search
    # data for specified item (does nothing if resource no longer exists)
    #   $ItemId - ID of item to update
    static function RunUpdateForItem($ItemId)
    {
        # check that resource still exists
        $RFactory = new ResourceFactory();
        if (!$RFactory->ItemExists($ItemId)) {  return;  }

        # update search data for resource
        $SearchEngine = new SPTSearchEngine();
        $SearchEngine->UpdateForItem($ItemId);
    }

    # metadata schema used to look up field definitions
    private $Schema;

    # functions for backward compatibility w/ old SPT code
    function UpdateForResource($ItemId) {  $this->UpdateForItem($ItemId);  }
}