CWIS Developer Documentation
SearchEngine.php
Go to the documentation of this file.
1 <?PHP
2 
3 #
4 # FILE: SearchEngine.php
5 #
6 # Open Source Metadata Archive Search Engine (OSMASE)
7 # Copyright 2002-2011 Edward Almasy and Internet Scout
8 # http://scout.wisc.edu
9 #
10 
11 class SearchEngine {
12 
13  # ---- PUBLIC INTERFACE --------------------------------------------------
14 
15  # possible types of logical operators
16  const LOGIC_AND = 1;
17  const LOGIC_OR = 2;
18 
19  # flags used for indicating field types
20  const FIELDTYPE_TEXT = 1;
21  const FIELDTYPE_NUMERIC = 2;
22  const FIELDTYPE_DATE = 3;
24 
25  # object constructor
27  {
28  # save database object for our use
29  $this->DB = $DB;
30 
31  # save item access parameters
34 
35  # define flags used for indicating word states
36  if (!defined("WORD_PRESENT")) { define("WORD_PRESENT", 1); }
37  if (!defined("WORD_EXCLUDED")) { define("WORD_EXCLUDED", 2); }
38  if (!defined("WORD_REQUIRED")) { define("WORD_REQUIRED", 4); }
39 
40  # set default debug state
41  $this->DebugLevel = 0;
42  }
43 
44  # add field to be searched
# Register a field with the search engine.
#   $FieldName            name used to refer to the field in searches
#   $DBFieldName          value later reported by DBFieldName()
#   $FieldType            value later reported by FieldType()
#   $Weight               value later reported by FieldWeight()
#   $UsedInKeywordSearch  value later reported by FieldInKeywordSearch()
function AddField(
        $FieldName, $DBFieldName, $FieldType, $Weight, $UsedInKeywordSearch)
{
    # gather the settings under the keys that the accessor methods read
    $Settings = array(
            "DBFieldName" => $DBFieldName,
            "FieldType" => $FieldType,
            "Weight" => $Weight,
            "InKeywordSearch" => $UsedInKeywordSearch,
            );

    # store each setting individually so that any other entries already
    # recorded for this field are left untouched
    foreach ($Settings as $Key => $Value)
    {
        $this->FieldInfo[$FieldName][$Key] = $Value;
    }
}
54 
55  # retrieve info about tables and fields (useful for child objects)
# Report the item table name held by this object.
function ItemTableName()
{
    return $this->ItemTableName;
}

# Report the item ID field name held by this object.
function ItemIdFieldName()
{
    return $this->ItemIdFieldName;
}

# Report the database field name registered for $FieldName via AddField().
function DBFieldName($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["DBFieldName"];
}

# Report the field type registered for $FieldName via AddField().
function FieldType($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["FieldType"];
}

# Report the search weight registered for $FieldName via AddField().
function FieldWeight($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["Weight"];
}

# Report whether $FieldName was registered as part of keyword searches.
function FieldInKeywordSearch($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["InKeywordSearch"];
}
66 
67  # set debug level
# Set the debug level for this object (the constructor defaults it to 0).
#   $Setting  new debug level
function DebugLevel($Setting)
{
    $NewLevel = $Setting;
    $this->DebugLevel = $NewLevel;
}
72 
73 
74  # ---- search functions
75 
76  # perform keyword search
# Perform a keyword search and return the cleaned-up list of scores.
#   $SearchString     raw search string entered by the user
#   $StartingResult   passed through to CleanScores()
#   $NumberOfResults  passed through to CleanScores()
#   $SortByField      passed through to CleanScores()
#   $SortDescending   passed through to CleanScores()
# Returns whatever CleanScores() produces for the scores that were found.
function Search($SearchString, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # hand the string to SetDebugLevel() first and keep what it gives back
    $SearchString = $this->SetDebugLevel($SearchString);
    $this->DMsg(0, "In Search() with search string \"".$SearchString."\"");

    # note when we started so the elapsed search time can be recorded
    $TimerStart = microtime(TRUE);

    # reset the term counters that the parsing step increments
    $this->InclusiveTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->ExcludedTermCount = 0;

    # break the search string into individual words
    $Terms = $this->ParseSearchStringForWords($SearchString);
    $this->DMsg(1, "Found ".count($Terms)." words");

    # break the search string into phrases
    $PhraseList = $this->ParseSearchStringForPhrases($SearchString);
    $this->DMsg(1, "Found ".count($PhraseList)." phrases");

    # a search made up solely of exclusions starts from every record
    $OnlyExclusions = ($this->ExcludedTermCount && !$this->InclusiveTermCount);
    if ($OnlyExclusions)
    {
        $this->DMsg(1, "Loading all records");
        $Results = $this->LoadScoresForAllRecords();
    }
    else
    {
        # otherwise score by words, then refine by phrases
        $Results = $this->SearchForWords($Terms);
        $this->DMsg(1, "Found ".count($Results)." results after word search");
        $Results = $this->SearchForPhrases($PhraseList, $Results);
        $this->DMsg(1, "Found ".count($Results)." results after phrase search");
    }

    # filtering is only worthwhile when something was found
    if (count($Results) > 0)
    {
        # drop items matching excluded words
        $Results = $this->FilterOnExcludedWords($Terms, $Results);

        # drop items lacking required words
        $Results = $this->FilterOnRequiredWords($Results);
    }

    # count, sort, and trim the scores
    $Results = $this->CleanScores($Results, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # remember how long the search took
    $this->LastSearchTime = microtime(TRUE) - $TimerStart;

    # hand the results back
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Results;
}
136 
137  # perform search across multiple fields and return trimmed results to caller
# Search across multiple fields and return the cleaned-up list of scores.
#   $SearchStrings    search strings indexed by field name
#   $StartingResult, $NumberOfResults, $SortByField, $SortDescending
#                     passed through to CleanScores()
# Returns whatever CleanScores() produces for the scores that were found.
function FieldedSearch($SearchStrings, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # hand the strings to SetDebugLevel() first and keep what it gives back
    $SearchStrings = $this->SetDebugLevel($SearchStrings);
    $this->DMsg(0, "In FieldedSearch() with "
            .count($SearchStrings)." search strings");

    # note when we started so the elapsed search time can be recorded
    $TimerStart = microtime(TRUE);

    # run the search, treating "no search was done" (NULL) as "no results"
    $Results = $this->SearchAcrossFields($SearchStrings);
    if ($Results === NULL)
    {
        $Results = array();
    }

    # count, sort, and trim the scores
    $Results = $this->CleanScores($Results, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # remember how long the search took
    $this->LastSearchTime = microtime(TRUE) - $TimerStart;

    # hand the results back
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Results;
}
163 
164  # perform search with logical groups of fielded searches
# Perform a search made up of logical groups of fielded searches.
#   $SearchGroups     array of groups; each group may carry "SearchStrings"
#                     (fielded search strings) and "Logic" (LOGIC_AND/LOGIC_OR)
#   $StartingResult, $NumberOfResults, $SortByField, $SortDescending
#                     passed through to CleanScores()
# Results of the groups are combined using the search logic that was in
# effect when the method was called; a group's own "Logic" applies only
# while that group is being searched.
function GroupedSearch($SearchGroups, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # hand each group's search strings to SetDebugLevel() and keep the result
    foreach (array_keys($SearchGroups) as $GroupKey)
    {
        if (isset($SearchGroups[$GroupKey]["SearchStrings"]))
        {
            $SearchGroups[$GroupKey]["SearchStrings"] = $this->SetDebugLevel(
                    $SearchGroups[$GroupKey]["SearchStrings"]);
        }
    }
    $this->DMsg(0, "In GroupedSearch() with "
            .count($SearchGroups)." search groups");

    # note when we started so the elapsed search time can be recorded
    $TimerStart = microtime(TRUE);

    # nothing found yet
    $Combined = array();

    # remember the AND/OR setting so it can be restored afterwards
    $OuterLogic = $this->DefaultSearchLogic;

    # walk through the groups
    $NothingMergedYet = TRUE;
    foreach ($SearchGroups as $Group)
    {
        $this->DMsg(0, "----- GROUP ---------------------------");

        # use the group's own AND/OR setting if it has one
        $this->DefaultSearchLogic = isset($Group["Logic"])
                ? $Group["Logic"] : $OuterLogic;
        $this->DMsg(2, "Logic is "
                .(($this->DefaultSearchLogic == self::LOGIC_AND) ? "AND" : "OR"));

        # groups without search strings contribute nothing
        if (!isset($Group["SearchStrings"]))
        {
            continue;
        }

        # run the search for this group
        $GroupScores = $this->SearchAcrossFields($Group["SearchStrings"]);

        # NULL means no search was actually conducted
        if ($GroupScores === NULL)
        {
            continue;
        }

        if (($OuterLogic == self::LOGIC_OR) || $NothingMergedYet)
        {
            # OR (or first set of results): accumulate scores per item
            foreach ($GroupScores as $ItemId => $Score)
            {
                if (isset($Combined[$ItemId]))
                {
                    $Combined[$ItemId] += $Score;
                }
                else
                {
                    $Combined[$ItemId] = $Score;
                }
            }

            # from here on there are previous results to combine with
            $NothingMergedYet = FALSE;
        }
        else
        {
            # AND: keep only items present in both sets, summing their scores
            $Previous = $Combined;
            $Combined = array();
            foreach ($GroupScores as $ItemId => $Score)
            {
                if (isset($Previous[$ItemId]))
                {
                    $Combined[$ItemId] = $Previous[$ItemId] + $Score;
                }
            }
        }
    }

    # put the AND/OR setting back the way we found it
    $this->DefaultSearchLogic = $OuterLogic;

    # count, sort, and trim the scores
    $Combined = $this->CleanScores($Combined, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # remember how long the search took
    $this->LastSearchTime = microtime(TRUE) - $TimerStart;

    # hand the results back
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Combined;
}
267 
268  # add function that will be called to filter search results
# Register a function to be used for filtering search results.
#   $FunctionName  name of the filter function, appended to the filter list
function AddResultFilterFunction($FunctionName)
{
    $FilterName = $FunctionName;
    $this->FilterFuncs[] = $FilterName;
}
274 
275  # get or set default search logic (AND or OR)
# Get or set the default search logic.
#   $NewSetting  (optional) new setting, LOGIC_AND or LOGIC_OR; when omitted
#                (or otherwise equal to NULL) the setting is left unchanged
# Returns the setting in effect after any change, so the method also works
# as a getter.
function DefaultSearchLogic($NewSetting = NULL)
{
    # only change the setting when a new value was supplied
    if ($NewSetting != NULL)
    {
        $this->DefaultSearchLogic = $NewSetting;
    }

    # report the current setting to the caller
    return $this->DefaultSearchLogic;
}
284 
# Choose whether search terms are required by default.
#   $NewSetting  TRUE selects AND logic (terms required), FALSE selects OR
function SearchTermsRequiredByDefault($NewSetting = TRUE)
{
    $this->DefaultSearchLogic = $NewSetting
            ? self::LOGIC_AND
            : self::LOGIC_OR;
}
296 
# Report the number of results available from the most recent search.
# Returns the value of NumberOfResultsAvailable (the figure the search
# methods log as "Ended up with N results"), or 0 if it has not been set.
# NOTE(review): the property is presumably maintained by CleanScores(),
# which is not shown here - confirm.
function NumberOfResults()
{
    return isset($this->NumberOfResultsAvailable)
            ? $this->NumberOfResultsAvailable : 0;
}
301 
# Report the list of search terms collected while parsing search strings.
function SearchTerms()
{
    $Terms = $this->SearchTermList;
    return $Terms;
}
306 
# Report how long the most recent search took (a difference of two
# microtime(TRUE) readings, i.e. seconds as a float).
function SearchTime()
{
    $Elapsed = $this->LastSearchTime;
    return $Elapsed;
}
311 
312  # report total weight for all fields involved in search
# Report the total weight of all fields involved in a fielded search.
#   $SearchStrings  search strings indexed by field name; only the keys
#                   are examined
# A key of "XXXKeywordXXX" stands for a keyword search and adds the weights
# of every field registered for keyword searching. Field names that were
# never registered via AddField() contribute nothing.
function FieldedSearchWeightScale($SearchStrings)
{
    $Weight = 0;
    $IncludedKeywordSearch = FALSE;

    # add up the weights of the explicitly named fields
    foreach ($SearchStrings as $FieldName => $SearchStringArray)
    {
        if ($FieldName == "XXXKeywordXXX")
        {
            $IncludedKeywordSearch = TRUE;
        }
        elseif (isset($this->FieldInfo[$FieldName]["Weight"]))
        {
            # (guarded because SearchAcrossFields() also tolerates field
            #       names that are not in FieldInfo)
            $Weight += $this->FieldInfo[$FieldName]["Weight"];
        }
    }

    # a keyword search additionally involves every keyword-searchable field
    if ($IncludedKeywordSearch && is_array($this->FieldInfo))
    {
        foreach ($this->FieldInfo as $FieldName => $Info)
        {
            if ($Info["InKeywordSearch"])
            {
                $Weight += $Info["Weight"];
            }
        }
    }

    return $Weight;
}
340 
341 
342  # ---- search database update functions
343 
344  # update search DB for the specified item
# Rebuild the search database entries for one item.
#   $ItemId  ID of the item; negative IDs (temporary records) are ignored
function UpdateForItem($ItemId)
{
    # temporary records (negative IDs) are never indexed
    if ($ItemId < 0) { return; }

    # forget which word counts were already added for a previous item
    unset($this->WordCountAdded);

    # throw away whatever is currently stored for this item
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);

    # walk through the registered fields
    foreach ($this->FieldInfo as $FieldName => $Info)
    {
        # fields without a positive search weight are not indexed
        if (!($Info["Weight"] > 0))
        {
            continue;
        }

        # fetch the field content, which may be one string or an array of them
        $Content = $this->GetFieldContent($ItemId, $FieldName);
        $Strings = is_array($Content) ? $Content : array($Content);

        # record search info for each piece of text
        foreach ($Strings as $String)
        {
            $this->RecordSearchInfoForText($ItemId, $FieldName,
                    $Info["Weight"], $String,
                    $Info["InKeywordSearch"]);
        }
    }
}
387 
388  # update search DB for the specified range of items
# Rebuild the search database entries for a range of items.
#   $StartingItemId  lowest item ID to include
#   $NumberOfItems   maximum number of items to update
# Returns the ID of the last item updated, or NULL if no items with an ID
# at or above $StartingItemId were found.
function UpdateForItems($StartingItemId, $NumberOfItems)
{
    # retrieve IDs for specified number of items starting at specified ID
    $this->DB->Query("SELECT ".$this->ItemIdFieldName." FROM ".$this->ItemTableName
            ." WHERE ".$this->ItemIdFieldName." >= ".$StartingItemId
            ." ORDER BY ".$this->ItemIdFieldName." LIMIT ".$NumberOfItems);
    $ItemIds = $this->DB->FetchColumn($this->ItemIdFieldName);

    # nothing has been updated yet (keeps the return value defined when
    #       the query selects no items)
    $LastItemId = NULL;

    # update search info for each retrieved item
    if (is_array($ItemIds))
    {
        foreach ($ItemIds as $ItemId)
        {
            $this->UpdateForItem($ItemId);
            $LastItemId = $ItemId;
        }
    }

    # return ID of last item updated to caller
    return $LastItemId;
}
407 
408  # drop all data pertaining to item from search DB
# Remove everything stored about an item from the search database.
#   $ItemId  ID of the item whose word counts are to be deleted
function DropItem($ItemId)
{
    # word counts are the only per-item data deleted here
    $Statement = "DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId;
    $this->DB->Query($Statement);
}
414 
415  # drop all data pertaining to field from search DB
# Remove everything stored about a field from the search database.
#   $FieldName  name of the field to drop
# Does nothing if the field has no entry in the SearchFields table.
function DropField($FieldName)
{
    # retrieve our ID for field
    $FieldId = $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".addslashes($FieldName)."'", "FieldId");

    # nothing to drop if the field is unknown (this also avoids building
    #       a DELETE with an empty ID)
    if (($FieldId === NULL) || ($FieldId === FALSE))
    {
        return;
    }
    $FieldId = intval($FieldId);

    # drop all entries pertaining to field from word counts table
    # (the ID is numeric, so it is inserted unquoted; the former \' inside
    #       a double-quoted string put a literal backslash into the SQL)
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE FieldId = ".$FieldId);

    # drop field from our fields table
    $this->DB->Query("DELETE FROM SearchFields WHERE FieldId = ".$FieldId);

    # forget any cached ID so GetFieldId() does not hand out the dropped one
    unset($this->FieldIds[$FieldName]);
}
428 
429  # return total number of terms indexed by search engine
# Report the total number of terms indexed by the search engine, i.e. the
# number of rows in the SearchWords table.
function SearchTermCount()
{
    $Statement = "SELECT COUNT(*) AS TermCount"." FROM SearchWords";
    return $this->DB->Query($Statement, "TermCount");
}
435 
436  # return total number of items indexed by search engine
# Report the total number of items indexed by the search engine, i.e. the
# number of distinct item IDs in the SearchWordCounts table.
function ItemCount()
{
    $Statement = "SELECT COUNT(DISTINCT ItemId) AS ItemCount"
            ." FROM SearchWordCounts";
    return $this->DB->Query($Statement, "ItemCount");
}
442 
# Add synonyms for a word.
#   $Word      word to which the synonyms apply
#   $Synonyms  array of synonyms for the word
# Words and synonyms are added to the word table if not already there.
# A synonym pair counts as already present regardless of which way round
# it is stored. Returns the number of new synonym pairs added.
function AddSynonyms($Word, $Synonyms)
{
    # nothing added so far
    $NewPairs = 0;

    # look up (or create) the ID for the word
    $WordId = $this->GetWordId($Word, TRUE);

    # walk through the supplied synonyms
    foreach ($Synonyms as $Synonym)
    {
        # look up (or create) the ID for the synonym
        $SynonymId = $this->GetWordId($Synonym, TRUE);

        # check for the pair in either order
        $this->DB->Query("SELECT * FROM SearchWordSynonyms"
                ." WHERE (WordIdA = ".$WordId
                    ." AND WordIdB = ".$SynonymId.")"
                ." OR (WordIdB = ".$WordId
                    ." AND WordIdA = ".$SynonymId.")");
        $AlreadyKnown = ($this->DB->NumRowsSelected() != 0);

        # pairs already on file are left alone
        if ($AlreadyKnown)
        {
            continue;
        }

        # store the new pair
        $this->DB->Query("INSERT INTO SearchWordSynonyms"
                ." (WordIdA, WordIdB)"
                ." VALUES (".$WordId.", ".$SynonymId.")");
        $NewPairs++;
    }

    # tell the caller how many pairs were added
    return $NewPairs;
}
482 
483  # remove synonym(s)
# Remove synonyms for a word.
#   $Word      word whose synonyms are to be removed
#   $Synonyms  (optional) array of specific synonyms to remove; when NULL,
#              every synonym pair involving the word is removed
# Words or synonyms without an ID in the word table are silently skipped.
function RemoveSynonyms($Word, $Synonyms = NULL)
{
    # a word without an ID cannot have synonyms on file
    $WordId = $this->GetWordId($Word);
    if ($WordId === NULL)
    {
        return;
    }

    # no list given: remove every pair the word takes part in
    if ($Synonyms === NULL)
    {
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE WordIdA = '".$WordId."'"
                ." OR WordIdB = '".$WordId."'");
        return;
    }

    # otherwise remove only the listed synonyms
    foreach ($Synonyms as $Synonym)
    {
        # synonyms without an ID cannot be on file either
        $SynonymId = $this->GetWordId($Synonym);
        if ($SynonymId === NULL)
        {
            continue;
        }

        # delete the pair in whichever order it was stored
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE (WordIdA = '".$WordId."'"
                    ." AND WordIdB = '".$SynonymId."')"
                ." OR (WordIdB = '".$WordId."'"
                    ." AND WordIdA = '".$SynonymId."')");
    }
}
522 
523  # remove all synonyms
# Remove every synonym pair by emptying the SearchWordSynonyms table.
function RemoveAllSynonyms()
{
    $Statement = "DELETE FROM SearchWordSynonyms";
    $this->DB->Query($Statement);
}
528 
529  # get synonyms for word (returns array of synonyms)
# Get the synonyms for a word.
#   $Word  word to look up
# Returns an array of synonym words (empty if the word is unknown or has
# no synonyms). Pairs are matched regardless of the order in which they
# were stored.
function GetSynonyms($Word)
{
    # assume no synonyms will be found
    $Synonyms = array();

    # look up ID for word
    $WordId = $this->GetWordId($Word);

    # if word ID was found
    if ($WordId !== NULL)
    {
        # look up IDs of all synonyms for this word
        $this->DB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
                ." WHERE WordIdA = ".$WordId
                ." OR WordIdB = ".$WordId);

        # collect all IDs before resolving them, because GetWord() issues
        #       its own queries through the same database object
        $SynonymIds = array();
        while ($Record = $this->DB->FetchRow())
        {
            # the synonym is whichever side of the pair is not our word
            $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                    ? $Record["WordIdB"] : $Record["WordIdA"];
        }

        # look up synonym word for each ID and add to synonym list
        foreach ($SynonymIds as $SynonymId)
        {
            $Synonyms[] = $this->GetWord($SynonymId);
        }
    }

    # return synonyms to caller
    return $Synonyms;
}
563 
564  # get all synonyms (returns 2D array w/ words as first index)
# Get all synonyms on file.
# Returns a two-dimensional array indexed by word, each entry holding the
# list of synonyms for that word. Every stored pair is first entered in
# both directions; one direction of each reciprocal pair is then removed
# again, and the result is sorted by word and by synonym.
function GetAllSynonyms()
{
    # nothing found so far
    $List = array();

    # read every synonym ID pair through a database object of our own
    # NOTE(review): presumably separate from $this->DB because GetWord()
    #       runs queries on $this->DB inside the loop - confirm
    $PairDB = new Database();
    $PairDB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
    while ($Row = $PairDB->FetchRow())
    {
        # translate both IDs into words
        $WordA = $this->GetWord($Row["WordIdA"]);
        $WordB = $this->GetWord($Row["WordIdB"]);

        # list B under A unless it is already there
        $AListsB = isset($List[$WordA]) && in_array($WordB, $List[$WordA]);
        if (!$AListsB)
        {
            $List[$WordA][] = $WordB;
        }

        # list A under B unless it is already there
        $BListsA = isset($List[$WordB]) && in_array($WordA, $List[$WordB]);
        if (!$BListsA)
        {
            $List[$WordB][] = $WordA;
        }
    }

    # remove reciprocal duplicates
    foreach ($List as $Word => $Synonyms)
    {
        foreach ($Synonyms as $Synonym)
        {
            # both entries must still exist (earlier passes may have
            #       removed them) ...
            if (!isset($List[$Synonym]) || !isset($List[$Word]))
            {
                continue;
            }

            # ... and must still list each other
            if (!in_array($Word, $List[$Synonym])
                    || !in_array($Synonym, $List[$Word]))
            {
                continue;
            }

            # remove the link from the entry with fewer synonyms
            #       (from the synonym's entry when the counts are equal)
            if (count($List[$Word]) < count($List[$Synonym]))
            {
                $Owner = $Word;
                $Dropped = $Synonym;
            }
            else
            {
                $Owner = $Synonym;
                $Dropped = $Word;
            }
            $List[$Owner] = array_diff($List[$Owner], array($Dropped));

            # entries left without synonyms disappear altogether
            if (!count($List[$Owner]))
            {
                unset($List[$Owner]);
            }
        }
    }

    # sort synonyms within each entry, then the entries themselves
    foreach (array_keys($List) as $Word)
    {
        asort($List[$Word]);
    }
    ksort($List);

    # hand the 2D array back
    return $List;
}
653 
654  # set all synonyms (accepts 2D array w/ words as first index)
# Replace all synonyms on file.
#   $SynonymList  two-dimensional array indexed by word, each entry holding
#                 the array of synonyms for that word
function SetAllSynonyms($SynonymList)
{
    # start from a clean slate
    $this->RemoveAllSynonyms();

    # add the synonyms of every word in the list
    foreach (array_keys($SynonymList) as $Word)
    {
        $this->AddSynonyms($Word, $SynonymList[$Word]);
    }
}
667 
# Load synonyms from a text file.
#   $FileName  name of the file to read
# Each line holds a word followed by its synonyms, separated by whitespace
# and/or commas. Lines containing a "#" are treated as comments and
# skipped entirely. Returns the number of new synonym pairs added (0 if
# the file could not be read).
function LoadSynonymsFromFile($FileName)
{
    # assume no synonyms will be added
    $AddCount = 0;

    # read in contents of file
    $Lines = file($FileName, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);

    # bail out if file could not be read (file() returns FALSE on failure,
    #       which must not be passed to count() or foreach)
    if (!is_array($Lines))
    {
        return $AddCount;
    }

    # for each line of file
    foreach ($Lines as $Line)
    {
        # skip comments (the pattern is unanchored, so a "#" anywhere on
        #       the line marks the whole line as a comment)
        if (preg_match("/[\s]*#/", $Line))
        {
            continue;
        }

        # split line into words, discarding the empty pieces produced by
        #       leading whitespace or trailing separators
        $Words = preg_split("/[\s,]+/", $Line, -1, PREG_SPLIT_NO_EMPTY);

        # if synonyms found
        if (count($Words) > 1)
        {
            # separate out word and synonyms
            $Word = array_shift($Words);

            # add synonyms
            $AddCount += $this->AddSynonyms($Word, $Words);
        }
    }

    # return count of synonyms added to caller
    return $AddCount;
}
712 
713  # suggest alternatives
# Suggest alternate searches for a search string.
#   $SearchString  search string to suggest alternatives for
# (placeholder - no suggestions are generated and nothing is returned)
function SuggestAlternateSearches($SearchString)
{
    # not implemented
}
718 
719 
720  # ---- PRIVATE INTERFACE -------------------------------------------------
721 
# database object and settings available to child classes
protected $DB;
protected $DebugLevel;
protected $ItemTableName;
protected $ItemIdFieldName;
# result count reported at the end of Search(), FieldedSearch(), and
#       GroupedSearch() (declared so it is not created as a dynamic property)
protected $NumberOfResultsAvailable;
protected $LastSearchTime;
protected $FilterFuncs;
protected $DefaultSearchLogic = self::LOGIC_AND;
protected $StemmingEnabled = TRUE;
protected $SynonymsEnabled = TRUE;

# internal state used while indexing and searching
private $WordCountAdded;
private $FieldIds;
private $FieldInfo;
private $RequiredTermCount;
private $RequiredTermCounts;
private $InclusiveTermCount;
private $ExcludedTermCount;
private $SearchTermList;
741 
742  const STEM_ID_OFFSET = 1000000;
743 
744 
745  # ---- common private functions (used in both searching and DB build)
746 
747  # normalize and parse search string into list of search terms
# Normalize a search string and parse it into a list of search terms.
#   $SearchString   raw search string
#   $IgnorePhrases  when TRUE, quotes and parentheses are stripped and the
#                   words inside them are kept; when FALSE (default), whole
#                   quoted phrases and parenthesized groups are discarded
# Returns an array indexed by (lower-case) word, with each value a bit mask
# of WORD_PRESENT / WORD_EXCLUDED / WORD_REQUIRED. A leading "-" excludes a
# word, "+" requires it, and "~" makes it optional even when the default
# logic is AND. The excluded/required/inclusive term counters and the
# search term list are updated for each word not seen before in this call.
private function ParseSearchStringForWords($SearchString, $IgnorePhrases = FALSE)
{
    # strip off any surrounding whitespace
    $Text = trim($SearchString);

    # set up normalization rules as pattern => replacement pairs, so that
    #       a replacement cannot drift out of step with its pattern
    $Rules = array(
            # get rid of possessive plurals
            "/'s[^a-z0-9\\-+~]+/i" => " ",
            # get rid of single quotes / apostrophes
            "/'/" => "",
            );
    if ($IgnorePhrases)
    {
        # keep the words in phrases and groups, dropping only the
        #       double quotes and parentheses around them
        $Rules["/\"/"] = " ";
        $Rules["/[\(\)]+/"] = " ";
    }
    else
    {
        # get rid of phrases and groups entirely
        $Rules["/\"[^\"]*\"/"] = " ";
        $Rules["/\\([^)]*\\)/"] = " ";
    }
    $Rules += array(
            # convert non-alphanumerics / non-minus/plus/tilde to a space
            "/[^a-z0-9\\-+~]+/i" => " ",
            # convert minus preceded by anything but whitespace to a space
            #       (keeping the preceding character)
            "/([^\\s])-+/i" => "\\1 ",
            # convert plus preceded by anything but whitespace to a space
            #       (keeping the preceding character)
            "/([^\\s])\\++/i" => "\\1 ",
            # convert minus followed by whitespace to a space
            "/-\\s/i" => " ",
            # convert plus followed by whitespace to a space
            "/\\+\\s/i" => " ",
            # convert tilde followed by whitespace to a space
            "/~\\s/i" => " ",
            # convert multiple spaces to one space
            "/[ ]+/" => " ",
            );

    # remove punctuation from text and normalize whitespace
    $Text = preg_replace(array_keys($Rules), array_values($Rules), $Text);
    $this->DMsg(2, "Normalized search string is '".$Text."'");

    # convert text to lower case
    $Text = strtolower($Text);

    # strip off any extraneous whitespace
    $Text = trim($Text);

    # start with an empty array
    $Words = array();

    # if we still have words left after parsing
    if (strlen($Text) != 0)
    {
        # for each word
        foreach (explode(" ", $Text) as $Word)
        {
            # grab first character of word and work out what it asks for
            $FirstChar = substr($Word, 0, 1);
            $IsExcluded = ($FirstChar == "-");
            $IsOptional = ($FirstChar == "~");
            $IsPlus = ($FirstChar == "+");

            # strip off option character
            if ($IsExcluded || $IsOptional || $IsPlus)
            {
                $Word = substr($Word, 1);
            }

            # skip option characters that had no word attached
            #       (substr() may return FALSE rather than "" here)
            if ((string)$Word === "")
            {
                continue;
            }

            # set flags and update term counts appropriately
            $Flags = WORD_PRESENT;
            $IsNewWord = !isset($Words[$Word]);
            if ($IsExcluded)
            {
                $Flags |= WORD_EXCLUDED;
                if ($IsNewWord)
                {
                    $this->ExcludedTermCount++;
                }
            }
            else
            {
                # word is required if marked with a plus, or if the default
                #       logic is AND and it was not marked optional
                if (!$IsOptional
                        && (($this->DefaultSearchLogic == self::LOGIC_AND)
                                || $IsPlus))
                {
                    $Flags |= WORD_REQUIRED;
                    if ($IsNewWord)
                    {
                        $this->RequiredTermCount++;
                    }
                }
                if ($IsNewWord)
                {
                    $this->InclusiveTermCount++;
                    $this->SearchTermList[] = $Word;
                }
            }

            # store flags to indicate word found
            $Words[$Word] = $Flags;
            $this->DMsg(3, "Word identified (".$Word.")");
        }
    }

    # return normalized words to caller
    return $Words;
}
859 
# Retrieve the search database ID for a field, adding the field to the
# SearchFields table if it is not there yet. IDs are cached per object.
#   $FieldName  name of the field
# Returns the ID for the field.
protected function GetFieldId($FieldName)
{
    # answer from the cache whenever possible
    if (isset($this->FieldIds[$FieldName]))
    {
        return $this->FieldIds[$FieldName];
    }

    # ask the database for the field
    $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".addslashes($FieldName)."'");
    $Row = $this->DB->FetchRow();

    if ($Row)
    {
        # field is on file, so use its ID
        $NewId = $Row["FieldId"];
    }
    else
    {
        # field is new, so add it and use the ID it was given
        $this->DB->Query("INSERT INTO SearchFields (FieldName) "
                ."VALUES ('".addslashes($FieldName)."')");
        $NewId = $this->DB->LastInsertId("SearchFields");
    }

    # remember the ID for next time and hand it back
    $this->FieldIds[$FieldName] = $NewId;
    return $this->FieldIds[$FieldName];
}
892 
893  # retrieve ID for specified word (returns NULL if no ID found)
# Retrieve the ID for a word.
#   $Word           word to look up
#   $AddIfNotFound  when TRUE, a word not on file is added (in lower case)
#                   to the SearchWords table
# Returns the word ID, or NULL if no ID was found and none was added.
# Lookups are cached in a function-level static array.
private function GetWordId($Word, $AddIfNotFound = FALSE)
{
    static $WordIdCache;

    # answer from the cache whenever possible
    if (isset($WordIdCache[$Word]))
    {
        return $WordIdCache[$Word];
    }

    # ask the database for the word
    $FoundId = $this->DB->Query("SELECT WordId"
            ." FROM SearchWords"
            ." WHERE WordText='".addslashes($Word)."'",
            "WordId");

    # add the word if it is missing and the caller wants it on file
    $ShouldAdd = ($FoundId === NULL) && $AddIfNotFound;
    if ($ShouldAdd)
    {
        $this->DB->Query("INSERT INTO SearchWords (WordText)"
                ." VALUES ('".addslashes(strtolower($Word))."')");
        $FoundId = $this->DB->LastInsertId("SearchWords");
    }

    # remember the outcome and hand it back
    $WordIdCache[$Word] = $FoundId;
    return $FoundId;
}
930 
931  # retrieve ID for specified word stem (returns NULL if no ID found)
# Retrieve the ID for a word stem.
#   $Stem           stem to look up
#   $AddIfNotFound  when TRUE, a stem not on file is added (in lower case)
#                   to the SearchStems table
# Returns the stem ID (the SearchStems table ID plus STEM_ID_OFFSET), or
# NULL if no ID was found and none was added. Found IDs are cached in a
# function-level static array.
private function GetStemId($Stem, $AddIfNotFound = FALSE)
{
    static $StemIdCache;

    # if stem was in ID cache
    if (isset($StemIdCache[$Stem]))
    {
        # use ID from cache
        return $StemIdCache[$Stem];
    }

    # look up ID in database
    $StemId = $this->DB->Query("SELECT WordId"
            ." FROM SearchStems"
            ." WHERE WordText='".addslashes($Stem)."'",
            "WordId");

    # if ID was not found and caller requested it be added
    if (($StemId === NULL) && $AddIfNotFound)
    {
        # add stem to database
        $this->DB->Query("INSERT INTO SearchStems (WordText)"
                ." VALUES ('".addslashes(strtolower($Stem))."')");

        # get ID for newly added stem
        $StemId = $this->DB->LastInsertId("SearchStems");
    }

    # report failure as NULL (adding the offset to NULL would turn
    #       "not found" into the bogus ID STEM_ID_OFFSET)
    if ($StemId === NULL)
    {
        return NULL;
    }

    # adjust from DB ID value to stem ID value
    $StemId += self::STEM_ID_OFFSET;

    # save ID to cache
    $StemIdCache[$Stem] = $StemId;

    # return ID to caller
    return $StemId;
}
971 
972  # retrieve word for specified word ID (returns FALSE if no word found)
# Retrieve the word for a word ID or stem ID.
#   $WordId  ID of the word; values at or above STEM_ID_OFFSET refer to
#            stems and are looked up in the SearchStems table
# Returns the word text as reported by the database query. Results are
# cached in a function-level static array, indexed by the ID passed in.
private function GetWord($WordId)
{
    static $WordCache;

    # if word was in cache
    if (isset($WordCache[$WordId]))
    {
        # use word from cache
        return $WordCache[$WordId];
    }

    # adjust search location and lookup ID if word is stem
    $TableName = "SearchWords";
    $LookupId = $WordId;
    if ($WordId >= self::STEM_ID_OFFSET)
    {
        $TableName = "SearchStems";
        $LookupId = $WordId - self::STEM_ID_OFFSET;
    }

    # look up word in database
    $Word = $this->DB->Query("SELECT WordText"
            ." FROM ".$TableName
            ." WHERE WordId='".$LookupId."'",
            "WordText");

    # save word to cache under the ID we were asked about (caching under
    #       the adjusted ID would mix up stems and words that share a
    #       table ID)
    $WordCache[$WordId] = $Word;

    # return word to caller
    return $Word;
}
1006 
1007 
1008  # ---- private functions used in searching
1009 
1010  # perform search across multiple fields and return raw results to caller
# Perform a search across multiple fields and return raw (unsorted,
# untrimmed) result scores.
#   $SearchStrings  search strings indexed by field name; each entry may be
#                   a single string or an array of strings, and the field
#                   name "XXXKeywordXXX" denotes a keyword search
# Returns the scores produced by the search helpers, or NULL if none of the
# search strings led to a search being run.
# Text fields (and keyword searches) are searched by word and phrase; any
# other string - including text-field strings that begin with a comparison
# operator - is left to SearchForComparisonMatches().
private function SearchAcrossFields($SearchStrings)
{
    # start by assuming no search will be done
    $Scores = NULL;

    # words and phrases found per field (reused by the exclusion pass below)
    $Words = array();
    $Phrases = array();

    # clear word counts
    $this->InclusiveTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->ExcludedTermCount = 0;

    # for each field
    $NeedComparisonSearch = FALSE;
    foreach ($SearchStrings as $FieldName => $SearchStringArray)
    {
        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # if field is keyword or field is text and does not look like comparison match
            if (($FieldName == "XXXKeywordXXX")
                    || (isset($this->FieldInfo[$FieldName])
                            && ($this->FieldInfo[$FieldName]["FieldType"] == self::FIELDTYPE_TEXT)
                            && !preg_match("/^[><!]=./", $SearchString)
                            && !preg_match("/^[><=]./", $SearchString)))
            {
                $this->DMsg(0, "Searching text field \""
                        .$FieldName."\" for string \"$SearchString\"");

                # normalize text and split into words
                $Words[$FieldName] =
                        $this->ParseSearchStringForWords($SearchString);

                # calculate scores for matching items
                if (count($Words[$FieldName]))
                {
                    $Scores = $this->SearchForWords(
                            $Words[$FieldName], $FieldName, $Scores);
                    $this->DMsg(3, "Have "
                            .(is_array($Scores) ? count($Scores) : 0)
                            ." results after word search");
                }

                # split into phrases
                $Phrases[$FieldName] =
                        $this->ParseSearchStringForPhrases($SearchString);

                # handle any phrases
                if (count($Phrases[$FieldName]))
                {
                    $Scores = $this->SearchForPhrases(
                            $Phrases[$FieldName], $Scores, $FieldName, TRUE, FALSE);
                    $this->DMsg(3, "Have "
                            .(is_array($Scores) ? count($Scores) : 0)
                            ." results after phrase search");
                }
            }
            else
            {
                # set flag to indicate possible comparison search candidate found
                $NeedComparisonSearch = TRUE;
            }
        }
    }

    # perform comparison searches
    if ($NeedComparisonSearch)
    {
        $Scores = $this->SearchForComparisonMatches($SearchStrings, $Scores);
        $this->DMsg(3, "Have ".(is_array($Scores) ? count($Scores) : 0)
                ." results after comparison search");
    }

    # if no results found and exclusions specified
    # ($Scores may still be NULL here, which must not be passed to count())
    $HaveResults = is_array($Scores) && count($Scores);
    if (!$HaveResults && $this->ExcludedTermCount)
    {
        # load all records
        $Scores = $this->LoadScoresForAllRecords();
    }

    # if search results found
    if (is_array($Scores) && count($Scores))
    {
        # for each search text string
        foreach ($SearchStrings as $FieldName => $SearchStringArray)
        {
            # convert search string to array if needed
            if (!is_array($SearchStringArray))
            {
                $SearchStringArray = array($SearchStringArray);
            }

            # for each search string for this field
            # NOTE(review): $Words[$FieldName] and $Phrases[$FieldName] hold
            #       only the last string parsed for the field, so with
            #       several strings per field the same filter is applied
            #       once per string - confirm this is intended
            foreach ($SearchStringArray as $SearchString)
            {
                # if field is text
                if (($FieldName == "XXXKeywordXXX")
                        || (isset($this->FieldInfo[$FieldName])
                                && ($this->FieldInfo[$FieldName]["FieldType"]
                                        == self::FIELDTYPE_TEXT)))
                {
                    # if there are words in search text
                    if (isset($Words[$FieldName]))
                    {
                        # handle any excluded words
                        $Scores = $this->FilterOnExcludedWords($Words[$FieldName], $Scores, $FieldName);
                    }

                    # handle any excluded phrases
                    if (isset($Phrases[$FieldName]))
                    {
                        $Scores = $this->SearchForPhrases(
                                $Phrases[$FieldName], $Scores, $FieldName, FALSE, TRUE);
                    }
                }
            }
        }

        # strip off any results that don't contain required words
        $Scores = $this->FilterOnRequiredWords($Scores);
    }

    # return search result scores to caller
    return $Scores;
}
1137 
1138  # search for words in specified field
1139  private function SearchForWords(
1140  $Words, $FieldName = "XXXKeywordXXX", $Scores = NULL)
1141  {
1142  $DB = $this->DB;
1143 
1144  # start with empty search result scores list if none passed in
1145  if ($Scores == NULL)
1146  {
1147  $Scores = array();
1148  }
1149 
1150  # grab field ID
1151  $FieldId = $this->GetFieldId($FieldName);
1152 
1153  # for each word
1154  foreach ($Words as $Word => $Flags)
1155  {
1156  $this->DMsg(2, "Searching for word '${Word}' in field ".$FieldName);
1157 
1158  # if word is not excluded
1159  if (!($Flags & WORD_EXCLUDED))
1160  {
1161  # look up record ID for word
1162  $this->DMsg(2, "Looking up word \"".$Word."\"");
1163  $WordId = $this->GetWordId($Word);
1164 
1165  # if word is in DB
1166  if ($WordId !== NULL)
1167  {
1168  # look up counts for word
1169  $DB->Query("SELECT ItemId,Count FROM SearchWordCounts "
1170  ."WHERE WordId = ".$WordId
1171  ." AND FieldId = ".$FieldId);
1172  $Counts = $DB->FetchColumn("Count", "ItemId");
1173 
1174  # if synonym support is enabled
1175  if ($this->SynonymsEnabled)
1176  {
1177  # look for any synonyms
1178  $DB->Query("SELECT WordIdA, WordIdB"
1179  ." FROM SearchWordSynonyms"
1180  ." WHERE WordIdA = ".$WordId
1181  ." OR WordIdB = ".$WordId);
1182 
1183  # if synonyms were found
1184  if ($DB->NumRowsSelected())
1185  {
1186  # retrieve synonym IDs
1187  $SynonymIds = array();
1188  while ($Record = $DB->FetchRow())
1189  {
1190  $SynonymIds[] = ($Record["WordIdA"] == $WordId)
1191  ? $Record["WordIdB"]
1192  : $Record["WordIdA"];
1193  }
1194 
1195  # for each synonym
1196  foreach ($SynonymIds as $SynonymId)
1197  {
1198  # retrieve counts for synonym
1199  $DB->Query("SELECT ItemId,Count"
1200  ." FROM SearchWordCounts"
1201  ." WHERE WordId = ".$SynonymId
1202  ." AND FieldId = ".$FieldId);
1203  $SynonymCounts = $DB->FetchColumn("Count", "ItemId");
1204 
1205  # for each count
1206  foreach ($SynonymCounts as $ItemId => $Count)
1207  {
1208  # adjust count because it's a synonym
1209  $AdjustedCount = ceil($Count / 2);
1210 
1211  # add count to existing counts
1212  if (isset($Counts[$ItemId]))
1213  {
1214  $Counts[$ItemId] += $AdjustedCount;
1215  }
1216  else
1217  {
1218  $Counts[$ItemId] = $AdjustedCount;
1219  }
1220  }
1221  }
1222  }
1223  }
1224  }
1225 
1226  # if stemming is enabled
1227  if ($this->StemmingEnabled)
1228  {
1229  # retrieve stem ID
1230  $Stem = PorterStemmer::Stem($Word);
1231  $this->DMsg(2, "Looking up stem \"".$Stem."\"");
1232  $StemId = $this->GetStemId($Stem);
1233 
1234  # if ID found for stem
1235  if ($StemId !== NULL)
1236  {
1237  # retrieve counts for stem
1238  $DB->Query("SELECT ItemId,Count"
1239  ." FROM SearchWordCounts"
1240  ." WHERE WordId = ".$StemId
1241  ." AND FieldId = ".$FieldId);
1242  $StemCounts = $DB->FetchColumn("Count", "ItemId");
1243 
1244  # for each count
1245  foreach ($StemCounts as $ItemId => $Count)
1246  {
1247  # adjust count because it's a stem
1248  $AdjustedCount = ceil($Count / 2);
1249 
1250  # add count to existing counts
1251  if (isset($Counts[$ItemId]))
1252  {
1253  $Counts[$ItemId] += $AdjustedCount;
1254  }
1255  else
1256  {
1257  $Counts[$ItemId] = $AdjustedCount;
1258  }
1259  }
1260  }
1261  }
1262 
1263  # if counts were found
1264  if (isset($Counts))
1265  {
1266  # for each count
1267  foreach ($Counts as $ItemId => $Count)
1268  {
1269  # if word flagged as required
1270  if ($Flags & WORD_REQUIRED)
1271  {
1272  # increment required word count for record
1273  if (isset($this->RequiredTermCounts[$ItemId]))
1274  {
1275  $this->RequiredTermCounts[$ItemId]++;
1276  }
1277  else
1278  {
1279  $this->RequiredTermCounts[$ItemId] = 1;
1280  }
1281  }
1282 
1283  # add to item record score
1284  if (isset($Scores[$ItemId]))
1285  {
1286  $Scores[$ItemId] += $Count;
1287  }
1288  else
1289  {
1290  $Scores[$ItemId] = $Count;
1291  }
1292  }
1293  }
1294  }
1295  }
1296 
1297  # return basic scores to caller
1298  return $Scores;
1299  }
1300 
1301  # extract phrases (terms surrounded by quotes) from search string
1302  private function ParseSearchStringForPhrases($SearchString)
1303  {
1304  # split into chunks delimited by double quote marks
1305  $Pieces = explode("\"", $SearchString); # "
1306 
1307  # for each pair of chunks
1308  $Index = 2;
1309  $Phrases = array();
1310  while ($Index < count($Pieces))
1311  {
1312  # grab phrase from chunk
1313  $Phrase = trim(addslashes($Pieces[$Index - 1]));
1314  $Flags = WORD_PRESENT;
1315 
1316  # grab first character of phrase
1317  $FirstChar = substr($Pieces[$Index - 2], -1);
1318 
1319  # set flags to reflect any option characters
1320  if ($FirstChar == "-")
1321  {
1322  $Flags |= WORD_EXCLUDED;
1323  if (!isset($Phrases[$Phrase]))
1324  {
1325  $this->ExcludedTermCount++;
1326  }
1327  }
1328  else
1329  {
1330  if ((($this->DefaultSearchLogic == self::LOGIC_AND) && ($FirstChar != "~"))
1331  || ($FirstChar == "+"))
1332  {
1333  $Flags |= WORD_REQUIRED;
1334  if (!isset($Phrases[$Phrase]))
1335  {
1336  $this->RequiredTermCount++;
1337  }
1338  }
1339  if (!isset($Phrases[$Phrase]))
1340  {
1341  $this->InclusiveTermCount++;
1342  $this->SearchTermList[] = $Phrase;
1343  }
1344  }
1345  $Phrases[$Phrase] = $Flags;
1346 
1347  # move to next pair of chunks
1348  $Index += 2;
1349  }
1350 
1351  # return phrases to caller
1352  return $Phrases;
1353  }
1354 
1355  # extract groups (terms surrounded by parens) from search string
1356  # (NOTE: NOT YET IMPLEMENTED!!!)
1357  private function ParseSearchStringForGroups($SearchString)
1358  {
1359  # split into chunks delimited by open paren
1360  $Pieces = explode("(", $SearchString);
1361 
1362  # for each chunk
1363  $Index = 2;
1364  while ($Index < count($Pieces))
1365  {
1366  # grab phrase from chunk
1367  $Group = trim(addslashes($Pieces[$Index - 1]));
1368  $Groups[] = $Group;
1369 
1370  # move to next pair of chunks
1371  $Index += 2;
1372  }
1373 
1374  # return phrases to caller
1375  return $Groups;
1376  }
1377 
1378  protected function SearchFieldForPhrases($FieldName, $Phrase)
1379  {
1380  # error out
1381  exit("<br>SE - ERROR: SearchFieldForPhrases() not implemented<br>\n");
1382  }
1383 
1384  private function SearchForPhrases($Phrases, $Scores, $FieldName = "XXXKeywordXXX",
1385  $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
1386  {
1387  # if phrases are found
1388  if (count($Phrases) > 0)
1389  {
1390  # if this is a keyword search
1391  if ($FieldName == "XXXKeywordXXX")
1392  {
1393  # for each field
1394  foreach ($this->FieldInfo as $KFieldName => $Info)
1395  {
1396  # if field is marked to be included in keyword searches
1397  if ($Info["InKeywordSearch"])
1398  {
1399  # call ourself with that field
1400  $Scores = $this->SearchForPhrases($Phrases, $Scores, $KFieldName,
1401  $ProcessNonExcluded, $ProcessExcluded);
1402  }
1403  }
1404  }
1405  else
1406  {
1407  # for each phrase
1408  foreach ($Phrases as $Phrase => $Flags)
1409  {
1410  $this->DMsg(2, "Searching for phrase '".$Phrase
1411  ."' in field ".$FieldName);
1412 
1413  # if phrase flagged as excluded and we are doing excluded phrases
1414  # or phrase flagged as non-excluded and we are doing non-excluded phrases
1415  if (($ProcessExcluded && ($Flags & WORD_EXCLUDED))
1416  || ($ProcessNonExcluded && !($Flags & WORD_EXCLUDED)))
1417  {
1418  # initialize score list if necessary
1419  if ($Scores === NULL) { $Scores = array(); }
1420 
1421  # retrieve list of items that contain phrase
1422  $ItemIds = $this->SearchFieldForPhrases(
1423  $FieldName, $Phrase);
1424 
1425  # for each item that contains phrase
1426  foreach ($ItemIds as $ItemId)
1427  {
1428  # if we are doing excluded phrases and phrase flagged as excluded
1429  if ($ProcessExcluded && ($Flags & WORD_EXCLUDED))
1430  {
1431  # knock item off of list
1432  unset($Scores[$ItemId]);
1433  }
1434  elseif ($ProcessNonExcluded)
1435  {
1436  # calculate phrase value based on number of words and field weight
1437  $PhraseScore = count(preg_split("/[\s]+/", $Phrase, -1, PREG_SPLIT_NO_EMPTY))
1438  * $this->FieldInfo[$FieldName]["Weight"];
1439  $this->DMsg(2, "Phrase score is ".$PhraseScore);
1440 
1441  # bump up item record score
1442  if (isset($Scores[$ItemId]))
1443  {
1444  $Scores[$ItemId] += $PhraseScore;
1445  }
1446  else
1447  {
1448  $Scores[$ItemId] = $PhraseScore;
1449  }
1450 
1451  # if phrase flagged as required
1452  if ($Flags & WORD_REQUIRED)
1453  {
1454  # increment required word count for record
1455  if (isset($this->RequiredTermCounts[$ItemId]))
1456  {
1457  $this->RequiredTermCounts[$ItemId]++;
1458  }
1459  else
1460  {
1461  $this->RequiredTermCounts[$ItemId] = 1;
1462  }
1463  }
1464  }
1465  }
1466  }
1467  }
1468  }
1469  }
1470 
1471  # return updated scores to caller
1472  return $Scores;
1473  }
1474 
1475  private function FilterOnExcludedWords($Words, $Scores, $FieldName = "XXXKeywordXXX")
1476  {
1477  $DB = $this->DB;
1478 
1479  # grab field ID
1480  $FieldId = $this->GetFieldId($FieldName);
1481 
1482  # for each word
1483  foreach ($Words as $Word => $Flags)
1484  {
1485  # if word flagged as excluded
1486  if ($Flags & WORD_EXCLUDED)
1487  {
1488  # look up record ID for word
1489  $WordId = $this->GetWordId($Word);
1490 
1491  # if word is in DB
1492  if ($WordId !== NULL)
1493  {
1494  # look up counts for word
1495  $DB->Query("SELECT ItemId FROM SearchWordCounts "
1496  ."WHERE WordId=${WordId} AND FieldId=${FieldId}");
1497 
1498  # for each count
1499  while ($Record = $DB->FetchRow())
1500  {
1501  # if item record is in score list
1502  $ItemId = $Record["ItemId"];
1503  if (isset($Scores[$ItemId]))
1504  {
1505  # remove item record from score list
1506  $this->DMsg(3, "Filtering out item ".$ItemId
1507  ." because it contained word \"".$Word."\"");
1508  unset($Scores[$ItemId]);
1509  }
1510  }
1511  }
1512  }
1513  }
1514 
1515  # returned filtered score list to caller
1516  return $Scores;
1517  }
1518 
1519  private function FilterOnRequiredWords($Scores)
1520  {
1521  # if there were required words
1522  if ($this->RequiredTermCount > 0)
1523  {
1524  # for each item
1525  foreach ($Scores as $ItemId => $Score)
1526  {
1527  # if item does not meet required word count
1528  if (!isset($this->RequiredTermCounts[$ItemId])
1529  || ($this->RequiredTermCounts[$ItemId] < $this->RequiredTermCount))
1530  {
1531  # filter out item
1532  $this->DMsg(4, "Filtering out item ".$ItemId
1533  ." because it didn't have required word count of "
1534  .$this->RequiredTermCount
1535  .(isset($this->RequiredTermCounts[$ItemId])
1536  ? " (only had "
1537  .$this->RequiredTermCounts[$ItemId]
1538  : " (had none")
1539  .")");
1540  unset($Scores[$ItemId]);
1541  }
1542  }
1543  }
1544 
1545  # return filtered list to caller
1546  return $Scores;
1547  }
1548 
1549  # count, sort, and trim search result scores list
1550  private function CleanScores($Scores, $StartingResult, $NumberOfResults,
1551  $SortByField, $SortDescending)
1552  {
1553  # perform any requested filtering
1554  $this->DMsg(0, "Have ".count($Scores)." results before filter callbacks");
1555  $Scores = $this->FilterOnSuppliedFunctions($Scores);
1556 
1557  # save total number of results available
1558  $this->NumberOfResultsAvailable = count($Scores);
1559 
1560  # if no sorting field specified
1561  if ($SortByField === NULL)
1562  {
1563  # sort result list by score
1564  if ($SortDescending)
1565  arsort($Scores, SORT_NUMERIC);
1566  else
1567  asort($Scores, SORT_NUMERIC);
1568  }
1569  else
1570  {
1571  # get list of item IDs in sorted order
1572  $SortedIds = $this->GetItemIdsSortedByField(
1573  $SortByField, $SortDescending);
1574 
1575  # if we have sorted item IDs
1576  if (count($SortedIds) && count($Scores))
1577  {
1578  # strip sorted ID list down to those that appear in search results
1579  $SortedIds = array_intersect($SortedIds, array_keys($Scores));
1580 
1581  # rebuild score list in sorted order
1582  foreach ($SortedIds as $Id)
1583  {
1584  $NewScores[$Id] = $Scores[$Id];
1585  }
1586  $Scores = $NewScores;
1587  }
1588  else
1589  {
1590  # sort result list by score
1591  arsort($Scores, SORT_NUMERIC);
1592  }
1593  }
1594 
1595  # trim result list to match range requested by caller
1596  $ScoresKeys = array_slice(
1597  array_keys($Scores), $StartingResult, $NumberOfResults);
1598  $TrimmedScores = array();
1599  foreach ($ScoresKeys as $Key) { $TrimmedScores[$Key] = $Scores[$Key]; }
1600 
1601  # returned cleaned search result scores list to caller
1602  return $TrimmedScores;
1603  }
1604 
1605  protected function FilterOnSuppliedFunctions($Scores)
1606  {
1607  # if filter functions have been set
1608  if (isset($this->FilterFuncs))
1609  {
1610  # for each result
1611  foreach ($Scores as $ItemId => $Score)
1612  {
1613  # for each filter function
1614  foreach ($this->FilterFuncs as $FuncName)
1615  {
1616  # if filter function return TRUE for item
1617  if (call_user_func($FuncName, $ItemId))
1618  {
1619  # discard result
1620  $this->DMsg(2, "Filter callback <i>".$FuncName
1621  ."</i> rejected item ".$ItemId);
1622  unset($Scores[$ItemId]);
1623 
1624  # bail out of filter func loop
1625  continue 2;
1626  }
1627  }
1628  }
1629  }
1630 
1631  # return filtered list to caller
1632  return $Scores;
1633  }
1634 
1635  private function SearchForComparisonMatches($SearchStrings, $Scores)
1636  {
1637  # for each field
1638  $Index = 0;
1639  foreach ($SearchStrings as $SearchFieldName => $SearchStringArray)
1640  {
1641  # if field is not keyword
1642  if ($SearchFieldName != "XXXKeywordXXX")
1643  {
1644  # convert search string to array if needed
1645  if (!is_array($SearchStringArray))
1646  {
1647  $SearchStringArray = array($SearchStringArray);
1648  }
1649 
1650  # for each search string for this field
1651  foreach ($SearchStringArray as $SearchString)
1652  {
1653  # if search string looks like comparison search
1654  $FoundOperator = preg_match("/^[><!]=./", $SearchString)
1655  || preg_match("/^[><=]./", $SearchString);
1656  if ($FoundOperator
1657  || (isset($this->FieldInfo[$SearchFieldName]["FieldType"])
1658  && ($this->FieldInfo[$SearchFieldName]["FieldType"]
1659  != self::FIELDTYPE_TEXT)))
1660  {
1661  # determine value
1662  $Patterns = array("/^[><!]=/", "/^[><=]/");
1663  $Replacements = array("", "");
1664  $Value = trim(preg_replace($Patterns, $Replacements, $SearchString));
1665 
1666  # determine and save operator
1667  if (!$FoundOperator)
1668  {
1669  $Operators[$Index] = "=";
1670  }
1671  else
1672  {
1673  $Term = trim($SearchString);
1674  $FirstChar = $Term{0};
1675  $FirstTwoChars = $FirstChar.$Term{1};
1676  if ($FirstTwoChars == ">=") { $Operators[$Index] = ">="; }
1677  elseif ($FirstTwoChars == "<=") { $Operators[$Index] = "<="; }
1678  elseif ($FirstTwoChars == "!=") { $Operators[$Index] = "!="; }
1679  elseif ($FirstChar == ">") { $Operators[$Index] = ">"; }
1680  elseif ($FirstChar == "<") { $Operators[$Index] = "<"; }
1681  elseif ($FirstChar == "=") { $Operators[$Index] = "="; }
1682  }
1683 
1684  # if operator was found
1685  if (isset($Operators[$Index]))
1686  {
1687  # save value
1688  $Values[$Index] = $Value;
1689 
1690  # save field name
1691  $FieldNames[$Index] = $SearchFieldName;
1692  $this->DMsg(3, "Added comparison (field = <i>"
1693  .$FieldNames[$Index]."</i> op = <i>"
1694  .$Operators[$Index]."</i> val = <i>"
1695  .$Values[$Index]."</i>)");
1696 
1697  # move to next comparison array entry
1698  $Index++;
1699  }
1700  }
1701  }
1702  }
1703  }
1704 
1705  # if comparisons found
1706  if (isset($Operators))
1707  {
1708  # perform comparisons on fields and gather results
1709  $Results = $this->SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values);
1710 
1711  # if search logic is set to AND
1712  if ($this->DefaultSearchLogic == self::LOGIC_AND)
1713  {
1714  # if results were found
1715  if (count($Results))
1716  {
1717  # if there were no prior results and no terms for keyword search
1718  if ((count($Scores) == 0) && ($this->InclusiveTermCount == 0))
1719  {
1720  # add all results to scores
1721  foreach ($Results as $ItemId)
1722  {
1723  $Scores[$ItemId] = 1;
1724  }
1725  }
1726  else
1727  {
1728  # remove anything from scores that is not part of results
1729  foreach ($Scores as $ItemId => $Score)
1730  {
1731  if (in_array($ItemId, $Results) == FALSE)
1732  {
1733  unset($Scores[$ItemId]);
1734  }
1735  }
1736  }
1737  }
1738  else
1739  {
1740  # clear scores
1741  $Scores = array();
1742  }
1743  }
1744  else
1745  {
1746  # add result items to scores
1747  if ($Scores === NULL) { $Scores = array(); }
1748  foreach ($Results as $ItemId)
1749  {
1750  if (isset($Scores[$ItemId]))
1751  {
1752  $Scores[$ItemId] += 1;
1753  }
1754  else
1755  {
1756  $Scores[$ItemId] = 1;
1757  }
1758  }
1759  }
1760  }
1761 
1762  # return results to caller
1763  return $Scores;
1764  }
1765 
1766  private function SetDebugLevel($SearchStrings)
1767  {
1768  # if search info is an array
1769  if (is_array($SearchStrings))
1770  {
1771  # for each array element
1772  foreach ($SearchStrings as $FieldName => $SearchStringArray)
1773  {
1774  # if element is an array
1775  if (is_array($SearchStringArray))
1776  {
1777  # for each array element
1778  foreach ($SearchStringArray as $Index => $SearchString)
1779  {
1780  # pull out search string if present
1781  $SearchStrings[$FieldName][$Index] = $this->ExtractDebugLevel($SearchString);
1782  }
1783  }
1784  else
1785  {
1786  # pull out search string if present
1787  $SearchStrings[$FieldName] = $this->ExtractDebugLevel($SearchStringArray);
1788  }
1789  }
1790  }
1791  else
1792  {
1793  # pull out search string if present
1794  $SearchStrings = $this->ExtractDebugLevel($SearchStrings);
1795  }
1796 
1797  # return new search info to caller
1798  return $SearchStrings;
1799  }
1800 
1801  private function ExtractDebugLevel($SearchString)
1802  {
1803  # if search string contains debug level indicator
1804  if (strstr($SearchString, "DBUGLVL="))
1805  {
1806  # remove indicator and set debug level
1807  $Level = preg_replace("/^\\s*DBUGLVL=([1-9]{1,2}).*/", "\\1", $SearchString);
1808  if ($Level > 0)
1809  {
1810  $this->DebugLevel = $Level;
1811  $this->DMsg(0, "Setting debug level to ".$Level);
1812  $SearchString = preg_replace("/DBUGLVL=${Level}/", "", $SearchString);
1813  }
1814  }
1815 
1816  # return (possibly) modified search string to caller
1817  return $SearchString;
1818  }
1819 
1820  # load and return search result scores array containing all possible records
1821  private function LoadScoresForAllRecords()
1822  {
1823  # start with empty list
1824  $Scores = array();
1825 
1826  # for every item
1827  $this->DB->Query("SELECT ".$this->ItemIdFieldName
1828  ." FROM ".$this->ItemTableName);
1829  while ($Record = $this->DB->FetchRow())
1830  {
1831  # set score for item to 1
1832  $Scores[$Record[$this->ItemIdFieldName]] = 1;
1833  }
1834 
1835  # return array with all scores to caller
1836  return $Scores;
1837  }
1838 
1839 
1840  # ---- private functions used in building search database
1841 
1849  private function UpdateWordCount($Word, $ItemId, $FieldId, $Weight = 1)
1850  {
1851  # retrieve ID for word
1852  $WordIds[] = $this->GetWordId($Word, TRUE);
1853 
1854  # if stemming is enabled
1855  if ($this->StemmingEnabled)
1856  {
1857  # retrieve ID for stem of word
1858  $Stem = PorterStemmer::Stem($Word, TRUE);
1859  $WordIds[] = $this->GetStemId($Stem, TRUE);
1860  }
1861 
1862  # for word and stem of word
1863  foreach ($WordIds as $WordId)
1864  {
1865  # if word count already added to database
1866  if (isset($this->WordCountAdded[$WordId][$FieldId]))
1867  {
1868  # update word count
1869  $this->DB->Query("UPDATE SearchWordCounts SET Count=Count+".$Weight
1870  ." WHERE WordId=".$WordId
1871  ." AND ItemId=".$ItemId
1872  ." AND FieldId=".$FieldId);
1873  }
1874  else
1875  {
1876  # add word count to DB
1877  $this->DB->Query("INSERT INTO SearchWordCounts"
1878  ." (WordId, ItemId, FieldId, Count) VALUES"
1879  ." (".$WordId.", ".$ItemId.", ".$FieldId.", ".$Weight.")");
1880 
1881  # remember that we added count for this word
1882  $this->WordCountAdded[$WordId][$FieldId] = TRUE;
1883  }
1884 
1885  # decrease weight for stem
1886  $Weight = ceil($Weight / 2);
1887  }
1888  }
1889 
1890  protected function GetFieldContent($ItemId, $FieldName)
1891  {
1892  # error out
1893  exit("<br>SE - ERROR: GetFieldContent() not implemented<br>\n");
1894  }
1895 
1896  private function RecordSearchInfoForText(
1897  $ItemId, $FieldName, $Weight, $Text, $IncludeInKeyword)
1898  {
1899  # normalize text
1900  $Words = $this->ParseSearchStringForWords($Text, TRUE);
1901 
1902  # if there was text left after parsing
1903  if (count($Words) > 0)
1904  {
1905  # get ID for field
1906  $FieldId = $this->GetFieldId($FieldName);
1907 
1908  # if text should be included in keyword searches
1909  if ($IncludeInKeyword)
1910  {
1911  # get ID for keyword field
1912  $KeywordFieldId = $this->GetFieldId("XXXKeywordXXX");
1913  }
1914 
1915  # for each word
1916  foreach ($Words as $Word => $Flags)
1917  {
1918  # update count for word
1919  $this->UpdateWordCount($Word, $ItemId, $FieldId);
1920 
1921  # if text should be included in keyword searches
1922  if ($IncludeInKeyword)
1923  {
1924  # update keyword field count for word
1925  $this->UpdateWordCount(
1926  $Word, $ItemId, $KeywordFieldId, $Weight);
1927  }
1928  }
1929  }
1930  }
1931 
1932  # print debug message if level set high enough
1933  protected function DMsg($Level, $Msg)
1934  {
1935  if ($this->DebugLevel > $Level)
1936  {
1937  print("SE: ".$Msg."<br>\n");
1938  }
1939  }
1940 
1941  # ---- BACKWARD COMPATIBILITY --------------------------------------------
1942 
1943  # possible types of logical operators
1944  const SEARCHLOGIC_AND = 1;
1945  const SEARCHLOGIC_OR = 2;
1946 }
1947 
1948 ?>