CWIS Developer Documentation
QuickSearchHelper.php
Go to the documentation of this file.
1 <?PHP
2 #
3 # FILE: QuickSearchHelper.php
4 #
5 # Part of the Collection Workflow Integration System (CWIS)
6 # Copyright 2002-2015 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu/cwis/
8 #
9 
15 {
16 
/**
* Search a field for values matching a specified search string.
* The work is delegated to SearchForUsers(), SearchForResources(), or (by
* default) SearchForValues(), selected by $Field->Type(), with the number of
* results capped at $Field->NumAjaxResults().
* @param MetadataField $Field Metadata field to search.
* @param string $SearchString Search string to match against.
* @param array $IdExclusions IDs to exclude from the results.  (OPTIONAL)
* @param array $ValueExclusions Values to exclude from the results.  (OPTIONAL)
* @return array Whatever the selected helper returns; each of them returns
*       array(number of results, number of additional results, results).
* @throws Exception If value exclusions are supplied on the branch that
*       searches for resources.
*/
26  public static function SearchField(
27  MetadataField $Field,
28  $SearchString,
29  array $IdExclusions=array(),
30  array $ValueExclusions=array())
31  {
# the field's own AJAX result limit caps how many results are returned
32  $MaxResults = $Field->NumAjaxResults();
33 
# pick the search helper appropriate to the type of the field
34  switch ($Field->Type())
35  {
# NOTE(review): the "case" labels for the next two branches (original
#       lines 36 and 40) are missing from this listing; presumably they
#       select the user and reference field types -- confirm against the
#       real source file
37  return self::SearchForUsers(
38  $SearchString, $MaxResults, $IdExclusions, $ValueExclusions);
39 
# SearchForResources() accepts ID exclusions only, so value exclusions
#       are rejected outright rather than silently ignored
41  if (count($ValueExclusions))
42  {
43  throw new Exception(
44  "Cannot exclude resource by value. "
45  ."Did you want IdExclusions instead?");
46  }
47 
48  return self::SearchForResources(
49  $Field, $SearchString, $MaxResults, $IdExclusions);
50 
# every other field type is searched via the field's item factory
51  default:
52  return self::SearchForValues(
53  $Field, $SearchString, $MaxResults, $IdExclusions, $ValueExclusions);
54  }
55  }
56 
/**
* Highlight all instances of the search string in the result label.
* Each search string is split on whitespace into terms; a run of terms
* enclosed in double quotes is treated as one phrase.  Every term or phrase
* at least two characters long is wrapped in <b>...</b> wherever it occurs
* in the label (case-insensitively).
* @param mixed $SearchTerms Search string, or array of search strings.
* @param string $LabelForFormatting Label in which matches are highlighted.
* @return string Label with the matching segments wrapped in <b> tags.
*/
public static function HighlightSearchString($SearchTerms, $LabelForFormatting)
{
    if (!is_array($SearchTerms))
    {
        $SearchTerms = array($SearchTerms);
    }

    foreach ($SearchTerms as $SearchString)
    {
        # split into terms, dropping empty pieces so that an empty or
        #       all-whitespace search string yields no terms (and so that
        #       the first character of every term can be inspected safely)
        $ExplodedSearch = preg_split(
            '/\s+/', trim($SearchString), -1, PREG_SPLIT_NO_EMPTY);
        if ($ExplodedSearch === FALSE)
        {
            continue;
        }

        $Patterns = array();
        $Index = 0;
        $InQuote = FALSE;

        # iterate through each term in the search string
        foreach ($ExplodedSearch as $Term)
        {
            # a leading quote opens a quoted phrase
            if ($Term[0] == '"')
            {
                $InQuote = TRUE;
            }

            # a trailing quote means that we have found the end of the phrase
            if (substr($Term, -1) == '"')
            {
                $InQuote = FALSE;
            }

            # the quotes themselves are not part of what gets highlighted
            $Term = str_replace('"', "", $Term);

            # add the term to the list of patterns at the current index
            #       (terms inside a quoted phrase are appended to the same
            #       entry, separated by spaces)
            $Patterns[$Index] = (isset($Patterns[$Index]) ?
                    $Patterns[$Index]." " : "").$Term;

            # outside of a quoted phrase, the next term starts a new entry
            if (!$InQuote)
            {
                $Index++;
            }
        }

        # escape the terms for use in a regex, skipping segments that are
        #       shorter than two characters
        $PregPatterns = array();
        foreach ($Patterns as $Term)
        {
            if (strlen($Term) >= 2)
            {
                # append (rather than overwrite) so that every term in
                #       the search string is highlighted, not just the last
                $PregPatterns[] = "/".preg_quote($Term, "/")."/i";
            }
        }

        # nothing to highlight for this search string
        if (!count($PregPatterns))
        {
            continue;
        }

        # do the highlighting, keeping the label intact if the regex
        #       engine reports an error
        $Highlighted = preg_replace(
            $PregPatterns, "<b>$0</b>", $LabelForFormatting);
        if ($Highlighted !== NULL)
        {
            $LabelForFormatting = $Highlighted;
        }
    }

    return $LabelForFormatting;
}
130 
/**
* Prepare a search string for use in a wildcard query.
* Punctuation that interferes with searching is replaced by spaces,
* one-character words are dropped, and a trailing "*" is added to every
* remaining word that does not already end in a double quote or asterisk.
* @param string $SearchString Search string to prepare.
* @return string Prepared search string, with words separated by single
*       spaces and no surrounding whitespace.
*/
private static function PrepareSearchString($SearchString)
{
    # "--" causes searches to fail and is often found in classifications;
    #       the other punctuation is simply unwanted
    $Cleaned = str_replace(array("--",",",".", ":"), " ", $SearchString);

    $PreparedWords = array();
    foreach (preg_split('/\s+/', $Cleaned, -1, PREG_SPLIT_NO_EMPTY) as $Token)
    {
        # one-character "words" are not searched for
        if (strlen($Token) == 1)
        {
            continue;
        }

        # words that are quoted or already carry an asterisk are used as is,
        #       everything else gets the wildcard operator for stemming
        $AlreadyTerminated =
                preg_match('/\"$/', $Token) || preg_match('/\*$/', $Token);
        $PreparedWords[] = $AlreadyTerminated ? $Token : $Token."*";
    }

    # join with single spaces and remove any whitespace padding
    return trim(implode(" ", $PreparedWords));
}
176 
/**
* Sort search results by how closely each value matches the search string,
* then trim the list to the requested size.  Values are placed in the first
* of these bins that applies, and bins are emitted in this order:
*   Exact - value equals the search string
*   BegSp - value begins with the search string followed by a non-word
*           character
*   Beg   - value begins with the search string
*   End   - value ends with the search string
*   MidSp - value contains the search string followed by a non-word character
*   Mid   - value contains the search string
*   Other - everything else
* Within a bin, values are sorted with asort().  Matching is
* case-insensitive and is done after replacing "--", ",", "." and ":" with
* spaces and collapsing runs of whitespace, in both the search string and
* the values.
* @param array $Results Search results, as (key => value).
* @param string $SearchString Search string the results were found with.
* @param int $MaxResults Maximum number of results to return.
* @return array Sorted subset of $Results, with keys preserved.
*/
private static function SortSearchResults($Results, $SearchString, $MaxResults)
{
    # punctuation that is replaced by spaces before comparing
    $Punctuation = array("--",",",".", ":");

    # normalize the search string, then escape it for use inside a regex
    $SafeStr = preg_quote( trim( preg_replace('/\s+/', " ",
            str_replace($Punctuation, " ", $SearchString) )), '/');

    # patterns for each bin, in order of decreasing match quality
    $BinPatterns = array(
            "Exact" => '/^'.$SafeStr.'$/i',
            "BegSp" => '/^'.$SafeStr.'\\W/i',
            "Beg" => '/^'.$SafeStr.'/i',
            "End" => '/'.$SafeStr.'$/i',
            "MidSp" => '/'.$SafeStr.'\\W/i',
            "Mid" => '/'.$SafeStr.'/i',
            );

    # bins are kept in output order, with the catch-all bin last
    $Matches = array_fill_keys(array_keys($BinPatterns), array());
    $Matches["Other"] = array();

    # iterate over search results, sorting them into bins
    foreach ($Results as $Key => $Val)
    {
        # apply the same normalization to the value as to the search string,
        #       but do NOT regex-escape it: the value is the subject of the
        #       match, not a pattern, and escaping it would insert literal
        #       backslashes that keep values containing characters such as
        #       "+" or "(" from ever matching
        $TestVal = trim( preg_replace('/\s+/', " ",
                str_replace($Punctuation, " ", $Val) ));

        # use the first (best) bin whose pattern matches
        $ix = "Other";
        foreach ($BinPatterns as $BinName => $Pattern)
        {
            if (preg_match($Pattern, $TestVal))
            {
                $ix = $BinName;
                break;
            }
        }

        $Matches[$ix][$Key] = $Val;
    }

    # assemble the sorted results, best bin first
    $SortedResults = array();
    foreach ($Matches as $Bin)
    {
        asort($Bin);
        $SortedResults += $Bin;
    }

    # trim down the list to the requested number
    return array_slice($SortedResults, 0, $MaxResults, TRUE);
}
270 
/**
* Perform a keyword search for resources that a field may refer to.
* Only results from the schemas listed by $DstField->ReferenceableSchemaIds()
* are kept, resources that the current user ($GLOBALS["G_User"]) cannot view
* are dropped, and the remainder are labelled with the value of the field
* mapped to "Title" and ordered with SortSearchResults().
* @param object $DstField Field the search is being performed for.
* @param string $SearchString Keyword search string.
* @param int $MaxResults Maximum number of results to return.
* @param array $IdExclusions Resource IDs to exclude.  (OPTIONAL)
* @return array array(number of results returned, number of additional
*       results available, (resource ID => title) for returned results).
*/
private static function SearchForResources(
    $DstField,
    $SearchString,
    $MaxResults,
    array $IdExclusions=array() )
{
    # build keyword search parameters and give event handlers the chance
    #       to modify them
    $Params = new SearchParameterSet();
    $Params->AddParameter($SearchString);
    $Filtered = $GLOBALS["AF"]->SignalEvent(
        "EVENT_FIELD_SEARCH_FILTER",
        array(
            "Search" => $Params,
            "Field" => $DstField));
    $Params = $Filtered["Search"];

    # run the search (results come back grouped by schema ID)
    $Engine = new SPTSearchEngine();
    $ResultsBySchema = $Engine->Search($Params);

    # schemas whose resources this field is allowed to reference
    $AllowedSchemaIds = $DstField->ReferenceableSchemaIds();

    # collect the viewable results from the allowed schemas
    $Scores = array();
    foreach ($ResultsBySchema as $SchemaId => $SchemaResults)
    {
        if (!in_array($SchemaId, $AllowedSchemaIds))
        {
            continue;
        }

        # filter out resources the user cannot see
        $Factory = new ResourceFactory($SchemaId);
        $VisibleIds = $Factory->FilterNonViewableResources(
            array_keys($SchemaResults), $GLOBALS["G_User"]);

        $Scores += array_intersect_key(
            $SchemaResults, array_flip($VisibleIds));
    }

    # drop explicitly excluded resource IDs, if any were given
    if (count($IdExclusions))
    {
        $Scores = array_diff_key($Scores, array_flip($IdExclusions));
    }

    # look up the mapped title of every remaining resource
    $GLOBALS["AF"]->LoadFunction("GetResourceFieldValue");
    $Titles = array();
    foreach (array_keys($Scores) as $ResourceId)
    {
        $Resource = new Resource($ResourceId);
        $TitleField = $Resource->Schema()->GetFieldByMappedName("Title");
        $Titles[$ResourceId] = GetResourceFieldValue($Resource, $TitleField);
    }

    # remember how many there were before trimming
    $TotalResults = count($Titles);

    # order by match quality and cut down to the requested number
    $Titles = self::SortSearchResults($Titles, $SearchString, $MaxResults);

    # report what is being returned and how much more is available
    $NumReturned = count($Titles);
    return array($NumReturned, $TotalResults - $NumReturned, $Titles);
}
357 
/**
* Search for users whose names match a search string.
* When the search string is shorter than MySQL's "ft_min_word_len" setting,
* it is first looked up as given; if that does not produce $MaxResults
* matches (or was skipped), a second lookup is made with the string
* prepared by PrepareSearchString().
* @param string $SearchString Search string.
* @param int $MaxResults Maximum number of results to return.  (OPTIONAL,
*       defaults to 15)
* @param array $IdExclusions User IDs to exclude.  (OPTIONAL)
* @param array $ValueExclusions Values to exclude.  (OPTIONAL)
* @return array array(number of results returned, number of additional
*       results available, results), with result keys preserved.
*/
private static function SearchForUsers(
    $SearchString,
    $MaxResults = 15,
    array $IdExclusions=array(),
    array $ValueExclusions=array())
{
    # factory used for both lookups
    $UserFactory = new CWUserFactory();

    # minimum word length for fuzzy query matching
    $SysVars = new MysqlSystemVariables($GLOBALS["DB"]);
    $MinWordLen = intval($SysVars->Get("ft_min_word_len"));

    # strings below the minimum length are matched exactly first
    $Found = array();
    if (strlen($SearchString) < $MinWordLen)
    {
        $Found = $UserFactory->FindUserNames(
            $SearchString,
            "UserName", "UserName", 0, # defaults
            PHP_INT_MAX,
            $IdExclusions,
            $ValueExclusions);
    }

    # if that left room for more results, add fuzzy matches (entries
    #       already found keep their place, because += never overwrites)
    $StillNeeded = $MaxResults - count($Found);
    if ($StillNeeded > 0)
    {
        $Found += $UserFactory->FindUserNames(
            self::PrepareSearchString($SearchString),
            "UserName", "UserName", 0, # defaults
            PHP_INT_MAX,
            $IdExclusions,
            $ValueExclusions);
    }

    # keep just the requested number, and report how many were left over
    $TotalFound = count($Found);
    $Returned = array_slice($Found, 0, $MaxResults, TRUE);
    $NumReturned = count($Returned);

    return array($NumReturned, $TotalFound - $NumReturned, $Returned);
}
423 
/**
* Search the values of a field via the field's item factory.
* The search string is first passed through the
* "EVENT_FIELD_SEARCH_FILTER" event.  Strings shorter than MySQL's
* "ft_min_word_len" setting are looked up as given; otherwise matching
* recently used values are listed first, followed by an "<hr>" separator
* entry (key -1).  In both cases, as long as more results are wanted, the
* string prepared by PrepareSearchString() is searched for and the matches
* are ordered with SortSearchResults().
* @param MetadataField $Field Metadata field to search.
* @param string $SearchString Search string.
* @param int $MaxResults Maximum number of results to return.
* @param array $IdExclusions Item IDs to exclude.
* @param array $ValueExclusions Values to exclude.
* @return array array(number of entries returned, reported total minus
*       number of entries returned, entries).
*/
private static function SearchForValues(
    MetadataField $Field,
    $SearchString,
    $MaxResults,
    array $IdExclusions,
    array $ValueExclusions)
{
    $Factory = $Field->GetFactory();

    # minimum word length for fuzzy query matching
    $SysVars = new MysqlSystemVariables($GLOBALS["DB"]);
    $MinWordLen = intval($SysVars->Get("ft_min_word_len"));

    # let event handlers modify the search string
    $Filtered = $GLOBALS["AF"]->SignalEvent(
        "EVENT_FIELD_SEARCH_FILTER",
        array(
            "Search" => $SearchString,
            "Field" => $Field));
    $SearchString = $Filtered["Search"];

    # strings below the minimum word length get an exact lookup first
    $IsShortQuery = (strlen($SearchString) < $MinWordLen);

    $Results = array();
    $Total = 0;

    if ($IsShortQuery)
    {
        # exact matches, plus how many of them exist overall
        $Results += $Factory->SearchForItemNames(
            $SearchString,
            $MaxResults,
            FALSE, TRUE, 0, # defaults
            $IdExclusions,
            $ValueExclusions);
        $Total += $Factory->GetCountForItemNames(
            $SearchString,
            FALSE, TRUE, # defaults,
            $IdExclusions,
            $ValueExclusions);

        # whatever the exact lookup returned no longer needs to be fetched
        $MaxResults -= count($Results);
    }

    # if more results should be fetched
    if ($MaxResults > 0)
    {
        $Prepared = self::PrepareSearchString($SearchString);

        if (!$IsShortQuery)
        {
            # recently used values go first, set apart by a separator
            $Results += $Factory->FindMatchingRecentlyUsedValues(
                $Prepared, 5, $IdExclusions, $ValueExclusions);

            if (count($Results))
            {
                $Results += array(-1 => "<hr>");
            }
        }

        # fetch candidate matches, then rank and trim them
        $Candidates = $Factory->SearchForItemNames(
            $Prepared,
            2000,
            FALSE, TRUE, 0, # defaults
            $IdExclusions,
            $ValueExclusions);
        $Results += self::SortSearchResults(
            $Candidates, $SearchString, $MaxResults);

        $Total += $Factory->GetCountForItemNames(
            $Prepared,
            FALSE, TRUE, # defaults,
            $IdExclusions,
            $ValueExclusions);
    }

    # work out how many more results exist than are being returned
    $NumReturned = count($Results);

    return array($NumReturned, $Total - $NumReturned, $Results);
}
520 }
Set of parameters used to perform a search.
GetFactory()
Retrieve item factory object for this field.
static HighlightSearchString($SearchTerms, $LabelForFormatting)
Highlight all instances of the search string in the result label.
static SearchField(MetadataField $Field, $SearchString, array $IdExclusions=array(), array $ValueExclusions=array())
Search a field for values matching a specified search string.
Class that permits easier access to MySQL system variables.
CWIS-specific user factory class.
Object representing a locally-defined type of metadata field.
NumAjaxResults($NewValue=DB_NOVALUE)
Get/set the maximum number of results to display in an AJAX dropdown.
Represents a "resource" in CWIS.
Definition: Resource.php:13
Type($NewValue=DB_NOVALUE)
Get/set type of metadata field (enumerated value).
Convenience class for QuickSearch responses, making it easy to share functions common to different ty...
Factory for Resource objects.