<?PHP
#
#   FILE:  SearchLog.php
#
#   Part of the Collection Workflow Integration System (CWIS)
#   Copyright 2016 Edward Almasy and Internet Scout Research Group
#   http://scout.wisc.edu/cwis/
#

/**
* Three-way comparison of two values, suitable for use as a uasort() callback.
* @param mixed $a Left-hand value.
* @param mixed $b Right-hand value.
* @return int -1 when $a is less than $b, 0 when the two are (loosely)
*       equal, and 1 in every other case.
*/
function CompareValues($a, $b)
{
    # "less than" is tested before equality, and anything that is neither
    #   less than nor equal is reported as greater
    return ($a < $b) ? -1 : (($a == $b) ? 0 : 1);
}

# ----- MAIN -----------------------------------------------------------------


# define constants for the start time options (these select how far back
#   from the current time the report looks, with ST_FOREVER placing no
#   limit on the age of searches; the option is supplied via the "ST"
#   form value)
define("ST_1_DAY", 1);
define("ST_1_WEEK", 2);
define("ST_1_MONTH", 3);
define("ST_3_MONTH", 4);
define("ST_6_MONTH", 5);
define("ST_12_MONTH", 6);
define("ST_24_MONTH", 7);
define("ST_FOREVER", 8);

# define constants for the user type options (supplied via the "UT" form
#   value:  UT_ALL applies no user filtering, UT_ANON selects searches
#   with no user ID recorded, and UT_PRIV / UT_UNPRIV select searches by
#   logged-in users that were / were not flagged as privileged)
define("UT_ALL", 1);
define("UT_ANON", 2);
define("UT_PRIV", 3);
define("UT_UNPRIV", 4);

# define constants for the search type options (supplied via the "STY"
#   form value:  a search counts as successful when it has a non-zero
#   number of results and as failed otherwise)
define("STY_ALL", 1);
define("STY_SUCCESSFUL", 2);
define("STY_FAILED", 3);

# make sure user has sufficient permission to view report
if (!CheckAuthorization(PRIV_COLLECTIONADMIN)) {  return;  }

# grab ahold of the relevant metrics objects
$Recorder = $GLOBALS["G_PluginManager"]->GetPlugin("MetricsRecorder");
$Reporter = $GLOBALS["G_PluginManager"]->GetPlugin("MetricsReporter");

# extract parameters (numeric options are forced to integers so that
#   they can safely be compared against the option constants, used as
#   array indexes, and embedded in links)
$H_SearchType = intval(GetFormValue("STY", STY_ALL));
$UserType = intval(GetFormValue("UT", UT_ALL));
$H_View = GetFormValue("V", "Log");

# the number of results per page is later used as a slice length and
#   embedded in links, so it must be a positive integer (fall back to
#   the default when the supplied value is not usable)
$H_ResultsPerPage = intval(GetFormValue("RP", 50));
if ($H_ResultsPerPage < 1)
{
    $H_ResultsPerPage = 50;
}

# the log view always covers the last 24 months, while other views
#   honor the requested start time
$H_StartTime = ($H_View == "Log") ? ST_24_MONTH :
           intval(GetFormValue("ST", ST_3_MONTH));

# an unrecognized start time would have no entry in the table of
#   starting times, so fall back to the default when that happens
if (($H_StartTime < ST_1_DAY) || ($H_StartTime > ST_FOREVER))
{
    $H_StartTime = ST_3_MONTH;
}

# key of a search to flag as spam (NULL when none was specified)
$SpamSearch = GetFormValue("Spam");

# paging and sorting parameters supplied by the transport controls
$StartIndex = intval(GetFormValue(
    TransportControlsUI::PNAME_STARTINGINDEX, 0));
$SortField = GetFormValue(
    TransportControlsUI::PNAME_SORTFIELD);
$RevSort = GetFormValue(
    TransportControlsUI::PNAME_REVERSESORT, FALSE);

# process spam searches
if ($SpamSearch !== NULL)
{
    # strip off any trailing "_<number>" (the log view appends an index
    #   to the search key when building list entries), leaving just the
    #   search key itself
    $SpamSearch = preg_replace(
        '/_[0-9]+$/', '', $SpamSearch);

    # search keys are MD5 hashes, so only values that look like one are
    #   added to the ignore list (anything else cannot match a logged
    #   search, and should not be passed along to the database)
    if (is_string($SpamSearch)
            && preg_match('/^[0-9a-f]{32}\z/', $SpamSearch))
    {
        $DB = new Database();

        # add this search to the ignore list
        $DB->Query("INSERT IGNORE INTO MetricsReporter_SpamSearches "
                   ."(SearchKey) VALUES ('".addslashes($SpamSearch)."')");
    }

    # redirect to our search listing, preserving the current view
    #   settings (values that come in as strings are URL-encoded so that
    #   they cannot alter the structure of the link)
    $GLOBALS["AF"]->SetJumpToPage(
        "index.php?P=P_MetricsReporter_SearchLog"
        ."&V=".urlencode($H_View)
        ."&ST=".$H_StartTime
        ."&UT=".$UserType
        ."&STY=".$H_SearchType
        ."&RP=".urlencode($H_ResultsPerPage)
        ."&".TransportControlsUI::PNAME_STARTINGINDEX."=".$StartIndex);

    return;
}

# starting times are measured back from the top of the current hour
$CurrentTime = strtotime(date("Y-m-d H:00:00"));

# offsets (in the relative format understood by strtotime()) for each of
#   the start time options that limits the age of searches
$StartOffsets = array(
    ST_24_MONTH => "-24 months",
    ST_12_MONTH => "-12 months",
    ST_6_MONTH => "-6 months",
    ST_3_MONTH => "-3 months",
    ST_1_MONTH => "-1 month",
    ST_1_WEEK => "-1 week",
    ST_1_DAY => "-1 day",
);

# build the lookup table of starting timestamps, indexed by start time
#   option ("forever" starts at timestamp zero)
$TimeLUT = array(ST_FOREVER => 0);
foreach ($StartOffsets as $StartOption => $StartOffset)
{
    $TimeLUT[$StartOption] = strtotime($StartOffset, $CurrentTime);
}

# load the keys of the spammy searches that we should exclude, flipped so
#   that the search keys become the array indexes (which allows a search
#   to be checked with isset())
$DB = new Database();
$DB->Query("SELECT SearchKey FROM MetricsReporter_SpamSearches");
$SpamSearches = array_flip($DB->FetchColumn("SearchKey"));

# see whether a summary of all searches has already been cached
$CacheData = $Reporter->CacheGet("SearchLog");
if (!is_null($CacheData))
{
    # pull the pieces we need out of the cached summary
    $SearchData = $CacheData["SearchData"];
    $SearchMap = $CacheData["SearchMap"];
    $UserMap = $CacheData["UserMap"];

    unset ($CacheData);
}
else
{
    # no cached summary was available, so it has to be built from the
    # recorded event data, along with the information that is used
    # later on for filtering

    # results being assembled:
    #   SearchData - lists of individual searches, indexed by search key
    #   SearchMap - text descriptions of searches, indexed by search key
    #   UserMap - user names, indexed by user ID
    $SearchData = array();
    $SearchMap = array();
    $UserMap = array();

    # privileged/unprivileged state of each user looked up so far
    $UserCache = array();

    # search keys worked out so far, indexed by recorded search data
    $SearchKeys = array();

    # treat 'advanced' and regular searches as a single set
    # NOTE(review): "+=" is an array union, which keeps the existing entry
    #   when both arrays contain the same index -- presumably the event
    #   data is indexed by values unique across both event types (confirm)
    $AllSearches = $Recorder->GetEventData(
        "MetricsRecorder", MetricsRecorder::ET_SEARCH);
    $AllSearches += $Recorder->GetEventData(
        "MetricsRecorder", MetricsRecorder::ET_ADVANCEDSEARCH);

    # go through every recorded search
    foreach ($AllSearches as $Row)
    {
        $RecordedSearch = $Row["DataOne"];
        $SearchUserId = $Row["UserId"];

        # skip events that have no search string recorded
        #  (Note that the Metrics Data is sometimes missing a search string,
        #   and the cause for that has not yet been determined..)
        if (!strlen($RecordedSearch))
        {
            continue;
        }

        # searches with no logged in user are counted as non-privileged
        $Privileged = FALSE;
        if (strlen($SearchUserId))
        {
            # the first time a user is encountered, check their privileges
            #  against the list of exclusions from the Metrics Reporter and
            #  note their user name
            if (!isset($UserCache[$SearchUserId]))
            {
                $ThisUser = new CWUser($SearchUserId);

                $UserCache[$SearchUserId] = call_user_func_array(
                    array($ThisUser, "HasPriv"),
                    $Reporter->ConfigSetting("PrivsToExcludeFromCounts") );

                $UserMap[$SearchUserId] = $ThisUser->Get("UserName");
            }

            # use the (possibly just stored) privilege state
            $Privileged = $UserCache[$SearchUserId];
        }

        # convert the date of the event to a timestamp
        $Timestamp = strtotime( $Row["EventDate"] );

        # determine the search key for this search
        if (isset($SearchKeys[$RecordedSearch]))
        {
            # this exact search data was seen before, so reuse its key
            $Key = $SearchKeys[$RecordedSearch];
        }
        else
        {
            # the recorded data must be serialized data that can be
            # decoded to a SearchParameterSet -- if it is not, this
            # record is skipped
            if (!IsSerializedData($RecordedSearch))
            {
                continue;
            }
            try
            {
                $SearchParams = new SearchParameterSet($RecordedSearch);
            }
            catch (Exception $e)
            {
                continue;
            }

            # the key is derived from the display parameters, so that
            # searches are de-duplicated based on the parameters shown
            # to the user rather than those used internally for the
            # search (which could potentially, and confusingly, result
            # in the same display parameters being listed a number of
            # times)
            $DisplayParams = SPTSearchEngine::ConvertToDisplayParameters(
                $SearchParams);
            $SearchDesc = $DisplayParams->TextDescription();
            $Key = md5($SearchDesc);

            # remember the key for this search data
            $SearchKeys[$RecordedSearch] = $Key;

            # keep the description for the first non-spam search seen
            # with this key
            if (!isset($SpamSearches[$Key]) && !isset($SearchMap[$Key]))
            {
                $SearchMap[$Key] = $SearchDesc;
            }
        }

        # searches marked as spam are left out of the summary
        if (isset($SpamSearches[$Key]))
        {
            continue;
        }

        # add this search to the summary
        $SearchData[$Key][] = array(
            "UserId" => $SearchUserId,
            "IsPrivileged" => (bool)$Privileged,
            "Timestamp" => $Timestamp,
            "Results" => $Row["DataTwo"],
        );
    }

    # save the summary for later use
    $Reporter->CachePut("SearchLog", array(
        "SearchData" => $SearchData,
        "SearchMap" => $SearchMap,
        "UserMap" => $UserMap));
}

# narrow the data down to the searches that match the requested filters,
#  leaving out any spammy searches (these will exist in cached data
#  after the 'Spam' button is pushed but before the cache expires and
#  is regenerated)
$Filtered = array();
foreach ($SearchData as $Key => $Values)
{
    # drop everything recorded for a search marked as spam
    if (isset($SpamSearches[$Key]))
    {
        continue;
    }

    foreach ($Values as $Value)
    {
        ## search type:  'successful' means that results were produced,
        ##  'failed' means that none were

        $HadResults = ($Value["Results"] != 0);
        if ((($H_SearchType == STY_SUCCESSFUL) && !$HadResults)
                || (($H_SearchType == STY_FAILED) && $HadResults))
        {
            continue;
        }

        ## search age:  nothing from before the start time is kept

        if ($Value["Timestamp"] < $TimeLUT[$H_StartTime])
        {
            continue;
        }

        ## user type:  anonymous searches have no user ID, while
        ##  privileged and unprivileged searches both require one

        $IsAnonymous = ($Value["UserId"] == "");
        switch ($UserType)
        {
            case UT_ANON:
                $IsWanted = $IsAnonymous;
                break;

            case UT_PRIV:
                $IsWanted = (!$IsAnonymous && $Value["IsPrivileged"]);
                break;

            case UT_UNPRIV:
                $IsWanted = (!$IsAnonymous && !$Value["IsPrivileged"]);
                break;

            default:
                $IsWanted = TRUE;
                break;
        }
        if (!$IsWanted)
        {
            continue;
        }

        # this search made it through all of the filters, so keep it
        $Filtered[$Key][] = $Value;
    }
}

$SearchData = $Filtered;

# construct the TransportControls
$H_TransportUI = new TransportControlsUI(
    TransportControlsUI::NO_ITEM_TYPE,
    $H_ResultsPerPage);

# convert data to the form expected by the ListUI (each entry has a
#   "Date" timestamp, a "NumSearches" count, a "UserId" string containing
#   a comma-separated list of user names, the "SearchParameters"
#   description wrapped in a span, and a "Results" count)
$H_ListData = array();

# if the view type was 'Log'
if ($H_View == "Log")
{
    # iterate over all the searches
    foreach ($SearchData as $Key => $Items)
    {
        # index of the list entry currently being built up for this search
        $Index = 0;

        # iterate over each time the search was performed
        foreach ($Items as $Item)
        {
            # look up the user who did this search
            $UserName = isset($UserMap[$Item["UserId"]]) ?
                      $UserMap[$Item["UserId"]] : "";

            # construct a key to use when indexing into the ListUI data
            $ThisKey = $Key."_".$Index;

            # if we've already got a search at this key, see if it was
            # within 5 minutes of the search we're inspecting now
            if (isset($H_ListData[$ThisKey]) &&
                $Item["Timestamp"] - $H_ListData[$ThisKey]["Date"] < 300)
            {
                # if so, increment the NumSearches
                $H_ListData[$ThisKey]["NumSearches"] += 1;

                # and append our user to the list of users who did this
                # search, unless they are already listed (whole names are
                # compared, so that a user whose name happens to appear
                # inside the name of another user is not left out)
                $ListedUsers = explode(
                    ", ", $H_ListData[$ThisKey]["UserId"]);
                if (strlen($UserName) &&
                    !in_array($UserName, $ListedUsers, TRUE))
                {
                    if (strlen($H_ListData[$ThisKey]["UserId"]))
                    {
                        $H_ListData[$ThisKey]["UserId"] .= ", ";
                    }

                    $H_ListData[$ThisKey]["UserId"] .= $UserName;
                }
            }
            else
            {
                # otherwise, if we had a search but it is more than 5
                # min later, move to the next index spot
                if (isset($H_ListData[$ThisKey]))
                {
                    $Index++;
                }

                # and append this search to our list of searches
                $H_ListData[$Key."_".$Index] = array(
                    "Date" => $Item["Timestamp"],
                    "NumSearches" => 1,
                    "UserId" => $UserName,
                    "SearchParameters" => "<span class='search-params'>"
                            .$SearchMap[$Key]."</span>",
                    "Results" => $Item["Results"] );
            }
        }
    }

    # the log view is sorted by date unless another field was requested
    if ($SortField === NULL)
    {
        $SortField = "Date";
    }
}
# otherwise, if the ViewType was 'Frequency'
elseif ($H_View == "Frequency")
{
    # build a single list entry for each distinct search
    foreach ($SearchData as $Key => $Item)
    {
        # extract all the unique UserIds who did this search
        $UserIds = array_unique( array_map(
            function($x) { return $x["UserId"]; },
            $Item));

        # convert the UserIds to user names (IDs with no known user name,
        # such as the empty ID of anonymous searches, are left out)
        $UserNames = array();
        foreach ($UserIds as $UserId)
        {
            if (isset($UserMap[$UserId]))
            {
                $UserNames[]= $UserMap[$UserId];
            }
        }

        # construct the list entry for this search, using the most recent
        # time the search was performed as the date and the largest
        # number of results it ever produced as the result count
        $H_ListData[$Key]= array(
            "Date" => max( array_map(
                function($x)
                {
                    return $x["Timestamp"];
                },
                $Item)),
            "NumSearches" => count($Item),
            "UserId" => implode(", ", $UserNames),
            "SearchParameters" => "<span class='search-params'>"
                    .$SearchMap[$Key]."</span>",
            "Results" => max( array_map(
                function($x)
                {
                    return $x["Results"];
                },
                $Item)) );
    }

    # the frequency view is sorted by the number of searches unless
    # another field was requested
    if ($SortField === NULL)
    {
        $SortField = "NumSearches";
    }
}

# let the transport controls know how many entries there are in total
$H_TotalResults = count($H_ListData);
$H_TransportUI->ItemCount($H_TotalResults);

# builds a comparison callback that orders list entries by the values in
#  the specified field, with the largest values coming first
$DescendingBy = function($Field)
{
    return function($a, $b) use ($Field)
    {
        return CompareValues($b[$Field], $a[$Field]);
    };
};

# sort the list entries by the requested field (the entries are left in
#  their existing order when the field is not one that we sort on)
switch ($SortField)
{
    case "Date":
        uasort($H_ListData, $DescendingBy("Date"));
        break;

    case "NumSearches":
        uasort($H_ListData, $DescendingBy("NumSearches"));
        break;

    case "Results":
        uasort($H_ListData, $DescendingBy("Results"));
        break;

    case "UserId":
        # an entry can list several users, so the ordering is determined
        # by comparing the alphabetically first user from each entry
        uasort($H_ListData, function($a, $b)
        {
            $UsersA = explode(", ", $a["UserId"]);
            $UsersB = explode(", ", $b["UserId"]);
            sort($UsersA);
            sort($UsersB);

            return strcmp($UsersA[0], $UsersB[0]);
        });
        break;

    case "SearchParameters":
        uasort($H_ListData, function($a, $b)
        {
            return strcmp($a["SearchParameters"], $b["SearchParameters"]);
        });
        break;
}

# flip the ordering if a reversed sort was requested
if ($RevSort)
{
    $H_ListData = array_reverse($H_ListData, TRUE);
}

# pare the list down to the page of entries selected via the
#  TransportControls (list keys are preserved)
$PageStart = $H_TransportUI->StartingIndex();
$H_ListData = array_slice(
    $H_ListData, $PageStart, $H_ResultsPerPage, TRUE);

# construct the BaseLink for TranportControlsUI and ItemListUI (values
#   that can arrive as arbitrary strings from the request are URL-encoded
#   so that they cannot alter the structure of the link)
$H_BaseLink = "index.php?P=P_MetricsReporter_SearchLog"
                  ."&V=".urlencode($H_View)
                  ."&ST=".$H_StartTime
                  ."&UT=".$UserType
                  ."&STY=".$H_SearchType
                  ."&RP=".urlencode($H_ResultsPerPage);

# define the fields that should appear in the ListUI (the field that is
#   sorted on by default depends on the view type)
if ($H_View == "Log")
{
    $H_ListFields = array(
        "Date" => array(
            "Heading" => "Date",
            "DefaultToDescendingSort" => TRUE,
            # dates are stored as timestamps and formatted for display
            "ValueFunction" => function($Item, $FieldId)
            {
                return GetPrettyTimestamp($Item[$FieldId]);
            },
            "DefaultSortField" => TRUE,
        ),
        "Results" => array(
            "DefaultToDescendingSort" => TRUE,
            "Heading" => "Results"),
        "UserId" => array(
            "Heading" => "User",
        ),
        "SearchParameters" => array(
            "Heading" => "Parameters",
            # note how many times the search was repeated when an entry
            #   represents more than one search
            "ValueFunction" => function($Item, $FieldId)
            {
                return $Item["SearchParameters"]
                    .(($Item["NumSearches"]>1) ?
                      "<br/>(repeated ".$Item["NumSearches"]." times)" :
                      "");
            },
            "AllowHTML" => TRUE ),
    );
}
elseif ($H_View == "Frequency")
{
    $H_ListFields = array(
        "NumSearches" => array(
            "Heading" => "Count",
            "DefaultToDescendingSort" => TRUE,
            "DefaultSortField" => TRUE,
        ),
        "Results" => array(
            "Heading" => "Results",
            "DefaultToDescendingSort" => TRUE,
        ),
        "UserId" => array(
            "Heading" => "Users",
        ),
        "SearchParameters" => array(
            "Heading" => "Parameters",
            "AllowHTML" => TRUE ),
    );
}
else
{
    throw new Exception("Unsupported view type: ".$H_View);
}

# when JSON was requested, send the list data in that form in place of
#  the normal HTML page
if (isset($_GET["JSON"]))
{
    $GLOBALS["AF"]->SuppressHTMLOutput();

    # report the character set configured as the default for the site
    $CharSet = $GLOBALS["G_SysConfig"]->DefaultCharacterSet();
    header("Content-Type: application/json; charset=".$CharSet, TRUE);

    echo json_encode($H_ListData);
    return;
}