<?PHP
#
#   FILE:  OAILog.php
#
#   Part of the Collection Workflow Integration System (CWIS)
#   Copyright 2017 Edward Almasy and Internet Scout Research Group
#   http://scout.wisc.edu/cwis/
#

# ----- MAIN -----------------------------------------------------------------

# choices for the "ST" (start time) parameter: how far back the log reaches
const ST_1_DAY = 1;
const ST_1_WEEK = 2;
const ST_1_MONTH = 3;
const ST_3_MONTH = 4;
const ST_6_MONTH = 5;
const ST_12_MONTH = 6;
const ST_24_MONTH = 7;
const ST_FOREVER = 8;

# choices for the "FT" (filter type) parameter: which view of the log to show
const FT_ALL = 1;
const FT_CONTACT = 2;
const FT_SAMPLE = 3;
const FT_HARVEST = 4;
const FT_SETS = 5;

# make sure user has sufficient permission to view report
if (!CheckAuthorization(PRIV_COLLECTIONADMIN)) {  return;  }

# grab ahold of the relevant metrics objects
$Recorder = $GLOBALS["G_PluginManager"]->GetPlugin("MetricsRecorder");
$Reporter = $GLOBALS["G_PluginManager"]->GetPlugin("MetricsReporter");

# extract parameters (results per page, start time option, filter type option)
$H_ResultsPerPage = intval(GetFormValue("RP", 50));
$H_StartTime = intval(GetFormValue("ST", ST_3_MONTH));
$H_FilterType = intval(GetFormValue("FT", FT_ALL));

# extract the paging and sorting parameters
$StartIndex = intval(GetFormValue(
    TransportControlsUI::PNAME_STARTINGINDEX, 0));
$SortField = GetFormValue(
    TransportControlsUI::PNAME_SORTFIELD, "EventDate");
$RevSort = GetFormValue(
    TransportControlsUI::PNAME_REVERSESORT, TRUE);

# set up a lookup table of starting times, starting from the top of the hour
$CurrentHour = strtotime(date("Y-m-d H:00:00"));
$TimeLUT = array(
    ST_FOREVER => 0,
    ST_24_MONTH => strtotime("-24 months", $CurrentHour),
    ST_12_MONTH => strtotime("-12 months", $CurrentHour),
    ST_6_MONTH => strtotime("-6 months", $CurrentHour),
    ST_3_MONTH => strtotime("-3 months", $CurrentHour),
    ST_1_MONTH => strtotime("-1 month", $CurrentHour),
    ST_1_WEEK => strtotime("-1 week", $CurrentHour),
    ST_1_DAY => strtotime("-1 day", $CurrentHour),
);

# a page size below one cannot be used for paging, so revert to the default
if ($H_ResultsPerPage < 1)
{
    $H_ResultsPerPage = 50;
}

# a start time option with no lookup table entry would leave us without
#  a starting date, so revert to the default
if (!isset($TimeLUT[$H_StartTime]))
{
    $H_StartTime = ST_3_MONTH;
}

# pull out all the harvest data for the specified period
$H_HarvestData = $Recorder->GetEventData(
    "MetricsRecorder", MetricsRecorder::ET_OAIREQUEST,
     date("Y-m-d H:00:00", $TimeLUT[$H_StartTime]));

# entries whose request looks like an SQL injection attempt (keyed by
#  entry key), and a per-IP tally of how many such requests each IP made
$InjectionEntries = array();
$InjectionTally = array();

# first pass: flag suspicious requests and score the IPs that sent them
foreach ($H_HarvestData as $EntryKey => $Entry)
{
    if (!MetricsReporter::RequestIsSqlInjection($Entry["DataTwo"]))
    {
        continue;
    }

    $Addr = $Entry["DataOne"];
    $InjectionTally[$Addr] = isset($InjectionTally[$Addr])
            ? $InjectionTally[$Addr] + 1 : 1;
    $InjectionEntries[$EntryKey] = TRUE;
}

# second pass: drop every flagged entry, plus everything sent by an IP
#  with more than three flagged requests
foreach ($H_HarvestData as $EntryKey => $Entry)
{
    $Addr = $Entry["DataOne"];
    $IsRepeatOffender = isset($InjectionTally[$Addr])
            && $InjectionTally[$Addr] > 3;

    if ($IsRepeatOffender || isset($InjectionEntries[$EntryKey]))
    {
        unset($H_HarvestData[$EntryKey]);
    }
}

# if we're not going to view all entries, reduce the data to the rows
#  shown by the selected view:
#   FT_CONTACT - one sample row per IP, annotated with that IP's request
#       count and the date of its most recent request
#   FT_SAMPLE  - GetRecord requests and requests carrying a resumptionToken
#   FT_HARVEST - initial ListRecords requests from IPs that also sent a
#       resumptionToken, annotated with a continuation count
#   FT_SETS    - requests containing "set=" that are ListSets requests or
#       initial ListRecords requests (the latter with a continuation count)
if ($H_FilterType != FT_ALL)
{
    # the harvest and sample views need to know which IPs ever sent a
    #  request containing a resumptionToken
    $IPsThatResumed = array();
    if ($H_FilterType == FT_HARVEST || $H_FilterType == FT_SAMPLE)
    {
        foreach ($H_HarvestData as $Key => $Val)
        {
            $Request = urldecode($Val["DataTwo"]);
            if (strpos($Request, "resumptionToken") !== FALSE)
            {
                $IPsThatResumed[$Val["DataOne"]] = TRUE;
            }
        }
    }

    # build up a list of which elements to filter
    $ToFilter = array();
    switch ($H_FilterType)
    {
        case FT_CONTACT:
            # request verbs we recognize, in the order of preference used
            #  when picking the sample request to show for an IP
            $KnownVerbs = array(
                "ListRecords", "GetRecord", "ListIdentifiers",
                "ListSets", "Identify");

            # build a summary of the contacts from each IP address
            $ContactSummary = array();
            $ContactCount = array();
            $ContactDates = array();
            foreach ($H_HarvestData as $Key => $Val)
            {
                $IPAddr = $Val["DataOne"];

                # if we lack a contact summary for this IP, create one
                #  (FALSE means "no request of this type seen yet")
                if (!isset($ContactSummary[$IPAddr]))
                {
                    $ContactSummary[$IPAddr] = array(
                        "ListRecords" => FALSE,
                        "GetRecord" => FALSE,
                        "ListIdentifiers" => FALSE,
                        "ListSets" => FALSE,
                        "Identify" => FALSE,
                        "unknown" => FALSE);
                }

                # track the number of contacts from each IP
                if (!isset($ContactCount[$IPAddr]))
                {
                    $ContactCount[$IPAddr] = 0;
                }
                $ContactCount[$IPAddr]++;

                # track the most recent contact date for each IP
                if (!isset($ContactDates[$IPAddr]) ||
                    strtotime($ContactDates[$IPAddr]) < strtotime($Val["EventDate"]))
                {
                    $ContactDates[$IPAddr] = $Val["EventDate"];
                }

                # parse the request
                parse_str($Val["DataTwo"], $ReqParams);

                # figure out the request type for this request
                if (isset($ReqParams["verb"]) &&
                    in_array($ReqParams["verb"], $KnownVerbs, TRUE))
                {
                    $ReqType = $ReqParams["verb"];
                }
                else
                {
                    $ReqType = "unknown";
                }

                # if we don't have a record for this type of contact,
                #  create one (strict test, so that an entry key of 0 is
                #  not mistaken for an empty slot and overwritten)
                if ($ContactSummary[$IPAddr][$ReqType] === FALSE)
                {
                    $ContactSummary[$IPAddr][$ReqType] = $Key;
                }
            }

            # iterate over the contact summary, keeping the first recorded
            #  request for each IP in the order the types are listed above
            $ToKeep = array();
            foreach ($ContactSummary as $IPAddr => $Contacts)
            {
                foreach ($Contacts as $ContactKey)
                {
                    if ($ContactKey !== FALSE)
                    {
                        $ToKeep[$ContactKey] = TRUE;
                        break;
                    }
                }
            }

            # ditch anything not on our keep list, and annotate what we keep
            foreach ($H_HarvestData as $Key => $Val)
            {
                if (!isset($ToKeep[$Key]))
                {
                    $ToFilter[] = $Key;
                }
                else
                {
                    $IPAddr = $Val["DataOne"];
                    $H_HarvestData[$Key]["Count"] = $ContactCount[$IPAddr];
                    $H_HarvestData[$Key]["EventDate"] = $ContactDates[$IPAddr];
                }
            }

            break;

        case FT_SAMPLE:
            foreach ($H_HarvestData as $Key => $Val)
            {
                $Request = urldecode($Val["DataTwo"]);
                $IPAddr = $Val["DataOne"];

                # do not filter GetRecord requests
                if (strpos($Request, "verb=GetRecord") !== FALSE)
                {
                    continue;
                }

                # do not filter requests w/ a resumption token
                if (strpos($Request, "resumptionToken") !== FALSE &&
                    isset($IPsThatResumed[$IPAddr]))
                {
                    continue;
                }

                $ToFilter[] = $Key;
            }

            break;

        case FT_HARVEST:
            # key of the most recent harvest start seen from each IP, so
            #  that continuations are credited to a harvest begun by the
            #  same IP rather than to whichever harvest started last
            $CurHarvest = array();
            $ContinuationCounts = array();

            foreach ($H_HarvestData as $Key => $Val)
            {
                $Request = urldecode($Val["DataTwo"]);
                $IPAddr = $Val["DataOne"];

                # only ListRecords from IPs that resumed are of interest
                if (strpos($Request, "verb=ListRecords") !== FALSE &&
                    isset($IPsThatResumed[$IPAddr]))
                {
                    # a request without a token starts a new harvest,
                    #  and is the row we keep
                    if (strpos($Request, "resumptionToken") === FALSE)
                    {
                        $CurHarvest[$IPAddr] = $Key;
                        $ContinuationCounts[$Key] = 0;
                        continue;
                    }

                    # a request with a token continues this IP's harvest
                    if (isset($CurHarvest[$IPAddr]))
                    {
                        $ContinuationCounts[$CurHarvest[$IPAddr]]++;
                    }
                }

                $ToFilter[] = $Key;
            }

            # add continuation counts in to harvest data
            foreach ($ContinuationCounts as $Key => $Count)
            {
                $H_HarvestData[$Key]["Count"] = $Count;
            }

            break;

        case FT_SETS:
            # key of the most recent set harvest start seen from each IP
            #  (tracked per IP for the same reason as in the harvest view)
            $CurHarvest = array();
            $ContinuationCounts = array();

            foreach ($H_HarvestData as $Key => $Val)
            {
                $Request = urldecode($Val["DataTwo"]);
                $IPAddr = $Val["DataOne"];

                if (strpos($Request, "set=") !== FALSE)
                {
                    # keep ListSets requests as they are
                    if (strpos($Request, "verb=ListSets") !== FALSE)
                    {
                        continue;
                    }

                    if (strpos($Request, "verb=ListRecords") !== FALSE)
                    {
                        # a request without a token starts a new harvest,
                        #  and is the row we keep
                        if (strpos($Request, "resumptionToken") === FALSE)
                        {
                            $CurHarvest[$IPAddr] = $Key;
                            $ContinuationCounts[$Key] = 0;
                            continue;
                        }

                        # a request with a token continues this IP's harvest
                        if (isset($CurHarvest[$IPAddr]))
                        {
                            $ContinuationCounts[$CurHarvest[$IPAddr]]++;
                        }
                    }
                }
                $ToFilter[] = $Key;
            }

            # add continuation counts in to harvest data
            foreach ($ContinuationCounts as $Key => $Count)
            {
                $H_HarvestData[$Key]["Count"] = $Count;
            }

            break;
    }

    # prune marked records
    foreach ($ToFilter as $Key)
    {
        unset($H_HarvestData[$Key]);
    }
}


# work out which view is active, since headings and columns depend on it
$IsContactView = ($H_FilterType == FT_CONTACT);
$IsHarvestView = ($H_FilterType == FT_HARVEST
        || $H_FilterType == FT_SETS);

# describe the columns to display
$H_ListFields = array(
    "EventDate" => array(
        "Heading" => $IsContactView
                ? "Most Recent Request" : "Request Date",
        "DefaultSortField" => TRUE,
    ),
    "DataOne" => array(
        "Heading" => "Remote host",
        "NoSorting" => TRUE,
    ),
    "DataTwo" => array(
        "Heading" => $IsContactView ? "Sample Request" : "Request",
    ),
);

# the contact, harvest, and sets views carry an additional count column
if ($IsContactView)
{
    $H_ListFields["Count"] = array("Heading" => "Request Count");
}
elseif ($IsHarvestView)
{
    $H_ListFields["Count"] = array("Heading" => "Continuation Count");
}

# comparison callbacks for each field the data can be sorted on
$SortFunctions = array(
    "EventDate" => function($V1, $V2)
    {
        return StdLib::SortCompare(
            strtotime($V1["EventDate"]), strtotime($V2["EventDate"]));
    },
    "DataTwo" => function($V1, $V2)
    {
        return strcmp($V1["DataTwo"], $V2["DataTwo"]);
    },
    "Count" => function($V1, $V2)
    {
        # not every row carries a "Count" value, so treat a missing
        #  count as zero rather than reading an undefined index
        $C1 = isset($V1["Count"]) ? $V1["Count"] : 0;
        $C2 = isset($V2["Count"]) ? $V2["Count"] : 0;
        return StdLib::SortCompare($C1, $C2);
    }
);

# the sort field is read from a form value, so fall back to the default
#  field when it does not name one of the comparison callbacks above
if (!is_string($SortField) || !isset($SortFunctions[$SortField]))
{
    $SortField = "EventDate";
}

# sort the data
uasort($H_HarvestData, $SortFunctions[$SortField]);

# reverse if requested (array_reverse() is called without its
#  preserve-keys flag, so integer keys are renumbered at this point)
if ($RevSort)
{
    $H_HarvestData = array_reverse($H_HarvestData);
}

# link back to this page that carries the current display settings
$H_BaseLink = "index.php?P=P_MetricsReporter_OAILog";
$H_BaseLink .= "&ST=".$H_StartTime;
$H_BaseLink .= "&RP=".$H_ResultsPerPage;
$H_BaseLink .= "&FT=".$H_FilterType;

# note how many rows there are before cutting the data down to one page
$H_TotalResults = count($H_HarvestData);

# construct the TransportControls and tell them the total number of rows
$H_TransportUI = new TransportControlsUI(
    TransportControlsUI::NO_ITEM_TYPE, $H_ResultsPerPage);
$H_TransportUI->ItemCount($H_TotalResults);

# keep only the rows for the current page, retaining their keys
$PageStart = $H_TransportUI->StartingIndex();
$H_HarvestData = array_slice(
    $H_HarvestData, $PageStart, $H_ResultsPerPage, TRUE);

# when a "JSON" parameter is present in the query string, emit the
#  current page of data as JSON in place of the HTML page
if (isset($_GET["JSON"]))
{
    $GLOBALS["AF"]->SuppressHTMLOutput();

    $CharSet = $GLOBALS["G_SysConfig"]->DefaultCharacterSet();
    $ContentType = "Content-Type: application/json; charset=".$CharSet;
    header($ContentType, TRUE);

    echo json_encode($H_HarvestData);
    return;
}