<?PHP
#
#   FILE:  UrlChecker.php
#
#   Part of the Collection Workflow Integration System (CWIS)
#   Copyright 2011-2014 Edward Almasy and Internet Scout Research Group
#   http://scout.wisc.edu/cwis/
#

/**
* Plugin to validate URL field values.
*/
class UrlChecker extends Plugin
{
    /**
    * @const FLAG_OFF_VALUE value used by the Resource class when a flag is off
    *      (also used as the value half of the "FieldId:Value" keys built for
    *      the "DontCheck" configuration options)
    */
    const FLAG_OFF_VALUE = 0;

    /**
    * @const FLAG_ON_VALUE value used by the Resource class when a flag is on
    *      (also used as the value half of the "FieldId:Value" keys built for
    *      the "DontCheck" configuration options)
    */
    const FLAG_ON_VALUE = 1;

    /**
    * The number of times a URL has to be found invalid to be considered an
    * invalid URL.  NOTE(review): the statistics queries in this class compare
    * with "TimesInvalid > INVALIDATION_THRESHOLD", i.e. a URL is reported as
    * invalid only once its count exceeds this value.
    */
    const INVALIDATION_THRESHOLD = 4;

    /**
    * The timeout value in seconds for URL checking connections.
    */
    const CONNECTION_TIMEOUT = 5.0;


    # ---- STANDARD PLUGIN INTERFACE -----------------------------------------

    /**
    * Set the descriptive attributes of this plugin and define its
    * configuration settings.  The choices offered for the "FieldsToCheck"
    * and "DontCheck" settings are generated from the URL, flag, and
    * timestamp fields of every metadata schema.
    */
    public function Register()
    {
        # descriptive attributes
        $this->Name = "URL Checker";
        $this->Version = "2.1.15";
        $this->Description = trim(preg_replace('/\s+/', ' ', '
            Periodically validates URL field values.
            <i>System Administrator</i> or <i>Collection Administrator</i> privilege
            is required to view the results.'));
        $this->Author = "Internet Scout";
        $this->Url = "http://scout.wisc.edu/cwis/";
        $this->Email = "scout@scout.wisc.edu";
        $this->Requires = array("CWISCore" => "2.9.0");
        $this->EnabledByDefault = FALSE;

        # choices for the "DontCheck" and "FieldsToCheck" settings
        $ExclusionChoices = array();
        $UrlFieldChoices = array();

        foreach (MetadataSchema::GetAllSchemas() as $Schema)
        {
            # every label starts with the name of the owning schema
            $Prefix = $Schema->Name()." : ";

            # two exclusion rules per flag field, one for each flag state
            foreach ($Schema->GetFields(MetadataSchema::MDFTYPE_FLAG) as $Flag)
            {
                $Subject = $Prefix.$Flag->Name()." is set to \"";
                $ExclusionChoices[$Flag->Id().":".self::FLAG_OFF_VALUE] =
                    $Subject.$Flag->FlagOffLabel()."\"";
                $ExclusionChoices[$Flag->Id().":".self::FLAG_ON_VALUE] =
                    $Subject.$Flag->FlagOnLabel()."\"";
            }

            # one exclusion rule per timestamp field
            foreach ($Schema->GetFields(MetadataSchema::MDFTYPE_TIMESTAMP) as $Stamp)
            {
                $ExclusionChoices[$Stamp->Id().":PAST"] =
                    $Prefix.$Stamp->Name()." is in the past";
            }

            # every URL field can be selected for checking
            foreach ($Schema->GetFields(MetadataSchema::MDFTYPE_URL) as $UrlField)
            {
                $UrlFieldChoices[$UrlField->Id()] = $Prefix.$UrlField->Name();
            }
        }

        # priority used for the queued URL checking tasks
        $PriorityChoices = array(
            ApplicationFramework::PRIORITY_BACKGROUND => "Background",
            ApplicationFramework::PRIORITY_LOW => "Low",
            ApplicationFramework::PRIORITY_MEDIUM => "Medium",
            ApplicationFramework::PRIORITY_HIGH => "High",
        );
        $this->CfgSetup["TaskPriority"] = array(
            "Type" => "Option",
            "Label" => "Task Priority",
            "Help" => "Priority of the URL checking tasks in the task queue.",
            "AllowMultiple" => FALSE,
            "Options" => $PriorityChoices,
        );

        # URL fields whose values get checked
        $this->CfgSetup["FieldsToCheck"] = array(
            "Type" => "Option",
            "Label" => "Fields to check",
            "Help" => "Check links in the selected URL fields.",
            "AllowMultiple" => TRUE,
            "Rows" => count($UrlFieldChoices),
            "Options" => $UrlFieldChoices,
        );

        # conditions under which the URLs of a resource are not checked
        $this->CfgSetup["DontCheck"] = array(
            "Type" => "Option",
            "Label" => "Don't check URLs if",
            "Help" => "Don't check the URLs of resources matching these conditions.",
            "AllowMultiple" => TRUE,
            "Rows" => count($ExclusionChoices),
            "Options" => $ExclusionChoices,
        );

        # switch for the additional developer interface
        $this->CfgSetup["EnableDeveloper"] = array(
            "Type" => "Flag",
            "Label" => "Enable Developer Interface",
            "Help" => "Enable an additional developer interface "
                ."to aid in debugging the plugin.",
            "OnLabel" => "Yes",
            "OffLabel" => "No",
        );

        # size of a batch of checks
        $this->CfgSetup["NumToCheck"] = array(
            "Type" => "Number",
            "Label" => "Resources to Check",
            "Help" => "The number of resources to include in a batch of checks",
        );

        # delay between the tasks that queue batches of checks
        $this->CfgSetup["CheckDelay"] = array(
            "Type" => "Number",
            "Label" => "Check Delay",
            "Help" => "The number of minutes between tasks that start batches"
                ." of URL checks.  If the previous batch has not finished, nothing"
                ." new will be queued",
        );
    }

    /**
    * Copy the "DontCheck" and "FieldsToCheck" configuration settings into
    * the Rules and FieldsToCheck properties, so that they are available when
    * running in the background.
    */
    public function Initialize()
    {
        $ExclusionRules = $this->ConfigSetting("DontCheck");
        $this->Rules = $ExclusionRules;

        $UrlFieldIds = $this->ConfigSetting("FieldsToCheck");
        $this->FieldsToCheck = $UrlFieldIds;
    }

    /**
    * Set up the plugin: create its two history tables (if they do not exist
    * yet) and store the default values of its settings, including a
    * "FieldsToCheck" list that contains every URL field of every schema.
    * @return NULL if everything went OK or an error message otherwise
    */
    public function Install()
    {
        $Database = new Database();

        # one row per resource, recording when it was checked
        $Created = $Database->Query("
            CREATE TABLE IF NOT EXISTS UrlChecker_ResourceHistory (
                ResourceId     INT,
                CheckDate      TIMESTAMP,
                PRIMARY KEY    (ResourceId)
            );");
        if ($Created === FALSE)
        {
            return "Could not create the resource history table";
        }

        # one row per resource and field, recording the results for a URL
        $Created = $Database->Query("
            CREATE TABLE IF NOT EXISTS UrlChecker_UrlHistory (
                ResourceId          INT,
                FieldId             INT,
                Hidden              INT,
                CheckDate           TIMESTAMP,
                TimesInvalid        INT,
                Url                 TEXT,
                StatusCode          SMALLINT,
                ReasonPhrase        TEXT,
                IsFinalUrlInvalid   INT,
                FinalUrl            TEXT,
                FinalStatusCode     SMALLINT,
                FinalReasonPhrase   TEXT,
                PRIMARY KEY         (ResourceId, FieldId)
            );");
        if ($Created === FALSE)
        {
            return "Could not create the URL history table";
        }

        # store the default settings
        $Defaults = array(
            "EnableDeveloper" => FALSE,
            "NextNormalUrlCheck" => 0,
            "NextInvalidUrlCheck" => 0,
            "TaskPriority" => ApplicationFramework::PRIORITY_BACKGROUND,
            "NumToCheck" => 250,
            "CheckDelay" => 15,
        );
        foreach ($Defaults as $SettingName => $SettingValue)
        {
            $this->ConfigSetting($SettingName, $SettingValue);
        }

        # by default, every URL field of every schema gets checked
        $UrlFieldIds = array();
        foreach (MetadataSchema::GetAllSchemas() as $Schema)
        {
            foreach ($Schema->GetFields(MetadataSchema::MDFTYPE_URL) as $UrlField)
            {
                $UrlFieldIds[] = $UrlField->Id();
            }
        }
        $this->ConfigSetting("FieldsToCheck", $UrlFieldIds);

        return NULL;
    }

    /**
    * Limit serialization of this object to the Rules and FieldsToCheck
    * properties.
    * @return array Names of the properties to save when serializing.
    */
    public function __sleep()
    {
        $PropertiesToSave = array("Rules", "FieldsToCheck");
        return $PropertiesToSave;
    }

    /**
    * Uninstall the plugin by dropping its two history tables.
    * @return NULL|string NULL if successful or an error message otherwise
    */
    public function Uninstall()
    {
        $Database = new Database();

        # resource history table (IF EXISTS keeps a table that is already
        # gone from blocking the uninstall)
        if (FALSE === $Database->Query(
                "DROP TABLE IF EXISTS UrlChecker_ResourceHistory;"))
        { return "Could not remove the resource history table"; }

        # URL history table
        if (FALSE === $Database->Query(
                "DROP TABLE IF EXISTS UrlChecker_UrlHistory;"))
        { return "Could not remove the URL history table"; }

        # report success explicitly, as documented
        return NULL;
    }

    /**
    * Upgrade from a previous version.  The upgrade steps are cumulative:
    * every step whose target version is newer than $PreviousVersion is run,
    * in order.
    * @param string $PreviousVersion Previous version number.
    * @return NULL|string NULL if successful or an error message otherwise
    */
    public function Upgrade($PreviousVersion)
    {
        # upgrade from versions < 2.0.0 to 2.0.0
        if (version_compare($PreviousVersion, "2.0.0", "<"))
        {
            $DB = new Database();

            // make the upgrade process fault tolerant (each query pattern
            // may appear only once as an array key, because PHP keeps just
            // the last value of a repeated key -- so the acceptable errors
            // for one kind of query are combined into a single pattern)
            $DB->SetQueryErrorsToIgnore(array(
                '/ALTER\s+TABLE\s+[^\s]+\s+CHANGE\s+.+/i'
                  => '/(Unknown\s+column\s+[^\s]+\s+in\s+[^\s]+'
                        .'|Table\s+[^\s]+\s+doesn\'t\s+exist)/i',
                '/ALTER\s+TABLE\s+[^\s]+\s+ADD\s+.+/i'
                  => '/(Duplicate\s+column\s+name\s+[^\s]+'
                        .'|Table\s+[^\s]+\s+doesn\'t\s+exist)/i',
                '/RENAME\s+TABLE\s+[^\s]+\s+TO\s+[^\s]+/i'
                  => '/Table\s+[^\s]+\s+already\s+exists/i',
                '/CREATE\s+TABLE\s+[^\s]+\s+\([^)]+\)/i'
                  => '/Table\s+[^\s]+\s+already\s+exists/i'));

            # rename columns
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE DateChecked CheckDate TIMESTAMP"))
            { return "Could not update the URL history CheckDate column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE TimesFailed TimesInvalid INT"))
            { return "Could not update the TimesInvalid column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE StatusNo StatusCode INT"))
            { return "Could not update the StatusCode column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE StatusText ReasonPhrase TEXT"))
            { return "Could not update the ReasonPhrase column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE DataOne FinalStatusCode INT DEFAULT -1"))
            { return "Could not update the FinalStatusCode column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE DataTwo FinalUrl TEXT"))
            { return "Could not update the FinalUrl column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_History
                CHANGE DateChecked CheckDate TIMESTAMP"))
            { return "Could not update the resource history CheckDate column"; }

            # add columns
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                ADD Hidden INT DEFAULT 0
                AFTER FieldId"))
            { return "Could not add the Hidden column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                ADD IsFinalUrlInvalid INT DEFAULT 0
                AFTER ReasonPhrase"))
            { return "Could not add the IsFinalUrlInvalid column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                ADD FinalReasonPhrase TEXT"))
            { return "Could not add the FinalReasonPhrase column"; }

            # rename history tables
            if (FALSE === $DB->Query("
                RENAME TABLE UrlChecker_Failures
                TO UrlChecker_UrlHistory"))
            { return "Could not rename the URL history table"; }
            if (FALSE === $DB->Query("
                RENAME TABLE UrlChecker_History
                TO UrlChecker_ResourceHistory"))
            { return "Could not rename the resource history table"; }

            # remove any garbage data
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_UrlHistory
                WHERE ResourceId < 0"))
            { return "Could not remove stale data from the URL history"; }
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_ResourceHistory
                WHERE ResourceId < 0"))
            { return "Could not remove stale data from the resource history"; }

            # add settings table
            if (FALSE === $DB->Query("
                CREATE TABLE UrlChecker_Settings (
                    NextNormalUrlCheck     INT,
                    NextInvalidUrlCheck    INT
                );"))
            { return "Could not create the settings table"; }

            # repair and optimize tables after the changes. if this isn't done,
            # weird ordering issues might pop up
            if (FALSE === $DB->Query("
                REPAIR TABLE UrlChecker_UrlHistory"))
            { return "Could not repair the URL history table"; }
            if (FALSE === $DB->Query("
                REPAIR TABLE UrlChecker_ResourceHistory"))
            { return "Could not repair the resource history table"; }
            if (FALSE === $DB->Query("
                OPTIMIZE TABLE UrlChecker_UrlHistory"))
            { return "Could not optimize the URL history table"; }
            if (FALSE === $DB->Query("
                OPTIMIZE TABLE UrlChecker_ResourceHistory"))
            { return "Could not optimize the resource history table"; }
        }

        # upgrade from version 2.0.0 to 2.1.0
        if (version_compare($PreviousVersion, "2.1.0", "<"))
        {
            $DB = new Database();

            // make the upgrade process fault tolerant
            // @codingStandardsIgnoreStart
            $DB->SetQueryErrorsToIgnore(array(
                '/ALTER\s+TABLE\s+[^\s]+\s+ADD\s+.+/i'
                  => '/Duplicate\s+column\s+name\s+[^\s]+/i',
                '/ALTER\s+TABLE\s+[^\s]+\s+DROP\s+.+/i'
                  => '/Can\'t\s+DROP\s+[^\s;]+;\s+check\s+that\s+column\/key\s+exists/i'));
            // @codingStandardsIgnoreEnd

            # get old settings data
            if (FALSE === $DB->Query("
                SELECT * FROM UrlChecker_Settings LIMIT 1"))
            { return "Could not get settings data"; }

            if ($DB->NumRowsSelected())
            {
                $Row = $DB->FetchRow();
                $NextNormalUrlCheck = $Row["NextNormalUrlCheck"];
                $NextInvalidUrlCheck = $Row["NextInvalidUrlCheck"];
            }
            else
            {
                $NextNormalUrlCheck = 0;
                $NextInvalidUrlCheck = 0;
            }

            # add column
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Settings
                ADD Name Text"))
            { return "Could not add the Name column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Settings
                ADD Value Text"))
            { return "Could not add the Value column"; }

            # remove old columns
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Settings
                DROP NextNormalUrlCheck"))
            { return "Could not remove the NextNormalUrlCheck Column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Settings
                DROP NextInvalidUrlCheck"))
            { return "Could not remove the NextInvalidUrlCheck Column"; }

            # remove any garbage data from the tables
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_UrlHistory
                WHERE ResourceId < 0"))
            { return "Could not remove stale data from the URL history"; }
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_ResourceHistory
                WHERE ResourceId < 0"))
            { return "Could not remove stale data from the resource history"; }

            # this makes sure that no garbage rows exist
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_Settings"))
            { return "Could not remove stale data from the settings table"; }

            # add settings back into the table
            if (FALSE === $DB->Query("
                INSERT INTO UrlChecker_Settings (Name, Value)
                VALUES
                ('NextNormalUrlCheck', '".addslashes($NextNormalUrlCheck)."'),
                ('NextInvalidUrlCheck', '".addslashes($NextInvalidUrlCheck)."'),
                ('EnableDeveloper', '0')"))
            { return "Could not initialize the updated settings"; }

            # repair and optimize the settings table after the changes
            if (FALSE === $DB->Query("
                REPAIR TABLE UrlChecker_Settings"))
            { return "Could not repair the settings table"; }
            if (FALSE === $DB->Query("
                OPTIMIZE TABLE UrlChecker_Settings"))
            { return "Could not optimize the settings table"; }
        }

        # upgrade from version 2.1.0 to 2.1.1
        if (version_compare($PreviousVersion, "2.1.1", "<"))
        {
            $DB = new Database();

            # remove old garbage data
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_UrlHistory
                WHERE Url NOT REGEXP '^https?:\/\/'"))
            { return "Could not remove stale data from the URL history"; }
        }

        # upgrade to version 2.1.4
        if (version_compare($PreviousVersion, "2.1.4", "<"))
        {
            $this->ConfigSetting("TaskPriority",
                ApplicationFramework::PRIORITY_BACKGROUND);
        }

        # upgrade to version 2.1.10
        if (version_compare($PreviousVersion, "2.1.10", "<"))
        {
            $DB = new Database();

            # make the upgrade process fault tolerant
            $DB->SetQueryErrorsToIgnore(array(
                '/DROP\s+.+/i'
                  => '/Unknown\s+table/i',
                '/SELECT\s+.+/i'
                  => '/doesn\'t\s+exist/i'));

            # get old settings data if possible
            $Result = $DB->Query("SELECT * FROM UrlChecker_Settings");

            $OldSettings = array();

            # if the query succeeded
            if ($Result)
            {
                # add the old settings to the array
                while (FALSE !== ($Row = $DB->FetchRow()))
                {
                    $OldSettings[$Row["Name"]] = intval($Row["Value"]);
                }
            }

            # migrate the data to the settings for the plugin
            $this->ConfigSetting("EnableDeveloper",
                    (bool)GetArrayValue($OldSettings, "EnableDeveloper", FALSE));
            $this->ConfigSetting("NextNormalUrlCheck",
                    GetArrayValue($OldSettings, "NextNormalUrlCheck", 0));
            $this->ConfigSetting("NextInvalidUrlCheck",
                    GetArrayValue($OldSettings, "NextInvalidUrlCheck", 0));

            # remove the old settings table if possible
            $DB->Query("DROP TABLE UrlChecker_Settings;");
        }

        # upgrade to version 2.1.11
        if (version_compare($PreviousVersion, "2.1.11", "<"))
        {
            $DB = new Database();

            # make the upgrade process fault tolerant
            $DB->SetQueryErrorsToIgnore(array(
                '/ALTER\s+.+/i'
                  => '/Duplicate\s+column\s+name/i'));

            # add the Time column if possible
            $DB->Query("
                ALTER TABLE UrlChecker_ResourceHistory
                ADD Time INT DEFAULT ".intval(self::CONNECTION_TIMEOUT));

            # reset the check times (invalid less than normal to make sure an
            # invalid check is performed first)
            $this->ConfigSetting("NextNormalUrlCheck", 1);
            $this->ConfigSetting("NextInvalidUrlCheck", 0);
        }

        # upgrade to version 2.1.12
        if (version_compare($PreviousVersion, "2.1.12", "<"))
        {
            $this->ConfigSetting("NumToCheck", 500);
        }

        # upgrade to version 2.1.13
        if (version_compare($PreviousVersion, "2.1.13", "<"))
        {
            # If people have left the default in place,
            # change it to the new default.
            if ($this->ConfigSetting("NumToCheck") == 500)
            {
                $this->ConfigSetting("NumToCheck", 250);
            }

            # Default to checking all URL fields:
            $FieldsToCheck = array();
            $AllSchemas = MetadataSchema::GetAllSchemas();
            foreach ($AllSchemas as $Schema)
            {
                $UrlFields = $Schema->GetFields(MetadataSchema::MDFTYPE_URL);
                foreach ($UrlFields as $Field)
                {
                    $FieldsToCheck[]= $Field->Id();
                }
            }
            $this->ConfigSetting("FieldsToCheck", $FieldsToCheck);
        }

        # upgrade to version 2.1.14
        if (version_compare($PreviousVersion, "2.1.14", "<"))
        {
            $DB = new Database();

            # make the upgrade process fault tolerant
            $DB->SetQueryErrorsToIgnore(array(
              '/ALTER\s+.+/i'
                  => '/check\sthat\scolumn\/key\sexists/i'));

            # remove the Time column if possible
            $DB->Query(
              "ALTER TABLE UrlChecker_ResourceHistory"
              ." DROP COLUMN Time");

            $this->ConfigSetting("CheckDelay", 15);
        }

        return NULL;
    }

    /**
    * List the events that this plugin makes available to the application
    * framework, together with the type of each event.
    * @return array Event types, indexed by event name.
    */
    public function DeclareEvents()
    {
        $First = ApplicationFramework::EVENTTYPE_FIRST;
        $Default = ApplicationFramework::EVENTTYPE_DEFAULT;

        $Provided = array();

        # this event should get hooked by an outside plugin
        $Provided["URLCHECKER_SET_RESOURCE_RELEASE_CALLBACKS"] = $First;

        $Provided["URLCHECKER_USING_CUSTOM_RELEASE_CALLBACKS"] = $First;
        $Provided["URLCHECKER_GET_INFORMATION"] = $First;
        $Provided["URLCHECKER_CHECK_RESOURCE_URLS"] = $Default;
        $Provided["URLCHECKER_GET_INVALID_COUNT"] = $First;
        $Provided["URLCHECKER_GET_INVALID_URLS"] = $First;
        $Provided["URLCHECKER_GET_INVALID_URL"] = $First;
        $Provided["URLCHECKER_IS_RESOURCE_RELEASED"] = $First;
        $Provided["URLCHECKER_RELEASE_RESOURCE"] = $Default;
        $Provided["URLCHECKER_WITHHOLD_RESOURCE"] = $Default;
        $Provided["URLCHECKER_HIDE_URL"] = $Default;
        $Provided["URLCHECKER_UNHIDE_URL"] = $Default;
        $Provided["URLCHECKER_GET_NEXT_RESOURCES_TO_BE_CHECKED"] = $First;
        $Provided["URLCHECKER_GET_NEXT_URLS_TO_BE_CHECKED"] = $First;
        $Provided["URLCHECKER_QUEUE_TASKS_NOW"] = $Default;

        return $Provided;
    }

    /**
    * Map events to the methods of this plugin that handle them.  Two
    * additional hooks are included when the "EnableDeveloper" setting is on.
    * @return array Names of handler methods, indexed by event name.
    */
    public function HookEvents()
    {
        # (hooking "EVENT_HTML_FILE_LOAD_COMPLETE" to "QueueResourceCheckTasks"
        # is useful for debugging but otherwise shouldn't be used)
        $Hooks = array(
            "EVENT_COLLECTION_ADMINISTRATION_MENU"
                => "DeclareColAdminPages",
            "EVENT_PAGE_LOAD"
                => "SetResourceReleaseCallbacks",
            "EVENT_PERIODIC"
                => "QueueResourceCheckTasks",
            "EVENT_FIELD_ADDED"
                => "AddUrlField",
            "EVENT_PRE_FIELD_DELETE"
                => "RemoveUrlField",
            "EVENT_PLUGIN_CONFIG_CHANGE"
                => "HandleConfigChange",
            "URLCHECKER_USING_CUSTOM_RELEASE_CALLBACKS"
                => "UsingCustomReleaseCallbacks",
            "URLCHECKER_GET_INFORMATION"
                => "GetInformation",
            "URLCHECKER_CHECK_RESOURCE_URLS"
                => "CheckResourceUrls",
            "URLCHECKER_GET_INVALID_COUNT"
                => "GetInvalidCount",
            "URLCHECKER_GET_INVALID_URLS"
                => "GetInvalidUrls",
            "URLCHECKER_GET_INVALID_URL"
                => "GetInvalidUrl",
            "URLCHECKER_IS_RESOURCE_RELEASED"
                => "IsResourceReleased",
            "URLCHECKER_RELEASE_RESOURCE"
                => "ReleaseResource",
            "URLCHECKER_WITHHOLD_RESOURCE"
                => "WithholdResource",
            "URLCHECKER_HIDE_URL"
                => "HideUrl",
            "URLCHECKER_UNHIDE_URL"
                => "UnhideUrl",
            "URLCHECKER_GET_NEXT_RESOURCES_TO_BE_CHECKED"
                => "GetNextResourcesToBeChecked",
            "URLCHECKER_GET_NEXT_URLS_TO_BE_CHECKED"
                => "GetNextUrlsToBeChecked",
        );

        # nothing more to hook unless the developer interface is enabled
        if (!$this->ConfigSetting("EnableDeveloper"))
        {
            return $Hooks;
        }

        # developer-only hooks, appended after the standard ones
        $DeveloperHooks = array(
            "EVENT_DEVELOPER_SUPPORT_MENU" => "DeclareSysAdminPages",
            "URLCHECKER_QUEUE_TASKS_NOW" => "QueueResourceCheckTasks",
        );

        return $Hooks + $DeveloperHooks;
    }


    # ---- HOOKED METHODS ----------------------------------------------------

    /**
    * List the pages this plugin adds to the collection administration
    * section.  The hidden URLs page is listed only when the
    * "EnableDeveloper" setting is on.
    * @return map page name to page title for the application framework
    */
    public function DeclareColAdminPages()
    {
        $PageTitles = array();
        $PageTitles["Results"] = "URL Checker Results";
        $PageTitles["ConfigureActions"] =
            "URL Checker Release/Withhold Configuration";

        $DeveloperEnabled = $this->ConfigSetting("EnableDeveloper");
        if ($DeveloperEnabled)
        {
            $PageTitles["HiddenUrls"] = "URL Checker Hidden URLs";
        }

        return $PageTitles;
    }

    /**
    * List the pages this plugin adds to the system administration section.
    * This should only be called if EnableDeveloper is TRUE.
    * @return map page name to page title for the application framework
    */
    public function DeclareSysAdminPages()
    {
        $PageTitles = array();
        $PageTitles["Developer"] = "URL Checker Developer Page";

        return $PageTitles;
    }

    /**
    * Signal URLCHECKER_SET_RESOURCE_RELEASE_CALLBACKS on page load and, if
    * the result is an array of exactly three callables, store them as the
    * "is released", "release", and "withhold" callbacks (in that order).
    * Any other result is ignored.
    */
    public function SetResourceReleaseCallbacks()
    {
        $Callbacks = $GLOBALS["AF"]->SignalEvent(
            "URLCHECKER_SET_RESOURCE_RELEASE_CALLBACKS");

        # the result has to be a set of exactly three callbacks
        if (!is_array($Callbacks) || count($Callbacks) != 3)
        {
            return;
        }

        # and every one of them has to be callable
        if (!is_callable($Callbacks[0]) || !is_callable($Callbacks[1])
            || !is_callable($Callbacks[2]))
        {
            return;
        }

        $this->IsResourceReleasedCallback = $Callbacks[0];
        $this->ReleaseResourceCallback = $Callbacks[1];
        $this->WithholdResourceCallback = $Callbacks[2];
    }

    /**
    * Report whether custom release/withhold callbacks have been set.
    * @return TRUE if custom callbacks are set, FALSE otherwise
    */
    public function UsingCustomReleaseCallbacks()
    {
        # this data member only exists once the callbacks have been stored
        if (isset($this->IsResourceReleasedCallback))
        {
            return TRUE;
        }

        return FALSE;
    }

    /**
    * Queue tasks to check resource URLs for resources that need to be checked.
    * At most "NumToCheck" tasks are queued per call, divided between
    * re-checks of failing URLs and checks of resources that are due, in
    * proportion to how many of each are waiting.
    * @return Returns the amount of time before this should be called again, in
    *      minutes.
    */
    public function QueueResourceCheckTasks()
    {
        # don't waste time and resources if there aren't any URL fields
        if (count($this->GetUrlFields()) == 0)
        {
            return 60;
        }

        # come back in five minutes if there are URLs still being checked
        if ($GLOBALS["AF"]->GetQueuedTaskCount(array($this, "CheckResourceUrls")))
        {
            return 5;
        }

        # Get the list of failing URLs that need to be checked, and the list of
        # resources that are due for a check.
        $Urls = $this->GetNextUrlsToBeChecked();
        $Resources = $this->GetNextResourcesToBeChecked();

        $NumUrls = count($Urls);
        $NumResources = count($Resources);

        # If we have anything to do:
        if ($NumUrls > 0 || $NumResources > 0)
        {
            # Divide our checks among Urls and Resources, with weighting
            # determined by the number of each check type.  If we've got
            # equal numbers of both, then the split will be 50/50.  If we've
            # got N Url checks and 2N Resource checks, then 1/3 of the
            # checks will go to URLs and 2/3 to Resources.
            $NumToCheck = max(0, intval($this->ConfigSetting("NumToCheck")));
            $PctUrls = $NumUrls / ($NumUrls + $NumResources);

            # Round only the URL share and give the remainder to the
            # resources.  Rounding both shares independently would let both
            # round up on a tie (e.g. a 50/50 split of an odd batch size),
            # queueing one more check than NumToCheck allows.
            $UrlQuota = min($NumToCheck, intval(round($PctUrls * $NumToCheck)));
            $ResourceQuota = $NumToCheck - $UrlQuota;

            $Urls = array_slice($Urls, 0, $UrlQuota, TRUE);
            $Resources = array_slice($Resources, 0, $ResourceQuota, TRUE);

            # Note: In the code below, we do not check our exclusion rules
            # and queue a check for all resources / urls.  This is
            # because the CheckResourceUrls tasks queued by
            # QueueResourceCheckTask() still need to run to do some
            # bookkeeping in the database.
            foreach ($Urls as $Url)
            {
                $Resource = new UrlChecker_Resource($Url->ResourceId);
                $this->QueueResourceCheckTask($Resource);
            }

            foreach ($Resources as $ResourceId => $CheckDate)
            {
                $Resource = new UrlChecker_Resource($ResourceId, $CheckDate);
                $this->QueueResourceCheckTask($Resource);
            }
        }

        return $this->ConfigSetting("CheckDelay");
    }

    /**
    * Collect settings, statistics from the history tables, and version
    * information into one array.  Stale data is removed before anything is
    * counted.
    * @return array of various information
    */
    public function GetInformation()
    {
        $this->RemoveStaleData();

        $Database = new Database();
        $Threshold = self::INVALIDATION_THRESHOLD;
        $Stats = array();

        # database settings
        $DeveloperEnabled = $this->ConfigSetting("EnableDeveloper");
        $Stats["EnableDeveloper"] = intval($DeveloperEnabled);
        $Stats["NumToCheck"] = $this->ConfigSetting("NumToCheck");

        # hard-coded settings
        $Stats["Timeout"] = self::CONNECTION_TIMEOUT;
        $Stats["Threshold"] = $Threshold;

        # the number of resources checked so far
        $Database->Query(
            "SELECT COUNT(*) as NumChecked FROM UrlChecker_ResourceHistory");
        $NumChecked = intval($Database->FetchField("NumChecked"));
        $Stats["NumResourcesChecked"] = $NumChecked;

        # the number of resources that haven't been checked so far (don't count
        # resources with IDs < 0 since they're probably bad)
        $Database->Query("
            SELECT COUNT(*) as NumResources
            FROM Resources
            WHERE ResourceId >= 0");
        $NumResources = intval($Database->FetchField("NumResources"));
        $Stats["NumResourcesUnchecked"] = $NumResources - $NumChecked;

        # the number of the invalid URLs past the threshold and "not hidden"
        $Database->Query("
            SELECT COUNT(*) as NumInvalid
            FROM UrlChecker_UrlHistory
            WHERE Hidden = 0
            AND TimesInvalid > ".$Threshold);
        $Stats["NumInvalid"] = intval($Database->FetchField("NumInvalid"));

        # the number of the invalid URLs past the threshold and hidden
        $Database->Query("
            SELECT COUNT(*) as NumInvalid
            FROM UrlChecker_UrlHistory
            WHERE Hidden = 1
            AND TimesInvalid > ".$Threshold);
        $Stats["NumInvalidAndHidden"] =
            intval($Database->FetchField("NumInvalid"));

        # the number of possibly invalid urls
        $Database->Query("
            SELECT COUNT(*) as NumInvalid
            FROM UrlChecker_UrlHistory
            WHERE TimesInvalid <= ".$Threshold);
        $Stats["NumPossiblyInvalid"] =
            intval($Database->FetchField("NumInvalid"));

        # the number of "not hidden" invalid URLs for each status code
        $VisibleByStatus = array();
        $Database->Query("
            SELECT StatusCode, COUNT(*) as NumInvalid
            FROM UrlChecker_UrlHistory
            WHERE Hidden = 0
            AND TimesInvalid > ".$Threshold."
            GROUP BY StatusCode");
        for ($Row = $Database->FetchRow();
             FALSE !== $Row;
             $Row = $Database->FetchRow())
        {
            $StatusCode = intval($Row["StatusCode"]);
            $VisibleByStatus[$StatusCode] = intval($Row["NumInvalid"]);
        }
        $Stats["InvalidUrlsForStatusCodes"] = $VisibleByStatus;

        # the number of "hidden" invalid URLs for each status code
        $HiddenByStatus = array();
        $Database->Query("
            SELECT StatusCode, COUNT(*) as NumInvalid
            FROM UrlChecker_UrlHistory
            WHERE Hidden = 1
            AND TimesInvalid > ".$Threshold."
            GROUP BY StatusCode");
        for ($Row = $Database->FetchRow();
             FALSE !== $Row;
             $Row = $Database->FetchRow())
        {
            $StatusCode = intval($Row["StatusCode"]);
            $HiddenByStatus[$StatusCode] = intval($Row["NumInvalid"]);
        }
        $Stats["HiddenInvalidUrlsForStatusCodes"] = $HiddenByStatus;

        # if using custom callbacks
        if ($this->UsingCustomReleaseCallbacks())
        {
            $Stats["UsingCustomReleaseCallbacks"] = "Yes";
        }
        else
        {
            $Stats["UsingCustomReleaseCallbacks"] = "No";
        }

        # the last time a check was done
        $Database->Query("
            SELECT *
            FROM UrlChecker_ResourceHistory
            ORDER BY CheckDate DESC LIMIT 1");
        $Stats["DateLastResourceChecked"] = $Database->FetchField("CheckDate");

        # the next time a check will be performed
        $Stats["DateOfNextCheck"] = $this->GetDateOfNextCheck();

        # version information
        $Stats["Version"] = $this->Version;
        $Stats["CwisVersion"] = CWIS_VERSION;
        $Stats["PhpVersion"] = PHP_VERSION;

        return $Stats;
    }

    /**
    * Check all of the URL metadata field values for the given resource.
    * URLs that check out fine have any stored failure record removed from
    * UrlChecker_UrlHistory; URLs with a problem get their failure record
    * (re)written, carrying over the failure count when the failure looks
    * the same as the one recorded previously.
    * @param mixed $ResourceId ID of the resource to check, or a resource
    *       object, which is then used as-is.
    * @param string $CheckDate Date resource was last checked (passed to
    *       UrlChecker_Resource when a resource has to be instantiated).
    */
    public function CheckResourceUrls($ResourceId, $CheckDate)
    {
        $DB = new Database();

        # instantiate resource (callers may also hand in a resource object)
        $Resource = is_object($ResourceId) ? $ResourceId
                : new UrlChecker_Resource($ResourceId, $CheckDate);

        # the URLs for the resource should not be checked
        if ($this->ShouldNotCheckUrls($Resource))
        {
            # record that the resource was checked
            $this->UpdateResourceHistory($Resource);

            # clear out the URL history
            $DB->Query("
                DELETE FROM UrlChecker_UrlHistory
                WHERE ResourceId = '".intval($Resource->Id())."'");

            # don't check any URLs
            return;
        }

        foreach ($this->GetUrlFields() as $Field)
        {
            # skip URL fields from other schemas
            if ($Field->SchemaId() != $Resource->SchemaId())
            {
                continue;
            }

            $Url = $Resource->Get($Field);

            # get the url's http status
            $Info = $this->GetHttpInformation($Url);

            # remove old failure data, if any, if the url is ok (a status
            # code of -1 means the URL was blank or not HTTP/HTTPS and so
            # was not checked at all)
            if ($Info["StatusCode"] == -1 || ($Info["StatusCode"] == 200
                && $this->HasValidContent($Url)))
            {
                # delete/insert record (fragmentation? mysql: prob. not, pgsql: no)
                # avoids any sync issues and self-heals if sync issues do arise
                $DB->Query("LOCK TABLES UrlChecker_UrlHistory WRITE");
                $DB->Query("
                    DELETE FROM UrlChecker_UrlHistory
                    WHERE ResourceId = '".intval($Resource->Id())."'
                    AND FieldId = '".intval($Field->Id())."'");
                $DB->Query("UNLOCK TABLES");
            }

            # record a failure since there was a problem
            else
            {
                $DB->Query("
                    SELECT * FROM UrlChecker_UrlHistory
                    WHERE ResourceId = '".intval($Resource->Id())."'
                    AND FieldId = '".intval($Field->Id())."'");

                # try to use an existing TimesInvalid value if possible and the
                # HTTP info is not too different
                $TimesInvalid = 1;
                $Hidden = 0;
                if (FALSE !== ($Row = $DB->FetchRow())
                    && $Row["StatusCode"] == strval($Info["StatusCode"])
                    && $Row["FinalStatusCode"] == strval($Info["FinalStatusCode"]))
                {
                    # the URL hasn't changed at all
                    if ($Row["FinalUrl"] == $Info["FinalUrl"])
                    {
                        $TimesInvalid = intval($Row["TimesInvalid"]) + 1;
                        $Hidden = intval($Row["Hidden"]);
                    }

                    # if the server uses cookies, and there is a redirect, the
                    # URL is likely to change every time a check takes place.
                    # thus, only check the host portions if those conditions are
                    # true
                    # (substr() is used rather than the {0} string offset
                    # syntax, which is no longer valid as of PHP 8, and it
                    # also copes with an empty or non-string status code)
                    else if (substr(strval($Row["StatusCode"]), 0, 1) === "3"
                            && $Info["UsesCookies"])
                    {
                        $DbUrl = @parse_url($Row["FinalUrl"]);
                        $NewUrl = @parse_url($Info["FinalUrl"]);

                        if ($DbUrl && $NewUrl && isset($DbUrl["host"])
                            && isset($NewUrl["host"])
                            && $DbUrl["host"] == $NewUrl["host"])
                        {
                            $TimesInvalid = intval($Row["TimesInvalid"]) + 1;
                            $Hidden = intval($Row["Hidden"]);
                        }
                    }
                }

                # the final URL counts as invalid when it was served with a
                # 200 but its content does not pass the content check
                $IsFinalUrlInvalid = ($Info["FinalStatusCode"] == 200
                        && !$this->HasValidContent($Info["FinalUrl"])) ? 1 : 0;

                # add the new row with the updated info
                $DB->Query("LOCK TABLES UrlChecker_UrlHistory WRITE");
                $DB->Query("
                    DELETE FROM UrlChecker_UrlHistory
                    WHERE ResourceId = '".intval($Resource->Id())."'
                    AND FieldId = '".intval($Field->Id())."'");
                $DB->Query("
                    INSERT INTO UrlChecker_UrlHistory SET
                    ResourceId = '".intval($Resource->Id())."',
                    FieldId = '".intval($Field->Id())."',
                    Hidden = '".intval($Hidden)."',
                    TimesInvalid = ".intval($TimesInvalid).",
                    Url = '".addslashes($Url)."',
                    StatusCode = '".intval($Info["StatusCode"])."',
                    ReasonPhrase = '".addslashes($Info["ReasonPhrase"])."',
                    IsFinalUrlInvalid = '".intval($IsFinalUrlInvalid)."',
                    FinalUrl = '".addslashes($Info["FinalUrl"])."',
                    FinalStatusCode = '".intval($Info["FinalStatusCode"])."',
                    FinalReasonPhrase = '".addslashes($Info["FinalReasonPhrase"])."'");
                $DB->Query("UNLOCK TABLES");
            }
        }

        # record that the resource was checked
        $this->UpdateResourceHistory($Resource);
    }

    /**
    * Get the number of invalid URLs that match the given constraints.
    * Constraints within one UrlChecker_ConstraintList are ANDed together,
    * the lists themselves are ORed, and the combined condition is ANDed
    * with the invalidation threshold test.
    * @param array $Constraints Array of UrlChecker_ConstraintList objects
    *       (any other array elements are ignored).
    * @return int The number of invalid URLs that match the constraints
    */
    public function GetInvalidCount(array $Constraints = array())
    {
        $this->RemoveStaleData();

        $DB = new Database();
        $ValidRelations = array("=", "!=", "<", ">", "<=", ">=");

        # build one parenthesized group of ANDed conditions per constraint list
        $Groups = array();
        foreach ($Constraints as $ConstraintList)
        {
            # skip invalid constraints
            if (!($ConstraintList instanceof UrlChecker_ConstraintList))
            {
                continue;
            }

            $Conditions = array();
            foreach ($ConstraintList as $Constraint)
            {
                $Key = $Constraint->Key;
                $Value = $Constraint->Value;
                $Relation = $Constraint->Relation;

                # skip if the relation is invalid (strict comparison so that
                # non-string values cannot loosely match an operator)
                if (!in_array($Relation, $ValidRelations, TRUE))
                {
                    continue;
                }

                # Resource table constraint
                if ($Key instanceof MetadataField
                    && $Key->Status() == MetadataSchema::MDFSTAT_OK)
                {
                    $Conditions[] = "R.".$Key->DBFieldName()
                            ." ".$Relation." '".addslashes($Value)."'";
                }

                # UrlChecker_History table constraint (the key is used as a
                # column name, so only plain identifiers are accepted)
                else if (is_string($Key)
                        && preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $Key))
                {
                    $Conditions[] = "URH.".$Key
                            ." ".$Relation." '".addslashes($Value)."'";
                }

                # otherwise ignore the invalid key value
            }

            if (count($Conditions))
            {
                $Groups[] = "( ".implode(" AND ", $Conditions)." )";
            }
        }

        # construct the where constraint, wrapping the ORed groups in their
        # own parentheses so that they cannot bypass the threshold test (AND
        # binds more tightly than OR in SQL)
        $Where = " WHERE URH.TimesInvalid > ".self::INVALIDATION_THRESHOLD." ";
        if (count($Groups))
        {
            $Where .= " AND ( ".implode(" OR ", $Groups)." ) ";
        }

        # get the url data
        $DB->Query("
            SELECT COUNT(*) AS NumInvalid
            FROM UrlChecker_UrlHistory URH
            LEFT JOIN Resources R
            ON URH.ResourceId = R.ResourceId
            ".$Where);

        return intval($DB->FetchField("NumInvalid"));
    }

    /**
    * Get the invalid URLs that match the given constraints.
    * Constraints within one UrlChecker_ConstraintList are ANDed together,
    * the lists themselves are ORed, and the combined condition is ANDed
    * with the invalidation threshold test.
    * @param array $Constraints Array of UrlChecker_ConstraintList objects
    *       (any other array elements are ignored).
    * @param mixed $OrderBy Field by which the URLs should be sorted: either
    *       the name of a URL history column or a MetadataField object.
    *       Anything else falls back to sorting by StatusCode.
    * @param string $OrderDirection Direction in which the URLs should be
    *       sorted ("ASC" or "DESC"; anything else is treated as "DESC").
    * @param int $Limit How many URLs should be returned
    * @param int $Offset Where the result set should begin
    * @param array $Options Various other options (not used by this method).
    * @return array An array of UrlChecker_InvalidUrl objects
    */
    public function GetInvalidUrls(array $Constraints=array(), $OrderBy="StatusCode",
        $OrderDirection="DESC", $Limit=15, $Offset=0, array $Options=array())
    {
        $this->RemoveStaleData();

        $DB = new Database();
        $ValidGetConstraints = array(
            "ResourceId", "FieldId", "TimesInvalid", "Url", "CheckDate",
            "StatusCode", "ReasonPhrase", "FinalUrl", "FinalStatusCode",
            "FinalReasonPhrase", "Hidden");
        $ValidRelations = array("=", "!=", "<", ">", "<=", ">=");

        # build one parenthesized group of ANDed conditions per constraint list
        $Groups = array();
        foreach ($Constraints as $ConstraintList)
        {
            # skip invalid constraints
            if (!($ConstraintList instanceof UrlChecker_ConstraintList))
            {
                continue;
            }

            $Conditions = array();
            foreach ($ConstraintList as $Constraint)
            {
                $Key = $Constraint->Key;
                $Value = $Constraint->Value;
                $Relation = $Constraint->Relation;

                # skip if the relation is invalid (strict comparison so that
                # non-string values cannot loosely match an operator)
                if (!in_array($Relation, $ValidRelations, TRUE))
                {
                    continue;
                }

                # Resource table constraint
                if ($Key instanceof MetadataField
                    && $Key->Status() == MetadataSchema::MDFSTAT_OK)
                {
                    $Conditions[] = "R.".$Key->DBFieldName()
                            ." ".$Relation." '".addslashes($Value)."'";
                }

                # UrlChecker_History table constraint (the key is used as a
                # column name, so only plain identifiers are accepted)
                else if (is_string($Key)
                        && preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $Key))
                {
                    $Conditions[] = "URH.".$Key
                            ." ".$Relation." '".addslashes($Value)."'";
                }

                # otherwise ignore the invalid key value
            }

            if (count($Conditions))
            {
                $Groups[] = "( ".implode(" AND ", $Conditions)." )";
            }
        }

        # construct the where constraint. whenever there is at least one
        # group, the ORed groups get outer parentheses so that they cannot
        # bypass the threshold test (AND binds more tightly than OR in SQL)
        $Where = " WHERE URH.TimesInvalid > ".self::INVALIDATION_THRESHOLD." ";
        if (count($Groups))
        {
            $Where .= " AND ( ".implode(" OR ", $Groups)." ) ";
        }

        # valid UrlChecker_History table order
        if (is_string($OrderBy) && in_array($OrderBy, $ValidGetConstraints, TRUE))
        {
            $OrderBy = "URH.".$OrderBy;
        }

        # valid Resource table order
        else if ($OrderBy instanceof MetadataField
                && $OrderBy->Status() == MetadataSchema::MDFSTAT_OK)
        {
            $OrderBy = "R.".$OrderBy->DBFieldName();
        }

        # otherwise default to the StatusCode field of the UrlChecker_History
        # table
        else
        {
            $OrderBy = "URH.StatusCode";
        }

        # make sure order direction is valid
        if ($OrderDirection != "ASC" && $OrderDirection != "DESC")
        {
            $OrderDirection = "DESC";
        }

        # get the url data
        $DB->Query("
            SELECT * FROM UrlChecker_UrlHistory URH
            LEFT JOIN Resources R
            ON URH.ResourceId = R.ResourceId
            ".$Where."
            ORDER BY ".$OrderBy." ".$OrderDirection."
            LIMIT ".intval($Limit)."
            OFFSET ".intval($Offset));

        # create url objects
        $Urls = array();
        while (FALSE !== ($Row = $DB->FetchRow()))
        {
            $Urls[] = new UrlChecker_InvalidUrl($Row);
        }

        return $Urls;
    }

    /**
    * Get the invalid URL that is associated with the given resource and
    * metadata field, or NULL if one doesn't exist. The stored record is
    * returned whatever its failure count is (no threshold is applied here).
    * @param Resource $Resource Resource.
    * @param MetadataField $Field Metadata field.
    * @return UrlChecker_InvalidUrl|null The stored record for the URL, or
    *       NULL if the URL history has no row for the resource/field pair.
    */
    public function GetInvalidUrl(Resource $Resource, MetadataField $Field)
    {
        $DB = new Database();

        # cast both IDs so that only integers end up in the query
        $DB->Query("
            SELECT *
            FROM UrlChecker_UrlHistory
            WHERE ResourceId = ".intval($Resource->Id())."
            AND FieldId = ".intval($Field->Id()));

        # nothing recorded for this resource/field pair
        if (!$DB->NumRowsSelected())
        {
            return NULL;
        }

        return new UrlChecker_InvalidUrl($DB->FetchRow());
    }

    /**
    * Determine whether or not the resource is "released". When a custom
    * callback has been set, its return value is the answer. Otherwise the
    * answer is the resource's Release Flag value, with resources whose
    * schema has no usable Release Flag field counting as released.
    * @param Resource $Resource Resource.
    * @return TRUE if the resource is released, FALSE otherwise (or whatever
    *       the custom callback returns, when one is set)
    */
    public function IsResourceReleased(Resource $Resource)
    {
        $ResourceSchema = new MetadataSchema($Resource->SchemaId());

        # a custom callback, when set, makes the decision
        if (isset($this->IsResourceReleasedCallback))
        {
            return call_user_func($this->IsResourceReleasedCallback, $Resource);
        }

        # the flag can only be consulted if it exists, is OK, and is enabled
        $Flag = $ResourceSchema->GetFieldByName("Release Flag");
        $FlagIsUsable = ($Flag !== NULL)
                && ($Flag->Status() == MetadataSchema::MDFSTAT_OK)
                && $Flag->Enabled();

        # without a usable flag, assume the resource is released
        return $FlagIsUsable ? (bool)$Resource->Get("Release Flag") : TRUE;
    }

    /**
    * Release the given resource. A custom release callback takes precedence
    * when one is set; failing that, the changes stored in the
    * "ReleaseConfiguration" setting are applied; and if neither is
    * available, the Release Flag value for the resource is set to TRUE.
    * @param Resource $Resource Resource.
    */
    public function ReleaseResource(Resource $Resource)
    {
        $Schema = new MetadataSchema($Resource->SchemaId());
        $ReleaseAction = $this->ConfigSetting("ReleaseConfiguration");

        # use a custom callback if one is available
        if (isset($this->ReleaseResourceCallback))
        {
            call_user_func($this->ReleaseResourceCallback, $Resource);
        }
        # otherwise, use the action configured in the UI
        else if ($ReleaseAction !== NULL)
        {
            # actions configured via the UI
            $WasChanged = FieldEditingUI::ApplyChangesToResource(
                $Resource, $GLOBALS["G_User"], $ReleaseAction);

            # the modification fields are only stamped for the default schema
            if ($WasChanged && $Schema->Id() == MetadataSchema::SCHEMAID_DEFAULT)
            {
                $Resource->Set("Date Last Modified", "now");
                $Resource->Set("Last Modified By Id", $GLOBALS["G_User"]);
            }
        }
        # if nothing was configured, fall back to toggling Release Flag
        else
        {
            # Status() has to be called as a method, as is done everywhere
            # else in this class, for the status test to be able to pass
            $ReleaseFlag = $Schema->GetFieldByName("Release Flag");
            if ($ReleaseFlag !== NULL &&
                $ReleaseFlag->Status() == MetadataSchema::MDFSTAT_OK &&
                $ReleaseFlag->Enabled() &&
                !$Resource->Get($ReleaseFlag) )
            {
                $Resource->Set($ReleaseFlag, TRUE);
                $Resource->Set("Date Last Modified", "now");
                $Resource->Set("Last Modified By Id", $GLOBALS["G_User"]);
            }
        }
    }

    /**
    * Withhold the given resource. A custom withhold callback takes precedence
    * when one is set; failing that, the changes stored in the
    * "WithholdConfiguration" setting are applied; and if neither is
    * available, the Release Flag value for the resource is set to FALSE.
    * @param Resource $Resource Resource.
    */
    public function WithholdResource(Resource $Resource)
    {
        $Schema = new MetadataSchema($Resource->SchemaId());
        $WithholdAction = $this->ConfigSetting("WithholdConfiguration");

        # whether the modification date/user have to be stamped at the end
        $StampModification = FALSE;

        if (isset($this->WithholdResourceCallback))
        {
            # custom callback set (no modification stamping is done for it)
            call_user_func($this->WithholdResourceCallback, $Resource);
        }
        else if ($WithholdAction !== NULL)
        {
            # actions configured via the UI; only stamp the modification
            # fields if something changed and this is the default schema
            $WasChanged = FieldEditingUI::ApplyChangesToResource(
                $Resource, $GLOBALS["G_User"], $WithholdAction);
            $StampModification = ($WasChanged
                    && $Schema->Id() == MetadataSchema::SCHEMAID_DEFAULT);
        }
        else
        {
            # nothing was configured, so fall back to clearing Release Flag,
            # provided the field is usable and the flag is currently set
            $ReleaseFlag = $Schema->GetFieldByName("Release Flag");
            if ($ReleaseFlag !== NULL
                && $ReleaseFlag->Status() == MetadataSchema::MDFSTAT_OK
                && $ReleaseFlag->Enabled()
                && $Resource->Get($ReleaseFlag))
            {
                $Resource->Set($ReleaseFlag, FALSE);
                $StampModification = TRUE;
            }
        }

        if ($StampModification)
        {
            $Resource->Set("Date Last Modified", "now");
            $Resource->Set("Last Modified By Id", $GLOBALS["G_User"]);
        }
    }

    /**
    * Mark the URL history entry for the given resource and metadata field
    * as hidden, so that it doesn't show up on the results page.
    * @param Resource $Resource Resource.
    * @param MetadataField $Field Metadata field.
    */
    public function HideUrl(Resource $Resource, MetadataField $Field)
    {
        $DB = new Database();

        # reduce both IDs to integers before they go into the query
        $ResourceId = intval($Resource->Id());
        $FieldId = intval($Field->Id());

        $Query = "
            UPDATE UrlChecker_UrlHistory
            SET Hidden = 1
            WHERE ResourceId = '".$ResourceId."'
            AND FieldId = '".$FieldId."'";
        $DB->Query($Query);
    }

    /**
    * Clear the hidden mark on the URL history entry for the given resource
    * and metadata field, so that it shows up on the results page again.
    * @param Resource $Resource Resource.
    * @param MetadataField $Field Metadata field.
    */
    public function UnhideUrl(Resource $Resource, MetadataField $Field)
    {
        $DB = new Database();

        # reduce both IDs to integers before they go into the query
        $ResourceId = intval($Resource->Id());
        $FieldId = intval($Field->Id());

        $Query = "
            UPDATE UrlChecker_UrlHistory
            SET Hidden = 0
            WHERE ResourceId = '".$ResourceId."'
            AND FieldId = '".$FieldId."'";
        $DB->Query($Query);
    }

    /**
    * Get a subset of the resources that haven't been checked or haven't been
    * checked in at least a day. Never-checked resources are taken first, and
    * any room left (up to the "NumToCheck" setting) is filled with the
    * resources whose last check is the oldest.
    * @return an associative array mapping ResourceId => CheckDate, where the
    *       CheckDate is "N/A" for resources that have never been checked
    */
    public function GetNextResourcesToBeChecked()
    {
        $this->RemoveStaleData();

        $DB = new Database();
        $Resources = array();

        # fetch the batch size once, and force it to a non-negative integer
        # so that an unset or malformed setting cannot break the LIMIT clauses
        $NumToCheck = max(0, intval($this->ConfigSetting("NumToCheck")));

        # never been checked (only the ID is needed from each row)
        $DB->Query("
            SELECT R.ResourceId
            FROM Resources R
            LEFT JOIN UrlChecker_ResourceHistory URH
            ON R.ResourceId = URH.ResourceId
            WHERE URH.ResourceId IS NULL
            AND R.ResourceId >= 0
            LIMIT ".$NumToCheck);

        $NumNew = $DB->NumRowsSelected();

        while (FALSE !== ($Row = $DB->FetchRow()))
        {
            $Resources[$Row["ResourceId"]] = "N/A";
        }

        # still some space left for more resources to check
        if ($NumNew < $NumToCheck)
        {
            # resources that haven't been checked in at least one day, sorted
            # by the last time they were checked.
            $Yesterday = date("Y-m-d H:i:s", strtotime("-1 day"));
            $DB->Query("
                SELECT *
                FROM UrlChecker_ResourceHistory
                WHERE CheckDate <= '".strval($Yesterday)."'
                ORDER BY CheckDate ASC
                LIMIT ".($NumToCheck - $NumNew));

            while (FALSE !== ($Row = $DB->FetchRow()))
            {
                $Resources[$Row["ResourceId"]] = $Row["CheckDate"];
            }
        }

        return $Resources;
    }

    /**
    * Get a subset of the invalid URLs that are due for a recheck: those at
    * or below the invalidation threshold that were last checked at least a
    * day ago, and those over the threshold that were last checked at least
    * a week ago. At most "NumToCheck" URLs are returned, least recently
    * checked first.
    * @return an array of UrlChecker_InvalidUrl objects
    */
    public function GetNextUrlsToBeChecked()
    {
        $this->RemoveStaleData();

        $DB = new Database();
        $Urls = array();

        # force the batch size to a non-negative integer so that an unset or
        # malformed setting cannot break the LIMIT clause
        $NumToCheck = max(0, intval($this->ConfigSetting("NumToCheck")));

        # (check times > 1 day and <= threshold) OR
        # (check times > 1 week and > threshold)
        $Yesterday = date("Y-m-d H:i:s", strtotime("-1 day"));
        $WeekAgo = date("Y-m-d H:i:s", strtotime("-1 week"));
        $DB->Query("
            SELECT *
            FROM UrlChecker_UrlHistory
            WHERE
              (TimesInvalid <= ".intval(self::INVALIDATION_THRESHOLD)."
               AND CheckDate <= '".strval($Yesterday)."')
              OR
              (TimesInvalid > ".intval(self::INVALIDATION_THRESHOLD)."
               AND CheckDate <= '".strval($WeekAgo)."')
            ORDER BY CheckDate ASC
            LIMIT ".$NumToCheck);

        while (FALSE !== ($Row = $DB->FetchRow()))
        {
            $Urls[] = new UrlChecker_InvalidUrl($Row);
        }

        return $Urls;
    }

    /**
    * Handle the addition of a new field: if it is a URL field, append it to
    * the list of fields to check and save the list to the plugin
    * configuration.
    * @param int $FieldId ID of field.
    */
    public function AddUrlField($FieldId)
    {
        $NewField = new MetadataField($FieldId);

        # fields of any other type are of no interest to the URL checker
        if ($NewField->Type() != MetadataSchema::MDFTYPE_URL)
        {
            return;
        }

        $this->FieldsToCheck[] = $FieldId;
        $this->ConfigSetting("FieldsToCheck", $this->FieldsToCheck);
    }

    /**
    * Handle the deletion of a URL field, removing it from the list of fields
    * to check and saving the updated list to the plugin configuration.
    * @param int $FieldId ID of field.
    */
    public function RemoveUrlField($FieldId)
    {
        $Field = new MetadataField($FieldId);

        # only URL fields can be in the list of fields to check
        if ($Field->Type() != MetadataSchema::MDFTYPE_URL)
        {
            return;
        }

        # nothing to remove if the list was never populated
        if (!is_array($this->FieldsToCheck))
        {
            return;
        }

        # look up the field's position(s) first and test the result
        # separately: assigning inside the condition stored the boolean
        # result of the "!== FALSE" comparison rather than the array key,
        # because of operator precedence. (every occurrence is removed, since
        # AddUrlField() does not prevent duplicates)
        $Keys = array_keys($this->FieldsToCheck, $FieldId);
        if (count($Keys))
        {
            foreach ($Keys as $Key)
            {
                unset($this->FieldsToCheck[$Key]);
            }
            $this->ConfigSetting("FieldsToCheck", $this->FieldsToCheck);
        }
    }

    /**
    * Handle changes to plugin configuration. When this plugin's "DontCheck"
    * setting changes, a low-priority background task is queued to run
    * ProcessChangedExclusionRules().
    * @param string $PluginName Name of plugin
    * @param string $ConfigSetting Setting to change.
    * @param mixed $OldValue Old value of setting.
    * @param mixed $NewValue New value of setting.
    */
    public function HandleConfigChange(
            $PluginName, $ConfigSetting, $OldValue, $NewValue )
    {
        # only a change to our own exclusion rules is of interest
        # NOTE(review): the plugin name is compared against $this->Name --
        # confirm that callers pass the same form of the name
        $IsOurExclusionRules = ($PluginName == $this->Name
                && $ConfigSetting == "DontCheck");
        if (!$IsOurExclusionRules)
        {
            return;
        }

        $TaskDescription = "Remove URL checker data for resources excluded "
                ."by URLChecker rules change";
        $GLOBALS["AF"]->QueueUniqueTask(
            array($this, "ProcessChangedExclusionRules"),
            array(), ApplicationFramework::PRIORITY_LOW,
            $TaskDescription);
    }

    /**
    * Process a change in exclusion rules: delete the URL history of every
    * resource that has an entry with a status code of 300 or higher and
    * that the current rules say should not be checked. This keeps such
    * resources from continuing to appear in the Results page after being
    * excluded.
    */
    public function ProcessChangedExclusionRules()
    {
        $DB = new Database();
        $DB->Query(
             "SELECT DISTINCT ResourceId AS ResourceId "
             ."FROM UrlChecker_UrlHistory WHERE StatusCode >= 300");

        # pick out the resources that the exclusion rules now skip
        $IdsToPurge = array();
        foreach ($DB->FetchRows() as $HistoryRow)
        {
            $Id = $HistoryRow["ResourceId"];
            if ($this->ShouldNotCheckUrls(new Resource($Id)))
            {
                $IdsToPurge[] = $Id;
            }
        }

        # delete in batches of 100 IDs per query (an empty list yields no
        # batches, so no query is run in that case)
        foreach (array_chunk($IdsToPurge, 100) as $IdBatch)
        {
            $DB->Query(
                "DELETE FROM UrlChecker_UrlHistory "
                ."WHERE ResourceId IN (".implode(",", $IdBatch).")");
        }
    }

    # ---- PRIVATE INTERFACE -------------------------------------------------

    private $Rules;
    private $FieldsToCheck;

    /**
    * Optional custom callbacks. When set, they take the place of the
    * default Release Flag handling in IsResourceReleased(),
    * ReleaseResource(), and WithholdResource(), respectively.
    */
    private $IsResourceReleasedCallback;
    private $ReleaseResourceCallback;
    private $WithholdResourceCallback;

    /**
    * Determine whether the URLs for the given resource are excluded from
    * checking by any of the configured rules. Each rule is a string of the
    * form "<field ID>:<flag>"; rules for fields that can't be loaded, aren't
    * OK, or belong to a different schema than the resource are skipped.
    * @param Resource $Resource Resource.
    * @return bool TRUE if the URLs should not be checked and FALSE otherwise
    */
    protected function ShouldNotCheckUrls(Resource $Resource)
    {
        $ExclusionRules = isset($this->Rules) ? $this->Rules : array();

        # the first matching rule settles it
        foreach ($ExclusionRules as $Rule)
        {
            # split the rule into its field ID and flag value
            list($FieldId, $Flag) = explode(":", $Rule);

            try
            {
                $Field = new MetadataField($FieldId);
            }
            catch (Exception $e)
            {
                # the field ID was invalid, so this rule can't apply
                continue;
            }

            # skip rules for fields that aren't OK or that belong to a
            # different schema than the resource
            if ($Field->Status() != MetadataSchema::MDFSTAT_OK
                || $Field->SchemaId() != $Resource->SchemaId())
            {
                continue;
            }

            # empty values (e.g., NULLs in the database) are taken to be "off"
            $Value = $Resource->Get($Field);
            $Value = empty($Value) ? self::FLAG_OFF_VALUE : $Value;

            $FieldType = $Field->Type();
            if ($FieldType == MetadataSchema::MDFTYPE_FLAG)
            {
                # flag rules match when the field value equals the flag value
                # given in the rule
                if ($Value == $Flag)
                {
                    return TRUE;
                }
            }
            else if ($FieldType == MetadataSchema::MDFTYPE_TIMESTAMP)
            {
                # "PAST" rules match when the timestamp is before now
                # NOTE(review): an empty timestamp has been replaced with
                # FLAG_OFF_VALUE by this point -- confirm that the resulting
                # comparison is what is wanted for resources without a date
                if ($Flag == "PAST" && strtotime($Value) < time())
                {
                    return TRUE;
                }
            }

            # rules for any other field type never match
        }

        return FALSE;
    }

    /**
    * Record in UrlChecker_ResourceHistory that the given resource has been
    * checked, replacing whatever row the table already had for it.
    * @param Resource $Resource The resource for which to update the history.
    */
    protected function UpdateResourceHistory(Resource $Resource)
    {
        $DB = new Database();
        $EscapedId = addslashes($Resource->Id());

        # the row is deleted and re-inserted under a table lock rather than
        # updated in place, which avoids any sync issues and self-heals if
        # sync issues do arise
        $Statements = array(
            "LOCK TABLES UrlChecker_ResourceHistory WRITE",
            "
            DELETE FROM UrlChecker_ResourceHistory
            WHERE ResourceId = '".$EscapedId."'",
            "
            INSERT INTO UrlChecker_ResourceHistory
            SET ResourceId = '".$EscapedId."'",
            "UNLOCK TABLES");

        foreach ($Statements as $Statement)
        {
            $DB->Query($Statement);
        }
    }

    /**
    * Get the status info for a URL, following up to five redirects. The
    * returned info always describes the given URL itself; if it redirects,
    * the "Final*" entries are filled in from the last URL that was reached,
    * and "UsesCookies" is set if it was set for any URL along the way.
    * @param string $Url URL
    * @return an array with the same fields as an UrlChecker_HttpInfo object
    */
    protected function GetHttpInformation($Url)
    {
        # information for the URL itself
        list($Info, $NextUrl) = $this->GetHttpInformationAux($Url);

        # no redirect, so there is nothing more to find out
        if (is_null($NextUrl))
        {
            return $Info;
        }

        # follow the redirect chain, giving up after five hops
        for ($HopsLeft = 5; $HopsLeft > 0 && !is_null($NextUrl); $HopsLeft--)
        {
            $FinalUrl = $NextUrl;
            list($FinalInfo, $NextUrl) = $this->GetHttpInformationAux($FinalUrl);

            $Info["UsesCookies"] = $Info["UsesCookies"] || $FinalInfo["UsesCookies"];
        }

        # report on the last URL that was actually requested
        $Info["FinalUrl"] = $FinalUrl;
        $Info["FinalStatusCode"] = $FinalInfo["StatusCode"];
        $Info["FinalReasonPhrase"] = $FinalInfo["ReasonPhrase"];

        return $Info;
    }

    /**
    * Auxiliary function for self::GetHttpInformation(). Gets the HTTP
    * information on one URL. Note that this only supports HTTP and HTTPS.
    * @param string $Url URL
    * @return an array with the same fields as an UrlChecker_HttpInfo object
    */
    private function GetHttpInformationAux($Url)
    {
        # this should be an UrlChecker_HttpInfo object but some versions of PHP
        # segfault when using them, for an unknown reason
        $Info = array("Url" => "", "StatusCode" => -1, "ReasonPhrase" => "",
            "FinalUrl" => "", "FinalStatusCode" => -1, "FinalReasonPhrase" => "",
            "UsesCookies" => FALSE);

        # blank url (code defaults to -1, i.e., not checked)
        if (!strlen(trim($Url)))
        {
            return array($Info, NULL);
        }

        # default to HTTP if no protocol is specified. URI schemes are
        # case-insensitive, so "HTTP://..." must not be given a second prefix
        if (!preg_match('/^[a-z]+:/i', $Url))
        {
            $Url = "http://".$Url;
        }

        # only check HTTP/HTTPS URLs
        if (!preg_match('/^https?:\/\//i', $Url))
        {
            return array($Info, NULL);
        }

        # assume that we can't connect to the URL
        $Info["Url"] = $Url;
        $Info["StatusCode"] = 0;

        # make sure there are no spaces in the url and parse it
        $ParsedUrl = @parse_url(str_replace(" ", "%20", $Url));

        if (!$ParsedUrl || !isset($ParsedUrl["host"]))
        {
            return array($Info, NULL);
        }

        $IsHttps = isset($ParsedUrl["scheme"])
            && strtolower($ParsedUrl["scheme"]) == "https";
        $HasCredentials =
            isset($ParsedUrl["user"]) && isset($ParsedUrl["pass"]);
        $HasExplicitPort = isset($ParsedUrl["port"]);

        # use the port from the URL if given, otherwise the scheme's default
        if ($HasExplicitPort)
        {
            $Port = intval($ParsedUrl["port"]);
        }
        else
        {
            $Port = $IsHttps ? 443 : 80;
        }

        # fsockopen only understands a transport prefix and a host name, so
        # any credentials from the URL are sent as a request header instead of
        # being embedded in the connection target. HTTPS needs the ssl://
        # transport
        $Target = ($IsHttps ? "ssl://" : "").$ParsedUrl["host"];

        # can't connect. the timeout here only covers opening the connection
        $Stream = @fsockopen($Target, $Port, $ErrNo, $ErrStr,
            self::CONNECTION_TIMEOUT);

        if (FALSE === $Stream)
        {
            return array($Info, NULL);
        }

        # also bound the time spent waiting on reads so that a server that
        # accepts the connection but never answers can't hang the check
        stream_set_timeout($Stream, (int)ceil(self::CONNECTION_TIMEOUT));

        # construct the request target that's going to be GET'ed. the query
        # string is kept even when the URL has no path component
        $Path = isset($ParsedUrl["path"]) ? $ParsedUrl["path"] : "/";
        $RequestTarget = $Path;

        if (isset($ParsedUrl["query"]))
        {
            $RequestTarget .= "?".$ParsedUrl["query"];
        }

        # the Host header has to carry the port when the URL specifies one
        $HostHeader = $ParsedUrl["host"];

        if ($HasExplicitPort)
        {
            $HostHeader .= ":".$Port;
        }

        # basic headers required for HTTP version 1.1
        $RequestHeaders = "GET ".$RequestTarget." HTTP/1.1\r\n";
        $RequestHeaders .= "Host: ".$HostHeader."\r\n";

        # username and password specified in the URL, send them as HTTP basic
        # authentication (parse_url leaves them percent-encoded)
        if ($HasCredentials)
        {
            $RequestHeaders .= "Authorization: Basic ".base64_encode(
                rawurldecode($ParsedUrl["user"]).":"
                .rawurldecode($ParsedUrl["pass"]))."\r\n";
        }

        # set the User-Agent header since some servers erroneously require it
        $RequestHeaders .= "User-Agent: URL-Checker/".$this->Version." "
           ."CWIS/".CWIS_VERSION." PHP/".PHP_VERSION."\r\n";

        # some servers erroneously require the Accept header too
        $RequestHeaders .= "Accept: text/html,application/xhtml+xml,"
            ."application/xml;q=0.9,*/*;q=0.8\r\n";

        # final newline to signal that we're done sending headers
        $RequestHeaders .= "\r\n";

        if (FALSE === fwrite($Stream, $RequestHeaders))
        {
            # couldn't send anything
            fclose($Stream);
            return array($Info, NULL);
        }

        # HTTP status line
        $Line = feof($Stream) ? FALSE : fgets($Stream);

        if (FALSE === $Line)
        {
            # the server responded with nothing so mark the URL as an internal
            # server error (500)
            fclose($Stream);
            $Info["StatusCode"] = 500;
            $Info["ReasonPhrase"] = "Internal Server Error";
            return array($Info, NULL);
        }

        # trim() removes the trailing newline from the HTTP status line
        $StatusLine = new UrlChecker_StatusLine(trim($Line));
        $Info["StatusCode"] = $StatusLine->GetStatusCode();
        $Info["ReasonPhrase"] = $StatusLine->GetReasonPhrase();

        # read the response headers, stopping before any content
        $Location = NULL;

        while (!feof($Stream) && FALSE !== ($Line = fgets($Stream)))
        {
            # trim() removes the trailing newline
            $Line = trim($Line);

            # a blank line marks the end of the headers
            if ($Line == "")
            {
                break;
            }

            # header names are case-insensitive, so compare them that way

            # a Location header
            if (strncasecmp($Line, "Location:", 9) == 0)
            {
                $Location = trim(substr($Line, 9));
            }

            # a Set-Cookie header
            if (strncasecmp($Line, "Set-Cookie:", 11) == 0)
            {
                $Info["UsesCookies"] = TRUE;
            }
        }

        # only the headers are needed, so the connection can be closed now
        fclose($Stream);

        # an empty Location header carries no redirect target
        if (!is_null($Location) && !strlen($Location))
        {
            $Location = NULL;
        }

        # given a Location value without a scheme; need to make it absolute
        if (!is_null($Location)
            && !preg_match('/^[a-z][a-z0-9+.\-]*:/i', $Location))
        {
            $Scheme = $IsHttps ? "https" : "http";

            if (substr($Location, 0, 2) == "//")
            {
                # protocol-relative value, only the scheme is inherited
                $Location = $Scheme.":".$Location;
            }

            else
            {
                # rebuild the authority part from the URL that was requested,
                # including any credentials and explicit port
                $Authority = $ParsedUrl["host"];

                if ($HasCredentials)
                {
                    $Authority = $ParsedUrl["user"].":".$ParsedUrl["pass"]
                        ."@".$Authority;
                }

                if ($HasExplicitPort)
                {
                    $Authority .= ":".$Port;
                }

                if ($Location[0] == "?")
                {
                    # query-only value, keep the current path
                    $Location = $Path.$Location;
                }

                else if ($Location[0] != "/")
                {
                    # relative path, so resolve it against the directory of
                    # the requested path (everything up to the last slash)
                    $SlashPos = strrpos($Path, "/");
                    $BaseDir = (FALSE === $SlashPos)
                        ? "/" : substr($Path, 0, $SlashPos + 1);
                    $Location = $BaseDir.$Location;
                }

                $Location = $Scheme."://".$Authority.$Location;
            }
        }

        return array($Info, $Location);
    }

    /**
    * Determine if a given URL has valid content, that is, if it doesn't match
    * some rudimentary regular expressions. Checks for "Page Not Found"-type
    * strings.
    * @param string $Url URL
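    * @return FALSE if the text of the page matches one of the "not found"
    *       patterns, TRUE otherwise (including when the URL cannot be opened
    *       or the content does not look like HTML)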
    */
    private function HasValidContent($Url)
    {
        $Options = array("http" => array());

        # set the default protocol version to 1.1, this may cause issues with
        # PHP < 5.3 if the request isn't HTTP 1.1 compliant
        $Options["http"]["protocol_version"] = 1.1;

        # timeout
        $Options["http"]["timeout"] = self::CONNECTION_TIMEOUT;

        # set the User-Agent HTTP header since some servers erroneously require
        # it
        $Options["http"]["user_agent"] = "URL-Checker/".$this->Version." "
           ."CWIS/".CWIS_VERSION." PHP/".PHP_VERSION;

        # some servers erroneously require the Accept header too
        $Options["http"]["header"] = "Accept: text/html,application/xhtml+xml,"
            ."application/xml;q=0.9,*/*;q=0.8";

        # try to prevent hangs in feof by telling the server to close the
        # connection after retrieving all of the content
        $Options["http"]["header"] .= "\r\nConnection: close";

        # fetch content even when the HTTP status code is not 200
        $Options["http"]["ignore_errors"] = TRUE;

        $Context = stream_context_create($Options);

        # escape spaces so that we don't mess up the http method header line
        $Url = str_replace(" ", "%20", $Url);

        # content that can't be fetched is given the benefit of the doubt
        if (FALSE === ($Handle = @fopen($Url, "r", FALSE, $Context)))
        {
            return TRUE;
        }

        # sleep for 0.15s to allow some of the content to buffer to avoid having
        # the opening HTML tag not show up in the first fread
        usleep(150000);

        # get the first 8KB and do a basic check to see if the file is HTML.
        # since fread might stop before getting 8KB, e.g., if a packet is
        # received or the server is slow, there is a chance that the file is
        # HTML, but its opening tag won't have arrived in the first fread, and
        # therefore won't be checked. this should be OK since it probably means
        # the server is really slow and it shouldn't be checked anyway. the
        # tag is matched case-insensitively so that "<HTML>" is recognized too
        $Html = @fread($Handle, 8192);

        if (FALSE === $Html || FALSE === stripos($Html, "<html"))
        {
            # close the handle on this path too so it isn't leaked
            fclose($Handle);
            return TRUE;
        }

        # this will be used to prevent hangs in feof in case the server doesn't
        # support the Connection header
        $Time = microtime(TRUE);

        # read until the end of the file, the timeout is reached, or if at least
        # 500 KB have been read
        $Failsafe = 1000;
        while (!feof($Handle)
               && (microtime(TRUE) - $Time) < self::CONNECTION_TIMEOUT
               && strlen($Html) < 512000 # strlen can't always be trusted
               && $Failsafe--)
        {
            # the chunk is checked before it is appended; testing the result
            # of the append itself could never detect a failed read
            $Chunk = @fread($Handle, 8192);

            if (FALSE === $Chunk)
            {
                fclose($Handle);
                return TRUE;
            }

            $Html .= $Chunk;
        }

        fclose($Handle);

        # parse out the title and the body to search within
        $Title = (preg_match('/<title[^>]*>(.*?)<\/title>/is', $Html, $Matches))
            ? trim($Matches[1]) : "";
        $Body = (preg_match('/<body[^>]*>(.*?)<\/body>/is', $Html, $Matches))
            ? trim($Matches[1]) : "";
        $Html = $Title." ".$Body;

        # strip out tags that contain data that is probably not HTML. the cast
        # keeps the value a string if the replacement fails and returns NULL
        $Html = (string)preg_replace(
            '/<(script|noscript|style)[^>]*>.*?<\/\1>/is', '', $Html);

        # remove HTML tags so we only have text to search
        $Html = strip_tags($Html);

        # "Page Not Found"-type phrases that mark the content as invalid
        $NotFoundPatterns = array(
            # e.g., "page could not be found", "file was not found"
            '/(file|url|page|document)\s+([^\s]+\s+)?(couldn\'t\s+be|'
                .'could\s+not\s+be|cannot\s+be|can\'t\s+be|was\s+not)\s+found/i',
            # e.g., "page not found", "404 not found", "error 404"
            '/(file|url|page|404|document)\s+not\s+found|'
                .'(http|error)\s+404/i',
            # e.g., "could not find the page"
            '/(couldn\'t|could\s+not|cannot|can\'t)\s+find\s+'
                .'(the|that)\s+(file|url|page|document)/i',
        );

        foreach ($NotFoundPatterns as $Pattern)
        {
            if (preg_match($Pattern, $Html))
            {
                return FALSE;
            }
        }

        return TRUE;
    }

    /**
    * Queue tasks that check individual resources.
    * @param Resource $Resource Resource to be checked.
    */
    private function QueueResourceCheckTask(Resource $Resource)
    {
        # human-readable description of the task, linking to the resource
        $Link = "<a href=\"r".$Resource->Id()."\"><i>"
            .$Resource->GetMapped("Title")."</i></a>";
        $TaskDescription = "Validate URLs associated with ".$Link;

        # the method to run and the arguments it will be given
        $Callback = array($this, "CheckResourceUrls");
        $Parameters = array($Resource->Id(), $Resource->GetCheckDate());
        $Priority = $this->ConfigSetting("TaskPriority");

        $GLOBALS["AF"]->QueueUniqueTask(
            $Callback, $Parameters, $Priority, $TaskDescription);
    }

    /**
    * Remove any stale data from deleted resources or changed URLs.
    */
    private function RemoveStaleData()
    {
        # the cleanup queries only need to be executed once per load
        static $AlreadyCleaned = FALSE;

        if ($AlreadyCleaned)
        {
            return;
        }

        $AlreadyCleaned = TRUE;
        $DB = new Database();

        # queries that don't depend on a particular field, in the order in
        # which they are executed
        $GeneralQueries = array(
            # resource history rows whose resource has been deleted
            "
            DELETE URH
            FROM UrlChecker_ResourceHistory URH
            LEFT JOIN Resources R
            ON URH.ResourceId = R.ResourceId
            WHERE R.ResourceId IS NULL",

            # URL history rows whose resource has been deleted
            "
            DELETE UUH
            FROM UrlChecker_UrlHistory UUH
            LEFT JOIN Resources R
            ON UUH.ResourceId = R.ResourceId
            WHERE R.ResourceId IS NULL",

            # URL history rows whose field has been deleted
            "
            DELETE UUH
            FROM UrlChecker_UrlHistory UUH
            LEFT JOIN MetadataFields M
            ON UUH.FieldId = M.FieldId
            WHERE M.FieldId IS NULL",

            # URL history rows for fields that aren't URL fields (from when
            # field types are changed)
            "DELETE FROM UrlChecker_UrlHistory WHERE FieldId NOT IN "
                ."(SELECT FieldId FROM MetadataFields WHERE FieldType='Url')",
        );

        foreach ($GeneralQueries as $Query)
        {
            $DB->Query($Query);
        }

        # clean history tables of data from URLs that have changed
        foreach ($this->GetUrlFields() as $UrlField)
        {
            $Column = $UrlField->DBFieldName();
            $FieldId = intval($UrlField->Id());

            # both of the following queries use BINARY when checking the URL
            # field to force a case sensitive search

            # the resource history is cleaned first because its query relies
            # on the URL history rows that the second query removes
            $DB->Query("
                DELETE URH
                FROM UrlChecker_ResourceHistory URH
                LEFT JOIN (Resources R, UrlChecker_UrlHistory UUH)
                ON (UUH.ResourceId = R.ResourceId
                  AND UUH.ResourceId = URH.ResourceId)
                WHERE UUH.ResourceId IS NOT NULL AND (
                  BINARY UUH.Url != R.".$Column."
                  AND UUH.FieldId = ".$FieldId."
                )");

            $DB->Query("
                DELETE UUH
                FROM UrlChecker_UrlHistory UUH
                LEFT JOIN Resources R
                ON UUH.ResourceId = R.ResourceId
                WHERE
                BINARY UUH.Url != R.".$Column."
                AND UUH.FieldId = ".$FieldId);
        }
    }

    /**
    * Get all the URL metadata fields.
    * @return array of MetadataField objects for the fields configured to be
    *       checked, skipping any field that cannot be loaded
    */
    private function GetUrlFields()
    {
        # the list is built on the first call and reused afterwards
        static $LoadedFields = NULL;

        if (!is_null($LoadedFields))
        {
            return $LoadedFields;
        }

        $LoadedFields = array();

        foreach ($this->FieldsToCheck as $Id)
        {
            try
            {
                $LoadedFields[] = new MetadataField($Id);
            }
            catch (Exception $Ex)
            {
                # the field is invalid, so leave it out and move on
            }
        }

        return $LoadedFields;
    }

    /**
    * Get the date/time that the URL checking method will run.
    * @return Returns the date/time that the URL checking method will run.
    */
    private function GetDateOfNextCheck()
    {
        # textual form of the callback for the URL checking method
        $CheckMethod = "UrlChecker::QueueResourceCheckTasks";

        foreach ($GLOBALS["AF"]->GetKnownPeriodicEvents() as $Event)
        {
            $Callback = $Event["Callback"];

            # skip anything that isn't a callback made through a plugin caller
            if (!is_array($Callback)
                || !($Callback[0] instanceof PluginCaller))
            {
                continue;
            }

            # found the URL checking method, so report its next run date
            if ($Callback[0]->GetCallbackAsText() == $CheckMethod)
            {
                return date("Y-m-d H:i:s", $Event["NextRun"]);
            }
        }

        # the URL checking method isn't scheduled, so no next run date
        return NULL;
    }
}
