t('Ultimate Cron Monitoring'), 'id' => 'ULTIMATE_CRON', ); }

/**
 * Implementation of hook_nagios().
 *
 * Runs the check functions listed by ultimate_cron_nagios_functions() and
 * collects their results.
 *
 * @param string $check
 *   'nagios' to run every enabled check, or the name of a single check
 *   (a key of ultimate_cron_nagios_functions()) to run only that one.
 *
 * @return array
 *   Check results, keyed by each result's 'key' and holding its 'data'.
 */
function ultimate_cron_nagios($check = 'nagios') {
  $status = array();
  foreach (ultimate_cron_nagios_functions() as $function => $description) {
    // Every check has its own on/off variable; checks are on by default.
    $enabled = variable_get('ultimate_cron_nagios_func_' . $function, TRUE);
    $requested = ($check == 'nagios' || $check == $function);
    if (!$enabled || !$requested) {
      continue;
    }
    // By convention the implementation of check "foo" is foo_check().
    $func = $function . '_check';
    $result = $func();
    $status[$result['key']] = $result['data'];
  }
  return $status;
}

/**
 * Implementation of hook_nagios_settings().
 *
 * Builds the settings form elements for this module: one checkbox per
 * available check, plus a fieldset with the critical thresholds that the
 * check functions read through variable_get().
 *
 * @return array
 *   Form array keyed by the names of the variables being configured.
 */
function ultimate_cron_nagios_settings() {
  $form = array();

  // One enable/disable checkbox per available check.
  foreach (ultimate_cron_nagios_functions() as $function => $description) {
    $var = 'ultimate_cron_nagios_func_' . $function;
    $form[$var] = array(
      '#type' => 'checkbox',
      '#title' => $function,
      '#default_value' => variable_get($var, TRUE),
      '#description' => $description,
    );
  }

  // Thresholds above which a check reports a critical state.
  $group = 'thresholds';
  $form[$group] = array(
    '#type' => 'fieldset',
    '#collapsible' => TRUE,
    '#collapsed' => FALSE,
    '#title' => t('Thresholds'),
    '#description' => t('Thresholds for reporting critical alerts to Nagios.'),
  );
  $form[$group]['ultimate_cron_nagios_running_threshold'] = array(
    '#type' => 'textfield',
    '#title' => t('Running jobs count'),
    '#default_value' => variable_get('ultimate_cron_nagios_running_threshold', 50),
    '#description' => t('Issue a critical alert when more than this number of jobs are running. Default is 50.'),
  );
  $form[$group]['ultimate_cron_nagios_failed_threshold'] = array(
    '#type' => 'textfield',
    '#title' => t('Failed jobs count'),
    '#default_value' => variable_get('ultimate_cron_nagios_failed_threshold', 10),
    // Kept on one line: the source string must not contain a line break, so
    // it reads like the sibling descriptions and stays a stable t() key.
    '#description' => t('Issue a critical alert when more than this number of jobs failed their last run. Default is 10.'),
  );
  $form[$group]['ultimate_cron_nagios_longrunning_threshold'] = array(
    '#type' => 'textfield',
    '#title' => t('Long running jobs'),
    '#default_value' => variable_get('ultimate_cron_nagios_longrunning_threshold', 0),
    '#description' => t('Issue a critical alert when more than this number of jobs are running longer than usual. Default is 0.'),
  );

  return $form;
}

/**
 * Implementation of hook_nagios_checks().
 *
 * @return array
 *   The same list as ultimate_cron_nagios_functions(): check name =>
 *   translated description.
 */
function ultimate_cron_nagios_checks() {
  return ultimate_cron_nagios_functions();
}

/**
 * Implementation of drush hook_nagios_check().
 *
 * Runs a single check, regardless of whether it is enabled in the settings.
 *
 * @param string $function
 *   Name of the check; the function "{$function}_check" is called.
 *
 * @return array
 *   The check result keyed by its 'key' and holding its 'data', or an empty
 *   array when no matching check function exists.
 */
function ultimate_cron_nagios_check($function) {
  // We don't bother to check if the function has been enabled by the user.
  // Since this runs via drush, web security is not an issue.
  $status = array();
  $func = $function . '_check';
  // A mistyped check name must not end in a fatal "undefined function" error.
  if (!function_exists($func)) {
    return $status;
  }
  $result = $func();
  $status[$result['key']] = $result['data'];
  return $status;
}

/************** HELPER FUNCTIONS ***********************************/

/**
 * Return a list of nagios check functions
 *
 * Each key is the base name of a check; its implementation is the function
 * named "<key>_check".
 *
 * @return array
 *   Check name => translated description.
 *
 * @see ultimate_cron_nagios()
 */
function ultimate_cron_nagios_functions() {
  return array(
    'ultimate_cron_running' => t('Check number of currently running jobs'),
    'ultimate_cron_failed' => t('Check the number of jobs that failed last run'),
    'ultimate_cron_longrunning' => t('Check the number of jobs that are running longer than usual'),
  );
}

/**
 * Get information about running jobs - currently running or failed.
*
 * Both counters are gathered in one pass over all cron hooks the first time
 * this function is called and are then served from a static cache, so asking
 * for the second mode never re-counts (and never inflates) the first one.
 *
 * A job counts as running when its preloaded cron data has a non-empty
 * 'background_process' entry. A job counts as failed when its log has a
 * 'status' entry that evaluates to FALSE.
 *
 * @staticvar array $overview
 *   Cached counters, keyed by 'running' and 'errors'.
 * @param string $mode
 *   Which mode to get info about; 'running' or 'errors'. Any other value is
 *   treated as 'running'.
 * @return int
 *   Number of jobs matching the requested mode.
 */
function ultimate_cron_nagios_get_job_info($mode = 'running') {
  // Ensure valid mode. Strict matching keeps values such as 0 or TRUE from
  // slipping through and being used as an array key below.
  if (!in_array($mode, array('running', 'errors'), TRUE)) {
    $mode = 'running';
  }

  static $overview = NULL;
  if (!isset($overview)) {
    // Initialise both counters up front: the loop below fills both of them,
    // whichever mode was requested first.
    $overview = array('running' => 0, 'errors' => 0);

    // Get hooks and their data.
    $data = _ultimate_cron_preload_cron_data();
    $hooks = ultimate_cron_get_hooks();

    foreach ($hooks as $function => $hook) {
      // Tolerate hooks that have no preloaded data entry.
      if (!empty($data[$function]['background_process'])) {
        $overview['running']++;
      }

      $log = ultimate_cron_get_log($function);
      if (isset($log['status']) && !$log['status']) {
        $overview['errors']++;
      }
    }
  }

  return $overview[$mode];
}

/*************** NAGIOS CHECK FUNCTIONS ********************************/

/**
 * Check number of running jobs.
 *
 * Reports a critical state when the number of currently running jobs is
 * greater than the 'ultimate_cron_nagios_running_threshold' variable
 * (default 50), and an OK state otherwise.
 *
 * @return array
 *   Array with 'key' (always 'ULTIMATE_CRON_RUNNING') and 'data', the latter
 *   holding 'status', 'type' and 'text'.
 */
function ultimate_cron_running_check() {
  $running = ultimate_cron_nagios_get_job_info('running');
  $threshold = variable_get('ultimate_cron_nagios_running_threshold', 50);

  // $running is already a count; compare it directly. Wrapping it in count()
  // would always yield 1 (and raises an error on PHP 8).
  if ($running > $threshold) {
    $data = array(
      'status' => NAGIOS_STATUS_CRITICAL,
      'type' => 'state',
      'text' => t('@jobs currently running - it is more than @threshold', array('@jobs' => $running, '@threshold' => $threshold)),
    );
  }
  else {
    $data = array(
      'status' => NAGIOS_STATUS_OK,
      'type' => 'state',
      'text' => t('@jobs currently running', array('@jobs' => $running)),
    );
  }

  return array(
    'key' => 'ULTIMATE_CRON_RUNNING',
    'data' => $data,
  );
}

/**
 * Check number of jobs that failed last run.
*
 * Reports a critical state when the number of jobs whose last run failed is
 * greater than the 'ultimate_cron_nagios_failed_threshold' variable
 * (default 10), and an OK state otherwise.
 *
 * @return array
 *   Array with 'key' (always 'ULTIMATE_CRON_FAILED') and 'data', the latter
 *   holding 'status', 'type' and 'text'.
 */
function ultimate_cron_failed_check() {
  $failed = ultimate_cron_nagios_get_job_info('errors');
  $threshold = variable_get('ultimate_cron_nagios_failed_threshold', 10);

  // $failed is already a count; compare it directly. Wrapping it in count()
  // would always yield 1 (and raises an error on PHP 8).
  if ($failed > $threshold) {
    $data = array(
      'status' => NAGIOS_STATUS_CRITICAL,
      'type' => 'state',
      'text' => t('@jobs failed their last run - it is more than @threshold', array('@jobs' => $failed, '@threshold' => $threshold)),
    );
  }
  else {
    $data = array(
      'status' => NAGIOS_STATUS_OK,
      'type' => 'state',
      'text' => t('@jobs failed their last run', array('@jobs' => $failed)),
    );
  }

  return array(
    'key' => 'ULTIMATE_CRON_FAILED',
    'data' => $data,
  );
}

/**
 * Check number of jobs running longer than usual.
 *
 * Placeholder: the count of long running jobs is not determined yet and is
 * always 0, so the result is only critical when the
 * 'ultimate_cron_nagios_longrunning_threshold' variable (default 0) is set
 * below zero.
 *
 * @return array
 *   Array with 'key' (always 'ULTIMATE_CRON_LONGRUNNING') and 'data', the
 *   latter holding 'status', 'type' and 'text'.
 *
 * @todo Implement the logic
 */
function ultimate_cron_longrunning_check() {
  $longrunning = 0;

  // Get running jobs
  // Find out how long they have been running
  // Calculate average run time per job (over a threshold? E.g. queues run very fast if there is nothing to process)
  // If

  $threshold = variable_get('ultimate_cron_nagios_longrunning_threshold', 0);

  if ($longrunning > $threshold) {
    $data = array(
      'status' => NAGIOS_STATUS_CRITICAL,
      'type' => 'state',
      'text' => t('@jobs jobs are running longer than usual - it is more than @threshold', array('@jobs' => $longrunning, '@threshold' => $threshold)),
    );
  }
  else {
    $data = array(
      'status' => NAGIOS_STATUS_OK,
      'type' => 'state',
      'text' => t('@jobs jobs are running longer than usual', array('@jobs' => $longrunning)),
    );
  }

  return array(
    'key' => 'ULTIMATE_CRON_LONGRUNNING',
    'data' => $data,
  );
}