ultimate_cron.nagios.inc 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. <?php
  2. /**
  3. * Implements hook_nagios_info().
  4. */
  5. function ultimate_cron_nagios_info() {
  6. return array(
  7. 'name' => t('Ultimate Cron Monitoring'),
  8. 'id' => 'ULTIMATE_CRON',
  9. );
  10. }
  11. /**
  12. * Implementation of hook_nagios().
  13. */
  14. function ultimate_cron_nagios($check = 'nagios') {
  15. $status = array();
  16. foreach(ultimate_cron_nagios_functions() as $function => $description) {
  17. if (variable_get('ultimate_cron_nagios_func_' . $function, TRUE) && ($check == 'nagios' || $check == $function)) {
  18. $func = $function . '_check';
  19. $result = $func();
  20. $status[$result['key']] = $result['data'];
  21. }
  22. }
  23. return $status;
  24. }
  25. /**
  26. * Implementation of hook_nagios_settings().
  27. */
  28. function ultimate_cron_nagios_settings() {
  29. $form = array();
  30. foreach(ultimate_cron_nagios_functions() as $function => $description) {
  31. $var = 'ultimate_cron_nagios_func_' . $function;
  32. $form[$var] = array(
  33. '#type' => 'checkbox',
  34. '#title' => $function,
  35. '#default_value' => variable_get($var, TRUE),
  36. '#description' => $description,
  37. );
  38. }
  39. $group = 'thresholds';
  40. $form[$group] = array(
  41. '#type' => 'fieldset',
  42. '#collapsible' => TRUE,
  43. '#collapsed' => FALSE,
  44. '#title' => t('Thresholds'),
  45. '#description' => t('Thresholds for reporting critical alerts to Nagios.'),
  46. );
  47. $form[$group]['ultimate_cron_nagios_running_threshold'] = array(
  48. '#type' => 'textfield',
  49. '#title' => t('Running jobs count'),
  50. '#default_value' => variable_get('ultimate_cron_nagios_running_threshold', 50),
  51. '#description' => t('Issue a critical alert when more than this number of jobs are running. Default is 50.'),
  52. );
  53. $form[$group]['ultimate_cron_nagios_failed_threshold'] = array(
  54. '#type' => 'textfield',
  55. '#title' => t('Failed jobs count'),
  56. '#default_value' => variable_get('ultimate_cron_nagios_failed_threshold', 10),
  57. '#description' => t('Issue a critical alert when more than this number of jobs failed their last run. Default is 10.'),
  58. );
  59. $form[$group]['ultimate_cron_nagios_longrunning_threshold'] = array(
  60. '#type' => 'textfield',
  61. '#title' => t('Long running jobs'),
  62. '#default_value' => variable_get('ultimate_cron_nagios_longrunning_threshold', 0),
  63. '#description' => t('Issue a critical alert when more than this number of jobs are running longer than usual. Default is 0.')
  64. );
  65. return $form;
  66. }
  67. /**
  68. * Implementation of hook_nagios_checks().
  69. */
  70. function ultimate_cron_nagios_checks() {
  71. return ultimate_cron_nagios_functions();
  72. }
  73. /**
  74. * Implementation of drush hook_nagios_check().
  75. */
  76. function ultimate_cron_nagios_check($function) {
  77. // We don't bother to check if the function has been enabled by the user.
  78. // Since this runs via drush, web security is not an issue.
  79. $func = $function . '_check';
  80. $result = $func();
  81. $status[$result['key']] = $result['data'];
  82. return $status;
  83. }
  84. /************** HELPER FUNCTIONS ***********************************/
  85. /**
  86. * Return a list of nagios check functions
  87. * @see ultimate_cron_nagios()
  88. */
  89. function ultimate_cron_nagios_functions() {
  90. return array(
  91. 'ultimate_cron_running' => t('Check number of currently running jobs'),
  92. 'ultimate_cron_failed' => t('Check the number of jobs that failed last run'),
  93. 'ultimate_cron_longrunning' => t('Check the number of jobs that are running longer than usual'),
  94. );
  95. }
  96. /**
  97. * Get information about running jobs - currently running or failed.
  98. *
  99. * @staticvar array $overview
  100. * @param string $mode Which mode to get info about; 'running' or 'errors'
  101. * @return int
  102. */
  103. function ultimate_cron_nagios_get_job_info($mode = 'running') {
  104. // Ensure valid mode
  105. if (!in_array($mode, array('running', 'errors'))) {
  106. $mode = 'running';
  107. }
  108. static $overview = array();
  109. if (!isset($overview[$mode])) {
  110. $overview[$mode] = 0;
  111. // Get hooks and their data
  112. $data = _ultimate_cron_preload_cron_data();
  113. $hooks = ultimate_cron_get_hooks();
  114. $modules = array();
  115. foreach ($hooks as $function => $hook) {
  116. if (!$module || $module == $hook['module']) {
  117. $hook['settings'] = $data[$function]['settings'] + $hook['settings'];
  118. $hook['background_process'] = $data[$function]['background_process'];
  119. $hook['log'] = ultimate_cron_get_log($function);
  120. // Setup process
  121. if ($hook['background_process']) {
  122. $overview['running']++;
  123. }
  124. $log = $hook['log'];
  125. if (isset($log['status']) && !$log['status']) {
  126. $overview['errors']++;
  127. }
  128. }
  129. }
  130. }
  131. return $overview[$mode];
  132. }
  133. /*************** NAGIOS CHECK FUNCTIONS ********************************/
  134. /**
  135. * Check number of running jobs.
  136. *
  137. * @return array
  138. */
  139. function ultimate_cron_running_check() {
  140. $running = ultimate_cron_nagios_get_job_info('running');
  141. $threshold = variable_get('ultimate_cron_nagios_running_threshold', 50);
  142. if (count($running) > $threshold) {
  143. $data = array(
  144. 'status' => NAGIOS_STATUS_CRITICAL,
  145. 'type' => 'state',
  146. 'text' => t('@jobs currently running - it is more than @threshold', array('@jobs' => $running, '@threshold' => $threshold)),
  147. );
  148. }
  149. else {
  150. $data = array(
  151. 'status' => NAGIOS_STATUS_OK,
  152. 'type' => 'state',
  153. 'text' => t('@jobs currently running', array('@jobs' => $running)),
  154. );
  155. }
  156. return array(
  157. 'key' => 'ULTIMATE_CRON_RUNNING',
  158. 'data' => $data,
  159. );
  160. }
  161. /**
  162. * Check number of jobs that failed last run.
  163. *
  164. * @return array
  165. */
  166. function ultimate_cron_failed_check() {
  167. $failed = ultimate_cron_nagios_get_job_info('errors');
  168. $threshold = variable_get('ultimate_cron_nagios_failed_threshold', 10);
  169. if (count($failed) > $threshold) {
  170. $data = array(
  171. 'status' => NAGIOS_STATUS_CRITICAL,
  172. 'type' => 'state',
  173. 'text' => t('@jobs failed their last run - it is more than @threshold', array('@jobs' => $failed, '@threshold' => $threshold)),
  174. );
  175. }
  176. else {
  177. $data = array(
  178. 'status' => NAGIOS_STATUS_OK,
  179. 'type' => 'state',
  180. 'text' => t('@jobs failed their last run', array('@jobs' => $failed)),
  181. );
  182. }
  183. return array(
  184. 'key' => 'ULTIMATE_CRON_FAILED',
  185. 'data' => $data,
  186. );
  187. }
  188. /**
  189. * Check number of jobs running longer than usual.
  190. *
  191. * @return array
  192. *
  193. * @todo Implement the logic
  194. */
  195. function ultimate_cron_longrunning_check() {
  196. $longrunning = 0;
  197. // Get running jobs
  198. // Find out how long they have been running
  199. // Calculate average run time per job (over a threshold? E.g. queues run very fast if there is nothing to process)
  200. // If
  201. $threshold = variable_get('ultimate_cron_nagios_longrunning_threshold', 0);
  202. if ($longrunning > $threshold) {
  203. $data = array(
  204. 'status' => NAGIOS_STATUS_CRITICAL,
  205. 'type' => 'state',
  206. 'text' => t('@jobs jobs are running longer than usual - it is more than @threshold', array('@jobs' => $longrunning, '@threshold' => $threshold)),
  207. );
  208. }
  209. else {
  210. $data = array(
  211. 'status' => NAGIOS_STATUS_OK,
  212. 'type' => 'state',
  213. 'text' => t('@jobs jobs are running longer than usual', array('@jobs' => $longrunning)),
  214. );
  215. }
  216. return array(
  217. 'key' => 'ULTIMATE_CRON_LONGRUNNING',
  218. 'data' => $data,
  219. );
  220. }