5 =head1 NAME $RCSfile: cleartasks.pl,v $
15 Andrew DeFaria <Andrew@ClearSCM.com>
23 Sun Jan 2 19:40:28 EST 2011
27 $Date: 2013/06/02 18:47:26 $
33 Usage: cleartasks.pl [-u|sage] [-ve|rbose] [-de|bug]
36 -u|sage: Displays usage
39 -de|bug: Output debug messages
41 -da|emon: Run in daemon mode (Default: yes)
42 -p|idfile: File to be created with the pid written to it (Default:
43 cleartasks.pid). Note: pidfile is only written if -daemon is
48 Examine the Clearadm schedule and perform the tasks required.
50 Note that sending the cleartasks.pl process a SIGUSR1 will cause it to toggle
62 use lib "$FindBin::Bin/lib", "$FindBin::Bin/../lib";
# Reduce the RCS keyword string to the bare revision number.
71 my $VERSION = '$Revision: 1.25 $';
72 ($VERSION) = ($VERSION =~ /\$Revision: (.*) /);
# Log file: CLEARADM_LOGDIR/<script name>, with a trailing ".pl" removed and
# ".<hostname>.log" appended.
74 my $logfile = "$Clearadm::CLEAROPTS{CLEARADM_LOGDIR}/$FindBin::Script";
75 $logfile =~ s/\.pl$//;
76 $logfile .= '.' . hostname() . '.log';
# Default pid file: CLEARADM_RUNDIR/<script name>.pid. Can be overridden with
# the -pidfile option.
78 my $pidfile = "$Clearadm::CLEAROPTS{CLEARADM_RUNDIR}/$FindBin::Script.pid";
81 # Augment PATH with $Clearadm::CLEAROPTS{CLEARADM_BASE}
82 $ENV{PATH} .= ":$Clearadm::CLEAROPTS{CLEARADM_BASE}";
# File-scoped handles shared by the subs below; both are instantiated in the
# main line (Clearadm->new / Clearexec->new).
84 my ($clearadm, $clearexec);
88 display 'Turning verbose off';
91 display 'Turning verbose on';
# Install ToggleVerbose as the USR1 signal handler.
96 $SIG{USR1} = \&ToggleVerbose;
# HandleSystemNotCheckingIn: handle a system that could not be contacted.
#
# Adds a 'System checkin' entry to the runlog ($clearadm->AddRunlog), reporting
# through $clearadm->Error if that fails, then looks up the 'System checkin'
# notification and prepares an HTML alert (subject plus links to
# systemdetails.cgi and runlog.cgi) for the system.
#
# Parameters:
#   %system - system record; the name and port fields are read here.
#
# NOTE(review): the statements that consume $nomorethan and $message (i.e. the
# actual delivery of the alert) are not visible in this excerpt - confirm.
98 sub HandleSystemNotCheckingIn(%) {
101 my $startTime = time;
103 my $message = "Unable to connect to system $system{name}:$system{port}";
106 task => 'System checkin',
107 started => Today2SQLDatetime,
110 system => $system{name},
113 my ($err, $msg, $lastid) = $clearadm->AddRunlog(%runlog);
115 $clearadm->Error ("Unable to add to runlog (Status: $err)\n$msg") if $err;
117 # Check to see if we should notify anybody about this non-responding system
118 my %notification = $clearadm->GetNotification ('System checkin');
120 my $when = Today2SQLDatetime;
121 my $nomorethan = lc $notification{nomorethan};
# Links for the alert body: the system's detail page and the runlog entry that
# was just added ($lastid comes from AddRunlog above).
122 my $systemLink = $Clearadm::CLEAROPTS{CLEARADM_WEBBASE};
123 $systemLink .= "/systemdetails.cgi?system=$system{name}";
124 my $runlogLink = $Clearadm::CLEAROPTS{CLEARADM_WEBBASE};
125 $runlogLink .= "/runlog.cgi?id=$lastid";
126 my $subject = "System is not responding (Is clearagent running?)";
129 <h1><font color="red">Alert</font> System not responding!</h1>
132 <p>On $when the system <a href="$systemLink">$system{name}</a> was <a
133 href="$runlogLink">not responding</a> to clearagent requests. This can happen if
134 clearagent is not setup and running on the system.</p>
147 verbose "$system{name}: $subject";
150 } # HandleSystemNotCheckingIn
# SystemsCheckin: walk every system returned by $clearadm->FindSystem and try
# to connect to it through $clearexec.
#
# Systems whose active field is 'false' are skipped. HandleSystemNotCheckingIn
# is called for a system from inside this loop (presumably when
# connectToServer fails - the test itself is not visible here). Otherwise the
# connection is closed again, the system's lastheardfrom timestamp is updated
# and, if the system's notification field is 'Heartbeat', its notifications
# are cleared.
#
# Takes no parameters.
152 sub SystemsCheckin() {
153 for ($clearadm->FindSystem) {
# Inactive systems are not contacted at all.
156 next if $system{active} eq 'false';
158 verbose "Contacting system $system{name}:$system{port}";
160 my $startTime = time;
162 my $status = $clearexec->connectToServer($system{name}, $system{port});
165 HandleSystemNotCheckingIn %system;
169 $clearexec->disconnectFromServer;
171 verbose 'Successfully checked in with system: '
172 . "$system{name}:$system{port}";
# Debugging aid: report a system record that has no name before it is used as
# the key for UpdateSystem.
174 display __FILE__ . " DEBUG: System undefined 1" unless $system{name};
175 $clearadm->UpdateSystem($system{name}, (lastheardfrom => Today2SQLDatetime));
177 $clearadm->ClearNotifications ($system{name})
178 if $system{notification} and $system{notification} eq 'Heartbeat';
# UpdateRunlog: record the outcome of one task execution in the runlog.
#
# Parameters:
#   $status    - status of the executed task; stored as the runlog status
#   $startTime - time the task was started (its use is not visible here)
#   $task      - hash ref of the task; name, system and command are read
#   $output    - array ref of output lines produced by the task
#
# The runlog message is either the joined task output or a canned
# 'Successful execution of ...' / 'Unable to execute ...' text; the conditions
# selecting between them are not visible in this excerpt. Failures of
# AddRunlog are reported through $clearadm->Error.
#
# NOTE(review): ExecuteTask assigns this sub's result to $lastid, so it
# presumably returns the id from AddRunlog - confirm.
184 sub UpdateRunlog($$$$) {
185 my ($status, $startTime, $task, $output) = @_;
188 task => $$task{name},
189 system => $$task{system},
190 started => Today2SQLDatetime,
193 $runlog{status} = $status;
197 $runlog{message} = join "\n", @$output;
199 $runlog{message} = 'Successful execution of ';
200 $runlog{message} .= "$$task{name}: $$task{command}";
204 $runlog{message} = join "\n", @$output;
206 $runlog{message} = 'Unable to execute ';
207 $runlog{message} .= "$$task{name}: $$task{command} ";
208 $runlog{message} .= join (' ', @$output);
212 my ($err, $msg, $lastid) = $clearadm->AddRunlog(%runlog);
214 $clearadm->Error($msg, $err) if $err;
# MakeSystemLink: return the systemdetails.cgi URL, rooted at
# CLEARADM_WEBBASE, for a system. Takes one scalar argument (presumably the
# system name, which is appended after "system=" - the tail of the return
# statement is not visible here).
219 sub MakeSystemLink($) {
222 return "$Clearadm::CLEAROPTS{CLEARADM_WEBBASE}/systemdetails.cgi?system="
# MakeLoadavgLink: return the plot.cgi URL, rooted at CLEARADM_WEBBASE, that
# plots the load average (type=loadavg) for $system with scaling=Hour and
# points=24. Takes one scalar argument.
226 sub MakeLoadavgLink($) {
229 return "$Clearadm::CLEAROPTS{CLEARADM_WEBBASE}/plot.cgi?type=loadavg&system="
230 . "$system&scaling=Hour&points=24";
# ProcessLoadavgErrors: turn the output of a load average task into an alert.
#
# Parameters:
#   $notification - name of the notification
#   $task         - name of the task
#   $system       - name of the system
#   $lastid       - id of the runlog entry for this execution
#   @output       - output lines of the task
#
# Three kinds of output are distinguished:
#   * "System: <name> Loadavg <n.n> Threshold <n.n>" - builds the "over the
#     threshold" alert with links from MakeSystemLink and MakeLoadavgLink
#   * "ERROR ... system <name>:" - builds the "Unable to obtain Loadavg" alert
#   * anything else - reported as unparsable through $clearadm->Error with a
#     status of -1
#
# NOTE(review): what is done with $subject/$message after they are built is
# not visible in this excerpt - confirm.
233 sub ProcessLoadavgErrors($$$$@) {
234 # TODO: Also need to handle the case where the error was something other
235 # than "Load average over threshold". Perhaps by having different return
236 # status. Also, runlog entry #22169 never reported!
237 my ($notification, $task, $system, $lastid, @output) = @_;
239 my $when = Today2SQLDatetime;
242 # We need to log this output. Write it to STDOUT
245 my ($subject, $message, $currLoadavg, $threshold, $systemLink, $loadavgLink);
247 if (/System: (\w+) Loadavg (\d+\.\d+) Threshold (\d+\.\d+)/) {
251 $systemLink = MakeSystemLink $system;
252 $loadavgLink = MakeLoadavgLink $system;
253 $subject = "Load average of $currLoadavg exceeds threshold ";
254 $subject .= "($threshold)";
257 <h1><font color="red">Alert</font> Load Average is over the threshold!</h1>
260 <p>On $when the system <a href="$systemLink">$system</a>'s load avg
261 (<a href="$loadavgLink">$currLoadavg</a>) had exceeded the threshold set for
262 this system ($threshold).</p>
264 } elsif (/ERROR.*system\s+(\S+):/) {
266 $systemLink = MakeSystemLink $system;
267 $subject = "Error trying to obtain Loadavg";
270 <h1><font color="red">Alert</font> Unable to obtain Loadavg!</h1>
273 <p>On $when we were unable to obtain the Loadavg for
274 system <a href="$systemLink">$system</a>.</p>
276 <p>The following was the error message:</p>
281 <p>On $when on the system $system, we were unable to parse the Loadavg output. This is what we saw:</p>
285 $message .= join "\n", @output;
286 $message .= "</pre>";
287 $clearadm->Error($message, -1);
304 } # ProcessLoadavgErrors
# ProcessFilesystemErrors: turn the output of a filesystem check task into
# one alert per system.
#
# Parameters:
#   $notification - name of the notification
#   $task         - name of the task
#   $system       - name of the system
#   $lastid       - id of the runlog entry for this execution
#   @output       - output lines of the task
#
# Output lines of the form
#   "System: <name> Filesystem: <fs> Used: <n.n>% Threshold: <n>"
# are collected into %system, keyed by system name. A value is either a single
# hash ref of filesystem info or an array ref of them; the second loop
# flattens both shapes into @fsinfo before building the HTML list of
# filesystems, each linked to its plot.cgi page (scaling=Day, points=7).
#
# NOTE(review): "[$system{$1}, \%fsinfo]" wraps whatever is already stored. If
# the guard around it (not visible here) also lets it run when the slot
# already holds an array ref, a third filesystem for one system would produce
# a nested array - confirm.
306 sub ProcessFilesystemErrors($$$$@) {
307 # TODO: Also need to handle the case where the error was something other
308 # than "Filesystem over threshold". Perhaps by having different return
310 my ($notification, $task, $system, $lastid, @output) = @_;
312 my $when = Today2SQLDatetime;
317 # We need to log this output. Write it to STDOUT
320 if (/System:\s*(\S+)\s*Filesystem:\s*(\S+)\s*Used:\s*(\d+\.\d+)%\s*Threshold:\s*(\d+)/) {
328 $system{$1} = [$system{$1}, \%fsinfo];
330 $system{$1} = \%fsinfo;
335 for my $systemName (keys %system) {
338 if (ref $system{$systemName} eq 'HASH') {
339 push @fsinfo, $system{$systemName};
341 push @fsinfo, @{$system{$systemName}};
344 my $systemLink = MakeSystemLink($systemName);
345 my $subject = 'Filesystem has exceeded threshold';
346 my $message = <<"END";
348 <h1><font color="red">Alert</font> Filesystem is over the threshold!</h1>
351 <p>On $when the following filesystems on <a href="$systemLink">$systemName</a>
352 were over their threshold.</p>
358 my $filesystemLink = $Clearadm::CLEAROPTS{CLEARADM_WEBBASE};
359 $filesystemLink .= "/plot.cgi?type=filesystem&system=$systemName";
360 $filesystemLink .= "&filesystem=$fsinfo{filesystem}";
361 $filesystemLink .= '&scaling=Day&points=7';
362 $message .= "<li>Filesystem <a href=\"$filesystemLink\">";
363 $message .= "$fsinfo{filesystem}</a> is $fsinfo{usedPct}% full. Threshold is ";
364 $message .= "$fsinfo{threshold}%</li>";
381 } # ProcessFilesystemErrors
# NonZeroReturn: build the alert for a task that returned a non zero status.
#
# Parameters:
#   $system       - name of the system the task ran on (used in the subject)
#   $notification - name of the notification
#   $status       - the non zero status that was returned
#   $lastid       - id of the runlog entry for this execution
#   $output       - array ref of the task's output lines
#   $task         - the task; its name, system and command fields are read
#                   through %task (presumably dereferenced from $task on a
#                   line not visible here - confirm)
#
# The HTML message embeds the task output and links to the task (tasks.cgi),
# to this runlog entry and to similar runlog entries (runlog.cgi).
383 sub NonZeroReturn($$$$$$) {
384 my ($system, $notification, $status, $lastid, $output, $task) = @_;
386 my @output = @{$output};
389 my $when = Today2SQLDatetime;
391 my $subject = "Non zero return from $task{command} "
392 . "executing on $system";
393 my $taskLink = $Clearadm::CLEAROPTS{CLEARADM_WEBBASE};
394 $taskLink .= "/tasks.cgi?task=$task{name}";
395 my $similarLink = $Clearadm::CLEAROPTS{CLEARADM_WEBBASE};
396 $similarLink .= "/runlog.cgi?system=$task{system}"
398 . "&task=$task{name}";
399 my $runlogLink = $Clearadm::CLEAROPTS{CLEARADM_WEBBASE};
400 $runlogLink .= "/runlog.cgi?id=$lastid";
401 my $message = <<"END";
403 <h1><font color="red">Alert</font> Non zero status from script execution!</h1>
406 <p>On $when, while executing <a href="$taskLink">$task{name}</a> on
407 $task{system}, a non zero status of $status was returned. Here is the resulting
408 output:</p><blockquote><pre>
411 $message .= join "\n", @output;
414 <p>You may wish to examine the individual <a href="$runlogLink">runlog entry</a>
415 that caused this alert or a list of <a href="$similarLink">similar
419 $message .= "</pre></blockquote>";
# ExecuteTask: run one scheduled task and record/notify its outcome.
#
# Parameters:
#   $sleep - seconds of sleep currently remaining for the main loop
#   %task  - the task; name, system, command and notification are read
#
# A task whose system matches /localhost/i is run locally through Execute
# (stderr folded into stdout); any other task is run on the remote system
# through $clearexec (connectToServer / execute / disconnectFromServer). The
# result is logged with UpdateRunlog, then dispatched on the notification's
# cond field ("non zero return", "loadavg over threshold", "filesystem over
# threshold"). The schedule's lastrunid is updated via UpdateSchedule.
#
# $sleep is reduced by the time this task took. The callers assign this sub's
# result back to $sleep, so it presumably returns the adjusted value (the
# return statement is not visible here - confirm).
#
# NOTE(review): %system is only filled by GetSystem on the remote branch, yet
# $system{name} is passed to ProcessLoadavgErrors/ProcessFilesystemErrors
# below. For localhost tasks that value looks like it would be undef - confirm.
434 sub ExecuteTask($%) {
435 my ($sleep, %task) = @_;
437 my ($status, @output, %system, $subject, $message);
439 verbose_nolf "Performing task $task{name}";
441 my %notification = $clearadm->GetNotification ($task{notification});
443 my $startTime = time;
445 if ($task{system} =~ /localhost/i) {
446 verbose " on localhost";
447 ($status, @output) = Execute "$task{command} 2>&1";
449 %system = $clearadm->GetSystem ($task{system});
451 verbose " on $system{name}";
453 $status = $clearexec->connectToServer (
459 $output[0] = "Unable to connect to system $system{name}:$system{port} to "
460 . "execute $task{command}";
463 ($status, @output) = $clearexec->execute($task{command});
465 $output[0] = "Unable to exec $task{command} on $system{name}"
469 $clearexec->disconnectFromServer;
472 my $lastid = UpdateRunlog ($status, $startTime, \%task, \@output);
475 if ($notification{cond}
476 and $notification{cond} =~ /non zero return/i) {
485 } elsif ($notification{cond} =~ /loadavg over threshold/i) {
486 ProcessLoadavgErrors($notification{name}, $task{name}, $system{name}, $lastid, @output);
487 } elsif ($notification{cond} =~ /filesystem over threshold/i) {
488 ProcessFilesystemErrors($notification{name}, $task{name}, $system{name}, $lastid, @output);
491 $clearadm->ClearNotifications($task{system});
494 my ($err, $msg) = $clearadm->UpdateSchedule(
496 ( 'lastrunid' => $lastid ),
499 $clearadm->Error($msg, $err) if $err;
# Charge the time spent on this task against the remaining sleep time.
501 $sleep -= time - $startTime;
508 'usage' => sub { Usage },
509 'verbose' => sub { set_verbose },
510 'debug' => sub { set_debug },
511 'daemon!' => \$daemon,
512 'pidfile=s' => \$pidfile,
513 ) or Usage "Invalid parameter";
# Anything left in @ARGV after option parsing is an error.
515 Usage 'Extraneous options: ' . join ' ', @ARGV if @ARGV;
# In daemon mode $logfile is passed twice to EnterDaemonMode, followed by the
# pid file. NOTE(review): presumably these are the stdout and stderr targets -
# confirm against EnterDaemonMode.
517 EnterDaemonMode $logfile, $logfile, $pidfile if $daemon;
519 display "$FindBin::Script V$VERSION started at " . localtime;
# Instantiate the objects used by all of the subs above.
521 $clearadm = Clearadm->new;
522 $clearexec = Clearexec->new;
524 $clearadm->SetNotify;
527 # First check in with all systems
# GetWork returns the time to sleep followed by the list of scheduled tasks
# (hash refs) that are due.
530 my ($sleep, @workItems) = $clearadm->GetWork;
533 my %scheduledTask = %{$_};
# A task without a system is treated as applying to all systems.
535 $scheduledTask{system} ||= 'All systems';
537 if ($scheduledTask{system} =~ /all systems/i) {
# Fan the task out to every system that is not marked inactive.
538 for my $system ($clearadm->FindSystem) {
539 next if $$system{active} eq 'false';
541 $scheduledTask{system} = $$system{name};
542 $sleep = ExecuteTask $sleep, %scheduledTask;
545 $sleep = ExecuteTask $sleep, %scheduledTask;
550 verbose "Sleeping for $sleep seconds";
557 =head1 CONFIGURATION AND ENVIRONMENT
559 DEBUG: If set then $debug is set to this level.
561 VERBOSE: If set then $verbose is set to this level.
563 TRACE: If set then $trace is set to this level.
571 L<Getopt::Long|Getopt::Long>
573 =head2 ClearSCM Perl Modules
589 <a href="http://clearscm.com/php/scm_man.php?file=clearadm/lib/Clearadm.pm">Clearadm</a><br>
590 <a href="http://clearscm.com/php/scm_man.php?file=clearadm/lib/Clearexec.pm">Clearexec</a><br>
591 <a href="http://clearscm.com/php/scm_man.php?file=lib/DateUtils.pm">DateUtils</a><br>
592 <a href="http://clearscm.com/php/scm_man.php?file=lib/Display.pm">Display</a><br>
593 <a href="http://clearscm.com/php/scm_man.php?file=lib/TimeUtils.pm">TimeUtils</a><br>
594 <a href="http://clearscm.com/php/scm_man.php?file=lib/Utils.pm">Utils</a><br>
599 =head1 BUGS AND LIMITATIONS
601 There are no known bugs in this script
603 Please report problems to Andrew DeFaria <Andrew@ClearSCM.com>.
605 =head1 LICENSE AND COPYRIGHT
607 Copyright (c) 2010, ClearSCM, Inc. All rights reserved.