[hobbit] MySQL replication status checker

Charles Goyard charles.goyard at orange-ftgroup.com
Tue Sep 25 09:49:46 CEST 2007


Hi,

Charles Jones wrote :
> I just got bit hard by a MySQL replication error, so I'm about to write 
> a Hobbit client-side ext script that will check the local servers slave 
> status (CHECK SLAVE STATUS \G), parse the output, and alert 
> appropriately (red if Slave Running = No, or if Seconds Behind Master 
> >300, yellow if Seconds Behind Master <300 but > 60...otherwise green).

I have one, and I believe there's one included in the perl script
package (already on the shire).

However, here's mine. It runs on the server side, so you need to give
access to a specific user.

There's an obvious improvement to do: the slave status shows on the
master line (column mysqlrep). It should show on the slave's line too,
and the master replication status should stay green if a slave is blue
(that is, a small dependency check).

Regards,

-- 
Charles Goyard - charles.goyard at orange-ftgroup.com - (+33) 1 45 38 01 31
Orange Business Services - online multimedia  // ingénierie
-------------- next part --------------
#!/usr/bin/perl -w
#
# mysql replication monitoring
# generic attempt

use strict;
#use Data::Dumper;
my $DEBUG       = 0;

$ENV{BBPROG}    = "bb-mysql-replication";
my $TESTNAME    = "mysqlrep";

my $BBHOME      = $ENV{BBHOME};
my $BB          = $ENV{BB};	# full path to the bin/bb util
my $BBDISP      = $ENV{BBDISP};	# IP of the BBDISPLAY server
my $BBVAR       = $ENV{BBVAR};
my $MACHINE     = $ENV{MACHINE}; # hostname, fqdn
my $COLOR       = "clear";	# global color for the test
my $MSG         = "";		# body of the message
my $HEAD        = ""; # first line of the message (has to be short, optional)
my $DATA        = "";		# data for NCV records (hobbit only)

if ($DEBUG == 1) {
	$BBHOME  = "/tmp" unless $BBHOME;
	$BBDISP  = "127.0.0.1" unless $BBDISP;
	$BBVAR   = "/tmp" unless $BBVAR;
	$MACHINE = "host.priv.com" unless $MACHINE;
}

# The Fine Manual :
sub clear;			# sets status color to clear
sub green;			# ~ to green
sub yellow;			# ~ to yellow
sub red;			# ~ to red
sub setcolor; # safely sets status color                       # setcolor("yellow")
sub head; # sets the first status line                     # head("foo OK")
sub msg; # adds text to the body of the status message    # msg("foo" [, "bar", ...])
sub data; # adds NCV-formatted data to the status message  # data(ds, value)
sub sendreport;	# sends the report to the BB server              # sendreport()
sub resetreport; # wipes the report (head, body, data, color)     # resetreport()

# You can always use $MSG, $HEAD, $DATA and $COLOR directly.

########
# Put your code here
########

# What you need :
# on the remote masters and slaves, create:
# for mysql 4.x : grant replication client on *.* to 'user'@'monitorhost' identified by 'password';
#                 grant select on test.* to 'user'@'monitorhost' identified by 'password';
# for mysql 3.x : grant process on *.*   to 'user'@'monitorhost' identified by 'password';
#                 grant select on test.* to 'user'@'monitorhost' identified by 'password';
#

use DBI;

my $ERR;
my $legalERR = "[^a-zA-Z0-9.:,_ -]";
my ($status, $maitreSql, @slaves, $esclaveSql, $login, $pass, %auth, $node);
my ($positionMaitre_t1, $positionMaitre_t2, $positionEsclave, $slaveRunning, $error);
my $conffile="/usr2/hobbitlocal/etc/bb-mysql-replication.cfg";
my $OKOFFSET = 100;
readConfig();
foreach $node ( keys %auth ) {
	msg "Node: $node\n";
	green;			# let's be optimit ;)

	$maitreSql  = $auth{$node}{master};
	@slaves     = keys %{ $auth{$node}{slaves} };
	$login      = $auth{$node}{user};
	$pass       = $auth{$node}{pass};
	$MACHINE    = $maitreSql;

	$positionMaitre_t1 = getSqlMasterStatus($maitreSql,$login,$pass,"test");
	if (not defined $positionMaitre_t1) {
                $ERR =~ s/$legalERR/ /go;
		msg(sprintf("master ERROR                    (%s) &red %s", $maitreSql, $ERR));
		head "mirroring FAILURE";
		sendreport;
		resetreport;
		next;
	}
	sleep 2;
	foreach $esclaveSql (@slaves) {
		($positionEsclave, $slaveRunning,$error) = getSqlSlaveStatus($esclaveSql,$login,$pass,"test");
		next if not defined $positionEsclave;
		$auth{$node}{slaves}{$esclaveSql}{pos} = $positionEsclave;
		$auth{$node}{slaves}{$esclaveSql}{running} = $slaveRunning;
	}
	sleep 2;
	# on reprend la position sur le maître
	$positionMaitre_t2 = getSqlMasterStatus($maitreSql,$login,$pass,"test");
	if (not defined $positionMaitre_t2) {
		head "mirroring FAILURE";
                $ERR =~ s/$legalERR/ /go;
		msg(sprintf("master ERROR                    (%s) &red %s", $maitreSql, $ERR));
		sendreport;
		resetreport;
		next;
	}
	msg(sprintf("master T1      at % 40s  (%s)", $positionMaitre_t1, $maitreSql));

	foreach $esclaveSql (@slaves) {
		$positionEsclave = $auth{$node}{slaves}{$esclaveSql}{pos};
		$slaveRunning    = $auth{$node}{slaves}{$esclaveSql}{running};
		if (not defined $positionEsclave) {
			$ERR =~ s/$legalERR/ /go;
			msg "slave  ERROR                    ($esclaveSql) &red $ERR";
			next;
		} elsif ($slaveRunning eq "Yes") {
			msg "slave  RUNNING                                              ($esclaveSql)";
			if ( ($positionMaitre_t1 le $positionEsclave) and
			     ($positionEsclave le $positionMaitre_t2) ) {
				msg(sprintf("slave  ON TIME at % 40s  (%s)", $positionEsclave, $esclaveSql));
			} else {
				my($masterfile, $masteroffset, $slavefile, $slaveoffset);
				($masterfile, $masteroffset) = split(/:/, $positionMaitre_t1, 2);
				($slavefile, $slaveoffset) = split(/:/, $positionEsclave, 2);
				msg(sprintf("slave  LATE    at % 40s  (%s) &yellow", $positionEsclave, $esclaveSql));
				if($masterfile eq $slavefile) {
					if ($masteroffset - $slaveoffset < $OKOFFSET) {
						green;
					}
					else {
						yellow;
					}
				}
				else {
					$masterfile =~ s/.+\.(\d+)$/$1/;
					$slavefile  =~ s/.+\.(\d+)$/$1/;
					if($masterfile - $slavefile != 1) {
						yellow;
					}
					elsif($masteroffset < $OKOFFSET) {
						green;
					}
					else {
						yellow;
					}
				}
			}
		} else {
			msg "slave  STOPPED                                              ($esclaveSql) &red";
			red;
		}
		if($error ne "") {
			red;
			$error =~ s/$legalERR/ /go;
			msg "slave  ERROR    $error";
		}
	}
	msg(sprintf("master T2      at % 40s  (%s)", $positionMaitre_t2, $maitreSql));

	if ($COLOR eq "green") {head "Mirroring OK"} else {head "Mirroring FAILURE"}
	sendreport;
	resetreport;
}

exit 0;

###############
###############


########################

# conffile has format :
# node;masterhost;slavehost1[,slavehost2,...];user;pass
# (so it works in multiple slave setups)
# example:
# authcluster;auth-sql01.priv.com;auth-sql02.priv.com;monitor;coUIc
# radius;rad-sql01.priv.com;rad-repsql01.priv.com,rad-backup01.priv.com;monitor;coUIc
# add comment lines with '#'
sub readConfig
{
	my ($node, $master, $slaves, $user, $pass, @conf, $line);
	if (! open(CONF, "$conffile")) {
		head("monitoring error");
		msg("Erreur à l'ouverture de $conffile : $!");
		red;
		sendreport;
		exit 1;
	}
	@conf = <CONF>;
	close(CONF);
	foreach $line (@conf) {
		if (substr($line, 0, 1) eq "#") { next }
		elsif (substr($line, 0, 1) eq "\n") { next }
		else {
			($node, $master, $slaves, $user, $pass) = split(/\s*;\s*/, $line, 5);
			if ($pass) {
				chomp $pass;
				$auth{$node}{master} = $master;
				foreach my $slave (split(",", $slaves)) {
					$auth{$node}{slaves}{$slave}{pos} = undef;
					$auth{$node}{slaves}{$slave}{running} = "not tested";
				}
				$auth{$node}{user}   = $user;
				$auth{$node}{pass}   = $pass;
			} else {
				msg("&red invalid config : $line");
				red;
			}
		}
	}
	#print Dumper \%auth;
}


# Retourne les infos relatives au sql maitre
# <- (Position);
sub getSqlMasterStatus
{
	my ($host,$user,$pass,$base) = @_;
	my $port;
	($host, $port) = split(/:/, $host, 2);
	if(not defined $port) {
		$port = 3306;
	}
	my $dbLink = DBI->connect("DBI:mysql:$base:$host:$port:mysql_connect_timeout=10","$user","$pass");
	if (! $dbLink) {
		$ERR = $DBI::errstr;
		red;
		return (undef);
	}
	my $query = $dbLink->prepare("SHOW MASTER STATUS");
	if (! $query->execute()) {
		$ERR = $DBI::errstr;
		red;
		return (undef);
	}
	my @results = $query->fetchrow_array;
	return "$results[0]:$results[1]";
}


# Retourne les infos relatives au sql slave

# <- (Exec_Master_Log_Pos, Slave_IO_Running&&Slave_SQL_Running)
sub getSqlSlaveStatus
{
	my ($host,$user,$pass,$base) = @_;
	my ($port, $slaverunning);
	($host, $port) = split(/:/, $host, 2);
	if(not defined $port) {
		$port = 3306;
	}
	my $dbLink = DBI->connect("DBI:mysql:$base:$host:$port:mysql_connect_timeout=10","$user","$pass");
	if (!$dbLink) {
		red;
		$ERR = $DBI::errstr;
		return (undef);
	}
	my $query = $dbLink->prepare("SHOW SLAVE STATUS");
	$query->execute();
	if (! $query->execute()) {
		$ERR = $DBI::errstr;
		red;
		return (undef);
	}
	my @results = $query->fetchrow_array;
	## sloppy but works :-/
	## mysql 3.23
	if ((scalar @results) == 12) {
		return ("$results[4]:$results[5]",$results[6], $results[10]);
	}
	## mysql 4.0
	elsif ((scalar @results) == 18) {
		if($results[9] eq "Yes" and $results[10] eq "Yes") {
			$slaverunning = "Yes"
		}
		else {
			$slaverunning = "No";
		}
		return ("$results[8]:$results[16]", $slaverunning, $results[14]);

	}
	## mysql 4.1
	elsif ((scalar @results) == 33) {
		if($results[10] eq "Yes" and $results[11] eq "Yes") {
			$slaverunning = "Yes"
		}
		else {
			$slaverunning = "No"
		}
		return ("$results[9]:$results[21]", $slaverunning, $results[19]);
	}
	## mysql x.q ?
	else {
		red;
		msg("Unsupported mysql version");
		return (undef, "unknown");
	}
}


###################
###################

####
# send the report
####

sub sendreport
{
	$MACHINE =~ s/\./,/g;
	my $date = localtime;
	my $cmd = "$BB $BBDISP \"status $MACHINE.$TESTNAME $COLOR $date $HEAD\n$DATA\n$MSG\"";
	if($DEBUG == 1) {
		print "$cmd\n";
	}
	else {
		system($cmd);
	}
}

sub resetreport
{
	$MSG = $DATA = $HEAD = '';
	$COLOR = 'clear';
}

# sets the global color of the test
# prevents downgrading severity
# clear == green < yellow < red
sub setcolor
{
	my $newcolor = shift;
	if ($newcolor eq "red") {
		$COLOR = "red";
	} elsif ($COLOR eq "green" or $COLOR eq "clear") {
		$COLOR = "$newcolor";
	}

	return $COLOR;
}
sub clear  { setcolor 'clear'  }
sub green  { setcolor 'green'  }
sub yellow { setcolor 'yellow' }
sub red    { setcolor 'red'    }


sub data
{
	my ($n, $v) = @_;
	$DATA .= "$n: $v\n";
}

sub head
{
	$HEAD = "@_";
}

sub msg
{
	$MSG .= join("\n", @_) . "\n";
}


More information about the Xymon mailing list