[hobbit] Re: Monitoring and ESX host

chris brown brown.crisn at gmail.com
Wed Jul 4 11:30:40 CEST 2007


I have written some checks for both ESX 2.5.x and ESX 3.

These should probably be re-written at some stage, just haven't taken the
time to yet, but I hope you get some ideas.

For ESX 3.0.1 and up you can take the OO approach with VmPerl (see example
1), although it is very slow. I say use 3.0.1 because, although the SDK manual
says these methods are supported in ESX 3.0.0, there seem to be some problems
with VmPerl that are fixed in 3.0.1.

Or you can do most of this stuff by parsing the /proc nodes (see examples 2
and 3 below).

EX 1.
use VMware::VmPerl;
use VMware::VmPerl::VM;
use VMware::VmPerl::ConnectParams;
use strict;
use warnings;

# Print configuration values and resource counters for every VM that has a
# /proc/vmware/vm/<id>/names node, using the VmPerl API.
#
# For each names node the script extracts the cfgFile entry, connects to the
# VM through that configuration path, and prints its display name, memory
# size, guest OS, CPU count, per-CPU "usedsec" counters and a few memory
# counters.  The script dies on the first VM it cannot read or connect to.

# Return the value unchanged, or 'n/a' when the API handed back undef, so
# that a missing counter is visible in the report instead of raising an
# "uninitialized value" warning.
sub _or_na {
    my ($value) = @_;
    return defined $value ? $value : 'n/a';
}

foreach my $names_file (</proc/vmware/vm/*/names>) {
    # Read the node directly; there is no need to spawn a shell and cat(1).
    open(my $names_fh, '<', $names_file)
        or die "Could not open $names_file: $!\n";
    my $file_info = do { local $/; <$names_fh> };
    close $names_fh;
    $file_info = '' unless defined $file_info;

    # Pull the configuration path out of the cfgFile entry.  Matching the
    # quoted value (rather than taking the third whitespace-separated token)
    # keeps paths that contain spaces intact; an unquoted value is accepted
    # as a fallback.
    my $cfg_path;
    if ($file_info =~ /cfgFile.(?:"([^"]*)"|(\S+))/) {
        $cfg_path = defined $1 ? $1 : $2;
    }
    die "No cfgFile entry found in $names_file\n"
        unless defined $cfg_path && length $cfg_path;
    #print "$cfg_path\n";

    my $connect_params = VMware::VmPerl::ConnectParams::new();
    my $vm             = VMware::VmPerl::VM::new();
    if (!$vm->connect($connect_params, $cfg_path)) {
        my ($error_number, $error_string) = $vm->get_last_error();
        undef $vm;
        die "Could not connect to vm: Error $error_number: $error_string\n";
    }

    my $name1       = $vm->get_config('displayName');
    my $mem         = $vm->get_config('memsize');
    my $OS          = $vm->get_config('guestOS');
    my $mem_active  = $vm->get_resource('mem.active');
    my $mem_ovhd    = $vm->get_resource('mem.overhd');
    my $cpu         = $vm->get_resource('cpu.number');
    my $mem_swap_in = $vm->get_resource('mem.swapin');
    my $mem_shared  = $vm->get_resource('mem.shared');

    # One unbroken rule; the original string literal had been split in two
    # by mail line-wrapping, which printed a newline in the middle of it.
    print "\n", '-' x 75, "\n";
    print "\tMachine Name is: ", _or_na($name1), "\n";
    print "\tMachine has: ", _or_na($mem), " MB RAM\n";
    print "\tOS: ", _or_na($OS), "\n";
    print "\tNum cpu's: ", _or_na($cpu), " CPU\n";
    print "\t--------------------stats--------------------\n";

    # CPUs are numbered from 0; an undefined count yields an empty range.
    foreach my $i (0 .. ($cpu || 0) - 1) {
        my $usedsec = $vm->get_resource("cpu.${i}.usedsec");
        print "\tUsed vmkernel cpu time for cpu${i}: ", _or_na($usedsec),
            " (sec)\n";
    }

    print "\tMem swapped in: ", _or_na($mem_swap_in), "\n";
    print "\tMem active: ", _or_na($mem_active), "\n";
    print "\tMem overhead: ", _or_na($mem_ovhd), "\n";
    print "\tMem shared: ", _or_na($mem_shared), "\n";

    # Drop the VM object before moving on to the next VM.
    undef $vm;
}

EX2. This gets the swapped/swptgt values from /proc/vmware/sched/mem. This
should work for ESX 2.5.x and 3.x (I haven't really experimented much, though).

# Check, for each VM listed in the memory scheduler table, the ratio of its
# target size to its current size against a paging threshold, and exit with
# status 255 if any VM exceeds it (0 otherwise).
#
# NOTE(review): this is an excerpt.  $proc_vmware_mem (path of the table to
# read), $paging_threshold (percentage limit) and $num_in_error (error
# counter) are not declared here and must be set up earlier in the full
# script -- confirm against the complete version.

open(my $mem_fh, '<', $proc_vmware_mem)
    or die "Could not open the file $proc_vmware_mem: $!. Exiting";
my @proc_mem_details = <$mem_fh>;
close $mem_fh;

my $lastline = $#proc_mem_details;
$lastline--;

#print "LAST_LINE=$lastline\n";
#       The data we're concerned with is located between line 16 and last
#       line -1.  (The range is empty when the file is shorter than that.)
my @vm_details = @proc_mem_details[16 .. $lastline];

foreach my $vm_instance (@vm_details)
{
        #       We now need to break down the fields for each instance.
        #       Net effect of the four substitutions: runs of spaces, '/'
        #       and ':' all become '-' separators.  The field positions used
        #       below depend on this exact sequence, so it is kept as is.
        my $row = $vm_instance;
        $row =~ s/\ +/-/g;
        $row =~ s/\/-/:/g;
        $row =~ s/\//:/g;
        $row =~ s/:/-/g;
        my @vm_instance_details = split(/-/, $row);

        my $vm_id   = $vm_instance_details[1];
        my $size    = $vm_instance_details[5];
        my $sizetgt = $vm_instance_details[6];
        my $memctl  = $vm_instance_details[7];
        my $mctltgt = $vm_instance_details[8];

        #       Rows without the expected fields, or with a zero size, cannot
        #       be evaluated; report and skip them instead of dying on a
        #       division by zero.
        unless (defined $vm_id && defined $sizetgt
                && defined $size && $size > 0) {
                print STDERR "Skipping unparsable line: $vm_instance";
                next;
        }
        $sizetgt =~ s/\///g;
        $mctltgt =~ s/\///g if defined $mctltgt;

        #       We will get the actual name of the vmware instance
        #       referenced by vmid from /proc/vmware/vm/<vmid>/names.
        #       Matching the quoted displayName value keeps names that
        #       contain spaces intact; fall back to the vmid when the node
        #       cannot be read or has no displayName entry.
        my $vm_name = "vmid $vm_id";
        if (open(my $names_fh, '<', "/proc/vmware/vm/$vm_id/names")) {
                my $vm_name_detail = do { local $/; <$names_fh> };
                close $names_fh;
                if (defined $vm_name_detail
                    && $vm_name_detail =~ /displayName=(?:"([^"]*)"|(\S+))/) {
                        $vm_name = defined $1 ? $1 : $2;
                }
        }

        my $percentage = ($sizetgt / $size);
        $percentage = sprintf "%2d", 100 * $percentage;

        #       Now check the stats
        if ($percentage > $paging_threshold) {
                print "ERROR\t-\tThe VM $vm_name is exceeding the allocated paging threshold ($percentage\%/$paging_threshold\%)\n";
                $num_in_error++;
        } else {
                print "The VM $vm_name is within the allocated paging threshold ($percentage\%/$paging_threshold\%)\n";
        }
}

#       Numeric comparison: the original used the string operator 'gt'.
if (($num_in_error || 0) > 0) {
        exit(255);
} else {
        exit(0);
}

EX3.
This takes a look at the disk queue length to the LUNs (and some other info),
sleeps for a defined interval, runs the check again and compares the values.

#!/usr/bin/perl -w
use strict;

# Sample the last line of every /proc/vmware/scsi/vmhba*/?:* node twice,
# $check_int seconds apart, and flag each node whose fourth
# whitespace-separated field grew by more than $threshold between the two
# samples.  The output of "vmkfstools -Ph" for every symlinked volume under
# /vmfs/volumes is printed next to the sampled lines.
# Exit status is 255 if any node was flagged, 0 otherwise.
#
# NOTE(review): the author describes the sampled field as the disk queue
# length -- confirm against the /proc node layout of the ESX release in use.

my $check_int    = 2;    # seconds to sleep between the two samples
my $threshold    = 3;    # largest tolerated increase between samples
my $num_in_error = 0;

my @vmhba_ar = glob('/proc/vmware/scsi/vmhba*/?:*');
my (@outer_vmfs_contents, @outer_vmhba_ar);

# Return the chomped last line of each given file, in the same order.
# An empty file contributes an empty string.  Dies if a file cannot be opened.
sub read_last_lines {
        my @paths = @_;
        my @last_lines;
        foreach my $path (@paths) {
                open(my $fh, '<', $path) || die "cant open $path $!\n";
                my @lines = <$fh>;
                close $fh;
                my $line_of_interest = @lines ? $lines[-1] : '';
                chomp $line_of_interest;
                push @last_lines, $line_of_interest;
        }
        return @last_lines;
}

my $dir = "/vmfs/volumes";
opendir(my $vmfs_dh, $dir) || die "cant open $dir $!\n";
my @vmfs_contents = readdir $vmfs_dh;
closedir $vmfs_dh;       # a directory handle needs closedir, not close
chdir($dir) || die "cant chdir to $dir $!\n";

# Collect the vmkfstools report of every non-hidden symlink in the volume
# directory (the -l test is relative to $dir, hence the chdir above).
foreach my $entry (@vmfs_contents) {
        next unless -l $entry && $entry !~ m/^\./;

        # List-form pipe open: the volume name is passed as a single argument
        # and never interpreted by a shell, so names containing spaces or
        # shell metacharacters are handled correctly.
        my @smaller;
        if (open(my $vmkfs, '-|', 'sudo', '/usr/sbin/vmkfstools', '-Ph', $entry)) {
                @smaller = <$vmkfs>;
                close $vmkfs;
        }
        else {
                warn "cant run vmkfstools for $entry $!\n";
        }
        push @outer_vmfs_contents, \@smaller;
}

# Take the two samples, $check_int seconds apart.
my @r1 = read_last_lines(@vmhba_ar);
sleep $check_int;
my @r2 = read_last_lines(@vmhba_ar);

# Compare the two samples node by node: keep the second sample's line as is
# when the increase is within the threshold, otherwise replace it with a
# warning line and count the error.
foreach my $idx (0 .. $#r1) {
        my $qd_r1 = (split(/\s+/, $r1[$idx]))[3];
        my $qd_r2 = (split(/\s+/, $r2[$idx]))[3];
        $qd_r1 = 0 unless defined $qd_r1;
        $qd_r2 = 0 unless defined $qd_r2;

        my $press = ($qd_r2 - $qd_r1);
        if ($press <= $threshold) {
                push @outer_vmhba_ar, $r2[$idx];
        }
        else {
                my $string = "WARNING - Q'D IO -> \t\t $r2[$idx] \n";
                push @outer_vmhba_ar, $string;
                $num_in_error++;
        }
}

# Print each volume report followed by the sampled line at the same index.
# NOTE(review): volumes and /proc nodes are paired purely by position, and
# nodes beyond the number of volumes are not printed -- confirm this pairing
# is intended.
foreach my $idx (0 .. $#outer_vmfs_contents) {
        my $lun_line = defined $outer_vmhba_ar[$idx] ? $outer_vmhba_ar[$idx] : '';
        print "\n", @{ $outer_vmfs_contents[$idx] }, $lun_line, "\n\n";
}

# Numeric comparison: the original used the string operator 'gt'.
if ($num_in_error > 0) {
        exit(255);
}
else {
        exit(0);
}


I'll get round to cleaning these up and making them available at some stage.
I've been very lazy though and just left them as is.

Hope you get some ideas from these.

Chris
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.xymon.com/pipermail/xymon/attachments/20070704/9c234ebb/attachment.html>


More information about the Xymon mailing list