[hobbit] some larrd issues on hobbit 4.0.3 rc1

Henrik Stoerner henrik at hswn.dk
Sun May 1 09:11:42 CEST 2005


On Sat, Apr 30, 2005 at 06:22:56PM -0500, Kauffman, Tom wrote:
> These vmstat rrds are from back on larrd 42; just after the change to
> accumulate cpu wait. So I'm trying the vmstat recreate to see if the
> definitions I've got are severely non-standard (I'd almost bet on it)
> 

Use "rrdtool dump FILENAME.rrd" to dump the old data into a text
file in XML format. When you look at this file, at the top you'll
find the dataset definitions that LARRD has set up in this RRD file;
these come from the "aix" definition in the old LARRD vmstat-larrd.pl
script. So there should be (in sequence): cpu_r, cpu_b, mem_avm,
mem_free, mem_re, mem_pi, mem_po, mem_fr, sr, mem_cy, cpu_int, cpu_syc,
cpu_csw, cpu_usr, cpu_sys, cpu_idl, cpu_wait - at least, that's what 
Hobbit would generate, and therefore it assumes this layout when
updating the RRD-file.

Since the files are being updated by Hobbit, but the data collected is
wrong, my guess is that you have these in a different sequence than
Hobbit expects.

There are two ways of tackling that problem.

One way is to change the Hobbit layout to match your current RRD files.
This layout is defined in the hobbit-4.0.3rc1/hobbitd/larrd/do_vmstat.c
file - just look for "aix" and you'll see it. Only problem with this is
that you'll need to repeat this change whenever you upgrade Hobbit.

The other way is to modify the dumped RRD-file, then use "rrdtool restore" 
to convert the modified XML-file back to an RRD file.

You need to change the sequence of the dataset definitions at the
beginning of the file, and also change each of the data "rows" that make
up the bulk of the file. These look like this:

<!-- 2005-05-01 02:00:00 CEST / 1114905600 --><row><v> 1.5896990741e-01 </v><v>2.1686840278e+02 </v><v> 9.5610891204e+01 </v><v> 3.5725331019e+02</v><v> 1.0420138889e-01 </v><v>8.3974537037e-01 </v><v> 3.3892245370e+00</v><v> 3.3494723380e+02 </v><v>9.9369259259e+01 </v><v> 1.0934771532e+05</v><v> 3.8053798435e+05 </v><v> 8.1690244444e+03 </v><v> 2.7122800926e+00 </v><v>1.2084837963e+00 </v><v> 2.1852577870e+05</v></row>

The "<v> VALUE </v>" entries appear in the same sequence in which the
datasets are defined. So you must swap values around to match the new layout.


> On the disk space rrds -- this is a lot of wasted activity for us; we
> have about 8 filesystems we care about, and my production R3 DB server
> currently has 95 filesystems that have been 100% full since creation --
> and we add another 10 every 13 months (150 GB -- SAP just *eats* disk).

I see - perhaps something like the attached patch could be used. With
this, you can set up two environment variables holding regexp patterns
that each filesystem name is matched against before it gets graphed:
NORRDDISKS is an "exclude" pattern - any filesystem name matching it
does not get a graph; RRDDISKS is an "include" pattern - only filesystem
names matching it get graphed. You can use neither of them (the
current behaviour), one of them, or both.

E.g. if all of your SAP filesystems are mounted below "/sap", you would
just put
  NORRDDISKS="^/sap"
in hobbitserver.cfg, and they won't get graphed.

This doesn't affect any of the RRD files that have already been created,
so you must manually clean out the unwanted disk*.rrd files from the
~hobbit/data/rrd/HOSTNAME/ directory to get rid of the graphs you don't
want.


Henrik

-------------- next part --------------
--- hobbitd/larrd/do_disk.c	2005/04/10 11:09:06	1.22
+++ hobbitd/larrd/do_disk.c	2005/05/01 06:57:19
@@ -13,12 +13,32 @@
 static char *disk_params[] = { "rrdcreate", rrdfn, "DS:pct:GAUGE:600:0:100", "DS:used:GAUGE:600:0:U", 
 				rra1, rra2, rra3, rra4, NULL };
 
-/* This is ported almost directly from disk-larrd.pl */
 
 int do_disk_larrd(char *hostname, char *testname, char *msg, time_t tstamp)
 {
 	enum { DT_IRIX, DT_AS400, DT_NT, DT_UNIX, DT_NETAPP, DT_NETWARE } dsystype;
 	char *eoln, *curline;
+	static int ptnsetup = 0;
+	static pcre *inclpattern = NULL;
+	static pcre *exclpattern = NULL;
+
+	if (!ptnsetup) {
+		const char *errmsg;
+		int errofs;
+		char *ptn;
+
+		ptnsetup = 1;
+		ptn = getenv("RRDDISKS");
+		if (ptn) {
+			inclpattern = pcre_compile(ptn, PCRE_CASELESS, &errmsg, &errofs, NULL);
+			if (!inclpattern) errprintf("PCRE compile of RRDDISKS='%s' failed\n", ptn);
+		}
+		ptn = getenv("NORRDDISKS");
+		if (ptn) {
+			exclpattern = pcre_compile(ptn, PCRE_CASELESS, &errmsg, &errofs, NULL);
+			if (!exclpattern) errprintf("PCRE compile of NORRDDISKS='%s' failed\n", ptn);
+		}
+	}
 
 	if (strstr(msg, " xfs ") || strstr(msg, " efs ") || strstr(msg, " cxfs ")) dsystype = DT_IRIX;
 	else if (strstr(msg, "DASD")) dsystype = DT_AS400;
@@ -34,6 +54,7 @@
 		int columncount;
 		char *diskname = NULL;
 		int pused = -1;
+		int wanteddisk = 1;
 		unsigned long long aused = 0;
 
 		eoln = strchr(curline, '\n'); if (eoln) *eoln = '\0';
@@ -64,13 +85,12 @@
 		switch (dsystype) {
 		  case DT_IRIX:
 			diskname = xstrdup(columns[6]);
-			p = diskname; while ((p = strchr(p, '/')) != NULL) { *p = ','; }
 			p = strchr(columns[5], '%'); if (p) *p = ' ';
 			pused = atoi(columns[5]);
 			aused = atoi(columns[3]);
 			break;
 		  case DT_AS400:
-			diskname = xstrdup(",DASD");
+			diskname = xstrdup("/DASD");
 			p = strchr(columns[columncount-1], '%'); if (p) *p = ' ';
 			/* 
 			 * Yikes ... the format of this line varies depending on the color.
@@ -88,21 +108,19 @@
 			break;
 		  case DT_NT:
 			diskname = xmalloc(strlen(columns[0])+2);
-			sprintf(diskname, ",%s", columns[0]);
+			sprintf(diskname, "/%s", columns[0]);
 			p = strchr(columns[4], '%'); if (p) *p = ' ';
 			pused = atoi(columns[4]);
 			aused = atoi(columns[2]);
 			break;
 		  case DT_UNIX:
 			diskname = xstrdup(columns[5]);
-			p = diskname; while ((p = strchr(p, '/')) != NULL) { *p = ','; }
 			p = strchr(columns[4], '%'); if (p) *p = ' ';
 			pused = atoi(columns[4]);
 			aused = atoi(columns[2]);
 			break;
 		  case DT_NETAPP:
 			diskname = xstrdup(columns[1]);
-			p = diskname; while ((p = strchr(p, '/')) != NULL) { *p = ','; }
 			pused = atoi(columns[5]);
 			p = columns[3] + strspn(columns[3], "0123456789");
 			aused = atoll(columns[3]);
@@ -113,13 +131,34 @@
 			break;
 		  case DT_NETWARE:
 			diskname = xstrdup(columns[1]);
-			p = diskname; while ((p = strchr(p, '/')) != NULL) { *p = ','; }
 			aused = atoll(columns[3]);
 			pused = atoi(columns[7]);
 			break;
 		}
 
-		if (diskname && (pused != -1)) {
+		/* Check include/exclude patterns */
+		wanteddisk = 1;
+		if (exclpattern) {
+			int ovector[30];
+			int result;
+
+			result = pcre_exec(exclpattern, NULL, diskname, strlen(diskname), 
+					   0, 0, ovector, (sizeof(ovector)/sizeof(int)));
+
+			wanteddisk = (result < 0);
+		}
+		if (wanteddisk && inclpattern) {
+			int ovector[30];
+			int result;
+
+			result = pcre_exec(inclpattern, NULL, diskname, strlen(diskname), 
+					   0, 0, ovector, (sizeof(ovector)/sizeof(int)));
+
+			wanteddisk = (result >= 0);
+		}
+
+		if (wanteddisk && diskname && (pused != -1)) {
+			p = diskname; while ((p = strchr(p, '/')) != NULL) { *p = ','; }
 			if (strcmp(diskname, ",") == 0) {
 				diskname = xrealloc(diskname, 6);
 				strcpy(diskname, ",root");


More information about the Xymon mailing list