[hobbit] hist files

Henrik Stoerner henrik at hswn.dk
Mon Jul 31 17:52:30 CEST 2006


On Mon, Jul 31, 2006 at 10:55:50AM -0400, Michael Frey wrote:

[snip]

OK, the attached patch should let you read the history file again.
It cannot bring back the events that were lost, of course - but at least
you should be able to view the history.

> It also all happened at the same time, and not for all hosts or tests, 
> although conn seems to be the hardest hit.  Every host I have looked at, 
> had at least one History graph failure; except for any new host entered 
> after July 23rd.

I cannot see how that could happen, but just in case, the patch modifies
the history module to not log anything that doesn't have a valid color
code.

Did the time when this happened coincide with any cron jobs,
disk-intensive activity, or anything else that might cause
file operations to fail or go slow?


Regards,
Henrik

-------------- next part --------------
--- lib/availability.c	2006/07/20 16:06:41	1.42
+++ lib/availability.c	2006/07/31 15:28:39
@@ -219,6 +219,44 @@
 	return strdup(cause);
 }
 
+static char *get_historyline(char *buf, int bufsize, FILE *fd, int *err,
+			     char *colstr, unsigned int *start, unsigned int *duration, int *scanres)
+{
+	int ok;
+
+	do {
+		ok = 1;
+
+		if (fgets(buf, bufsize, fd) == NULL) {
+			return NULL;
+		}
+
+		if (strlen(buf) < 25) {
+			ok = 0;
+			*err += 1;
+			dbgprintf("Bad history line (short): %s\n", buf);
+			continue;
+		}
+
+		*scanres = sscanf(buf+25, "%s %u %u", colstr, start, duration);
+		if (*scanres < 2) {
+			ok = 0;
+			*err += 1;
+			dbgprintf("Bad history line (missing items): %s\n", buf);
+			continue;
+		}
+
+		if (parse_color(colstr) == -1) {
+			ok = 0;
+			*err += 1;
+			dbgprintf("Bad history line (bad color string): %s\n", buf);
+			continue;
+		}
+	} while (!ok);
+
+	return buf;
+}
+
 static int scan_historyfile(FILE *fd, time_t fromtime, time_t totime,
 		char *buf, size_t bufsize, 
 		time_t *starttime, time_t *duration, char *colstr)
@@ -235,9 +273,14 @@
 
 	/* Is start of history after our report-end time ? */
 	rewind(fd);
-	fgets(buf, bufsize, fd);
-	if (sscanf(buf+25, "%s %u %u", colstr, &uistart, &uidur) == 2) 
-		uidur = time(NULL)-uistart;
+	if (!get_historyline(buf, bufsize, fd, &err, colstr, &uistart, &uidur, &scanres)) {
+		*starttime = time(NULL);
+		*duration = 0;
+		strcpy(colstr, "clear");
+		return err;
+	}
+
+	if (scanres == 2) uidur = time(NULL)-uistart;
 	start = uistart; dur = uidur;
 
 	if (start > totime) {
@@ -249,8 +292,7 @@
 
 	/* First, do a quick scan through the file to find the approximate position where we should start */
 	while ((start+dur) < fromtime) {
-		if (fgets(buf, bufsize, fd)) {
-			scanres = sscanf(buf+25, "%s %u %u", colstr, &uistart, &uidur);
+		if (get_historyline(buf, bufsize, fd, &err, colstr, &uistart, &uidur, &scanres)) {
 			start = uistart; dur = uidur;
 			if (scanres == 2) dur = time(NULL) - start;
 
@@ -262,11 +304,6 @@
 					fgets(buf, bufsize, fd); /* Skip partial line */
 				}
 			}
-			else {
-				err++;
-				dbgprintf("Bad line in history file '%s'\n", buf);
-				start = dur = 0; /* Try next line */
-			}
 		}
 		else {
 			start = time(NULL);
@@ -284,19 +321,11 @@
 
 	/* Read one line at a time until we hit start of our report period */
 	do {
-		if (fgets(buf, bufsize, fd)) {
-			scanres = sscanf(buf+25, "%s %u %u", colstr, &uistart, &uidur);
+		if (get_historyline(buf, bufsize, fd, &err, colstr, &uistart, &uidur, &scanres)) {
 			start = uistart; dur = uidur;
 			if (scanres == 2) dur = time(NULL) - start;
 
-			if (scanres < 2) {
-				err++;
-				dbgprintf("Bad line in history file '%s'\n", buf);
-				start = dur = 0; /* Try next line */
-			}
-			else {
-				dbgprintf("Got entry starting %lu lasting %lu\n", start, dur);
-			}
+			dbgprintf("Got entry starting %lu lasting %lu\n", start, dur);
 		}
 		else {
 			start = time(NULL);
@@ -350,7 +379,7 @@
 	unsigned int uistart, uidur;
 	char colstr[MAX_LINE_LEN];
 	int color, done, i, scanres;
-	int fileerrors;
+	int fileerrors = 0;
 
 	repinfo->fstate = "OK";
 	repinfo->withreport = 0;
@@ -377,11 +406,15 @@
 	}
 	else {
 		/* Already positioned (probably in a pipe) */
-		fgets(l, sizeof(l), fd);
-		scanres = sscanf(l+25, "%s %u %u", colstr, &uistart, &uidur);
-		starttime = uistart; duration = uidur;
-		if (scanres == 2) duration = time(NULL) - starttime;
-		fileerrors = 0;
+		if (get_historyline(l, sizeof(l), fd, &fileerrors, colstr, &uistart, &uidur, &scanres)) {
+			starttime = uistart; duration = uidur;
+			if (scanres == 2) duration = time(NULL) - starttime;
+		}
+		else {
+			starttime = time(NULL); duration = 0;
+			strcpy(colstr, "clear");
+			fileerrors = 1;
+		}
 	}
 
 	if (starttime > totime) {
@@ -437,8 +470,7 @@
 		}
 
 		if ((starttime + duration) < totime) {
-			if (fgets(l, sizeof(l), fd)) {
-				scanres = sscanf(l+25, "%s %u %u", colstr, &uistart, &uidur);
+			if (get_historyline(l, sizeof(l), fd, &fileerrors, colstr, &uistart, &uidur, &scanres)) {
 				starttime = uistart; duration = uidur;
 				if (scanres == 2) duration = time(NULL) - starttime;
 			}
--- hobbitd/hobbitd_history.c	2006/05/25 21:04:44	1.46
+++ hobbitd/hobbitd_history.c	2006/07/31 15:44:15
@@ -167,6 +167,11 @@
 			downtimeactive = (atoi(items[12]) > 0);
 			clienttstamp = atoi(items[13]);
 
+			if (newcolor == -1) {
+				errprintf("Bad message: newcolor is unknown '%s'\n", items[7]);
+				continue;
+			}
+
 			p = hostnamecommas = strdup(hostname); while ((p = strchr(p, '.')) != NULL) *p = ',';
 
 			if (save_statusevents) {
@@ -224,7 +229,8 @@
 							/* Sun Oct 10 06:49:42 2004 red   1097383782 602 */
 
 							if ((strlen(l) > 24) && 
-							    (sscanf(l+24, " %s %d %d", oldcol, &lastchg_i, &dur_i) == 2)) {
+							    (sscanf(l+24, " %s %d %d", oldcol, &lastchg_i, &dur_i) == 2) &&
+							    (parse_color(oldcol) != -1)) {
 								/* 
 								 * Record the start location of the line
 								 */
@@ -262,7 +268,11 @@
 					 * Logfile does not exist.
 					 */
 					lastchg = tstamp;
-					statuslogfd = fopen(statuslogfn, "w");
+					statuslogfd = fopen(statuslogfn, "a");
+					if (statuslogfd == NULL) {
+						errprintf("Cannot open status historyfile '%s' : %s\n", 
+							statuslogfn, strerror(errno));
+					}
 				}
 
 				if (strcmp(oldcol, colorname(newcolor)) == 0) {
@@ -300,10 +310,6 @@
 
 					fclose(statuslogfd);
 				}
-				else {
-					errprintf("Cannot open status historyfile '%s' : %s\n", 
-						statuslogfn, strerror(errno));
-				}
 
 				MEMUNDEFINE(statuslogfn);
 				MEMUNDEFINE(oldcol);


More information about the Xymon mailing list