[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [hobbit] Two DNS lookups for a server but one fails
- To: hobbit (at) hswn.dk
- Subject: Re: [hobbit] Two DNS lookups for a server but one fails
- From: Henrik =?unknown-8bit?q?St=C3=B8rner?= <henrik (at) hswn.dk>
- Date: Wed, 7 Jan 2009 14:29:48 +0100
- References: <1F7B01020EC4D04DA17703634B9E888E09BFE6A3 (at) ULPGCTMVMAI003.EU.COLT>
- User-agent: Mutt/1.5.18 (2008-05-17)
Hi Martin,
On Mon, Jan 05, 2009 at 01:58:56PM -0000, Ward, Martin wrote:
> *** DNS lookup of 'a:smtp.server.com' ***
> Timeout (channel destroyed)
>
> In this instance it was the A record that failed but in others it is the
> NS record. I always get one of the queries back successfully, but not
> both.
>
> These were working fine until I upgraded to Xymon 4.2.2 so this looks
> like the culprit. Any ideas or suggestions?
There was a change made in 4.2.2 - backported from the 4.3.x code - to
fix a bug that could cause the network tests to lock up while doing the
DNS lookups. It is probably that "fix" that causes the problem.
Going over the DNS code again, I think there's some flawed logic in
how it handles the lookups. Could you try the attached version of
xymon-4.2.2/bbnet/dns.c ? Just copy it on top of the existing one,
then run "make" and copy the resulting xymon-4.2.2/bbnet/bbtest-net
binary to your ~xymon/server/bin/ directory (save the existing one
just in case this completely breaks stuff).
Let me know if that is better.
Regards,
Henrik
/*----------------------------------------------------------------------------*/
/* Hobbit monitor network test tool. */
/* */
/* Copyright (C) 2004-2006 Henrik Storner <henrik (at) hswn.dk> */
/* */
/* This program is released under the GNU General Public License (GPL), */
/* version 2. See the file "COPYING" for details. */
/* */
/*----------------------------------------------------------------------------*/
static char rcsid[] = "$Id: dns.c,v 1.31 2006-08-03 06:25:49 henrik Exp $";
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <arpa/nameser.h>
#include <netdb.h>
#include <sys/time.h>
#include "libbbgen.h"
#include <ares.h>
#include <ares_version.h>
#include "dns.h"
#include "dns2.h"
#ifdef HPUX
/*
 * HP-UX doesn't have hstrerror(), so supply a stand-in that yields an
 * empty message for every error code.
 */
char *hstrerror(int err)
{
	(void)err;
	return "";
}
#endif
/* ARES channel used for the ordinary hostname lookups queued via add_host_to_dns_queue() */
static ares_channel stdchannel;
/* Set to 1 once ares_init() has succeeded for stdchannel */
static int stdchannelactive = 0;
/* Non-zero: resolve through ARES. Cleared if ares_init() fails, after which gethostbyname() is used */
int use_ares_lookup = 1;
/* Number of new (not previously cached) hostnames handed to the resolver */
int dns_stats_total = 0;
/* Number of hostname lookups that succeeded */
int dns_stats_success = 0;
/* Number of hostname lookups that failed */
int dns_stats_failed = 0;
/* Number of dnsresolve() calls made with a non-NULL hostname */
int dns_stats_lookups = 0;
/* dns_queue_run() stops waiting for answers after dnstimeout+1 seconds */
int dnstimeout = 30;
/* Lookups that have been started but not yet accounted for; also reset and reused by dns_test_server() */
static int pending_dns_count = 0;
/* When this many lookups are pending, add_host_to_dns_queue() runs the queue before adding more */
int max_dns_per_run = 500;
/* If non-NULL, a line is written here for every failed hostname lookup */
FILE *dnsfaillog = NULL;
/* Forward declaration - add_host_to_dns_queue() needs this before its definition */
static void dns_queue_run(ares_channel channel);
/* One entry in the DNS cache tree, created by add_host_to_dns_queue() */
typedef struct dnsitem_t {
/* strdup()'ed copy of the hostname; also used as the key in the cache tree */
char *name;
/* Resolved IPv4 address; all zeroes when the lookup failed */
struct in_addr addr;
/* Not referenced by the code in this file */
struct dnsitem_t *next;
/* Set to 1 by dns_callback() when the lookup failed */
int failed;
/* Time the lookup was started; dns_callback() overwrites it with the tvdiff() of start and completion time */
struct timeval resolvetime;
} dnsitem_t;
static RbtHandle dnscache;
static int dnscachepreped = 0;

/*
 * Create the DNS cache tree the first time this is called.
 * Later calls do nothing.
 */
static void prepare_dnscache(void)
{
	if (!dnscachepreped) {
		dnscache = rbtNew(name_compare);
		dnscachepreped = 1;
	}
}
static char *find_dnscache(char *hostname)
{
struct in_addr inp;
RbtIterator handle;
dnsitem_t *dnsc;
if (!dnscachepreped) prepare_dnscache();
if (inet_aton(hostname, &inp) != 0) {
/* It is an IP, so just use that */
return hostname;
}
/* In the cache ? */
handle = rbtFind(dnscache, hostname);
if (handle == rbtEnd(dnscache)) return NULL;
dnsc = (dnsitem_t *)gettreeitem(dnscache, handle);
return inet_ntoa(dnsc->addr);
}
/*
 * Completion handler for a hostname lookup.
 *
 * "arg" is the dnsitem_t cache entry the lookup was started for. The
 * entry gets the resolved address (or all zeroes plus the "failed" flag),
 * and its resolvetime is replaced by the tvdiff() of the start time and
 * the current time. One pending lookup is accounted for as done.
 *
 * Statistics and the failure log are only updated here while the ARES
 * channel is active; add_host_to_dns_queue() does its own counting when
 * it calls this directly after a gethostbyname() lookup.
 *
 * The signature depends on the C-ARES version: releases after 1.4 pass an
 * extra "timeout" argument, which is not used.
 */
#if (ARES_VERSION_MAJOR > 1)
#error "Unsupported C-ARES version"
#else
#if (ARES_VERSION_MINOR > 4)
static void dns_callback(void *arg, int status, int timeout, struct hostent *hent)
#else
static void dns_callback(void *arg, int status, struct hostent *hent)
#endif
#endif
{
	dnsitem_t *item = (dnsitem_t *) arg;
	struct timeval finished;
	struct timezone tz;

	gettimeofday(&finished, &tz);
	tvdiff(&item->resolvetime, &finished, &item->resolvetime);
	pending_dns_count--;

	if (status != ARES_SUCCESS) {
		/* Failed: record a null address and flag the entry */
		memset(&item->addr, 0, sizeof(item->addr));
		dbgprintf("DNS lookup failed for %s - status %s (%d)\n", item->name, ares_strerror(status), status);
		item->failed = 1;

		if (!stdchannelactive) return;

		if (dnsfaillog) {
			fprintf(dnsfaillog, "DNS lookup failed for %s - status %s (%d)\n",
				item->name, ares_strerror(status), status);
		}
		dns_stats_failed++;
		return;
	}

	/* Success: keep the first address from the answer */
	memcpy(&item->addr, hent->h_addr_list[0], sizeof(item->addr));
	dbgprintf("Got DNS result for host %s : %s\n", item->name, inet_ntoa(item->addr));
	if (stdchannelactive) dns_stats_success++;
}
/*
 * Queue a hostname for resolving, unless it is an IP-address or has
 * already been queued.
 *
 * A cache entry for the name is created and inserted into the cache tree
 * right away; the address is filled in by dns_callback() when the lookup
 * completes. With ARES enabled the lookup is only started here - the
 * answer arrives when the queue is run. If ARES cannot be initialized,
 * the lookup is done synchronously with gethostbyname() instead.
 *
 * If max_dns_per_run lookups are already pending, the queue is run first
 * so the number of outstanding queries stays bounded.
 *
 * A NULL hostname is ignored. If memory for the cache entry cannot be
 * allocated, an error is logged and the name is not queued.
 */
void add_host_to_dns_queue(char *hostname)
{
	dnsitem_t *dnsc;

	if (hostname == NULL) return;

	if (!dnscachepreped) prepare_dnscache();

	if (stdchannelactive && (pending_dns_count >= max_dns_per_run)) {
		dns_queue_run(stdchannel);
	}

	/* IP-addresses and names we have already seen need no lookup */
	if (find_dnscache(hostname) != NULL) return;

	/* New hostname - get all the memory we need before touching any counters */
	dnsc = calloc(1, sizeof(*dnsc));
	if (dnsc != NULL) dnsc->name = strdup(hostname);
	if ((dnsc == NULL) || (dnsc->name == NULL)) {
		errprintf("Out of memory, cannot queue DNS lookup of '%s'\n", hostname);
		free(dnsc);
		return;
	}

	dbgprintf("Adding hostname '%s' to resolver queue\n", hostname);
	pending_dns_count++;

	if (use_ares_lookup && !stdchannelactive) {
		int status;

		status = ares_init(&stdchannel);
		if (status == ARES_SUCCESS) {
			stdchannelactive = 1;
		}
		else {
			errprintf("Cannot initialize ARES resolver, using standard\n");
			errprintf("ARES error was: '%s'\n", ares_strerror(status));
			use_ares_lookup = 0;
		}
	}

	/* dns_callback() turns this start time into the time spent resolving */
	gettimeofday(&dnsc->resolvetime, NULL);
	rbtInsert(dnscache, dnsc->name, dnsc);

	if (use_ares_lookup) {
		ares_gethostbyname(stdchannel, hostname, AF_INET, dns_callback, dnsc);
	}
	else {
		struct hostent *hent;
		int status;

		hent = gethostbyname(hostname);
		if (hent) {
			status = ARES_SUCCESS;
			dns_stats_success++;
		}
		else {
			status = ARES_ENOTFOUND;
			dns_stats_failed++;
			dbgprintf("gethostbyname() failed with err %d: %s\n", h_errno, hstrerror(h_errno));
			if (dnsfaillog) {
				fprintf(dnsfaillog, "Hostname lookup failed for %s - status %s (%d)\n",
					hostname, hstrerror(h_errno), h_errno);
			}
		}

		/* Feed the result through the same handler the ARES lookups use */
#if (ARES_VERSION_MAJOR > 1)
#error "Unsupported C-ARES version"
#else
#if (ARES_VERSION_MINOR > 4)
		dns_callback(dnsc, status, 0, hent);
#else
		dns_callback(dnsc, status, hent);
#endif
#endif
	}

	dns_stats_total++;
}
/*
 * Queue the host that must be contacted to fetch a URL for resolving.
 *
 * The URL is decoded with decode_url(). If it specifies a proxy, the
 * proxy's host is queued; otherwise the destination host is. Nothing is
 * queued when the relevant part of the URL has a parse error.
 */
void add_url_to_dns_queue(char *url)
{
	bburl_t parsed;

	if (!dnscachepreped) prepare_dnscache();

	decode_url(url, &parsed);

	if (!parsed.proxyurl) {
		/* Direct connection - resolve the destination host */
		if (!parsed.desturl->parseerror) add_host_to_dns_queue(parsed.desturl->host);
		return;
	}

	/* Going through a proxy - that is the host we connect to */
	if (!parsed.proxyurl->parseerror) add_host_to_dns_queue(parsed.proxyurl->host);
}
/*
 * Process the queries outstanding on an ARES channel.
 *
 * Loops over select() / ares_process() until either no lookups are
 * pending, ARES has no more sockets to wait on, or dnstimeout+1 seconds
 * have passed. Does nothing when pending_dns_count is zero.
 */
static void dns_queue_run(ares_channel channel)
{
	int nfds, selres;
	fd_set read_fds, write_fds;
	struct timeval *tvp, tv;
	struct timeval cutoff, now;

	if (pending_dns_count == 0) return;

	dbgprintf("Processing %d DNS lookups with ARES\n", pending_dns_count);

	/*
	 * "now" must hold a valid time before the loop condition is first
	 * evaluated - otherwise the loop might be skipped, or not, depending
	 * on whatever happens to be on the stack.
	 */
	gettimeofday(&now, NULL);
	cutoff = now;
	cutoff.tv_sec += dnstimeout + 1;

	while ((pending_dns_count > 0) && (now.tv_sec < cutoff.tv_sec)) {
		FD_ZERO(&read_fds);
		FD_ZERO(&write_fds);
		nfds = ares_fds(channel, &read_fds, &write_fds);
		if (nfds == 0) break;	/* ARES has nothing left to wait for */

		/* Wait at most 10 seconds, less if ARES has an earlier timeout due */
		tv.tv_sec = 10; tv.tv_usec = 0;
		tvp = ares_timeout(channel, &tv, &tv);
		selres = select(nfds, &read_fds, &write_fds, NULL, tvp);
		if (selres < 0) {
			/*
			 * The fd sets cannot be trusted after a failed select()
			 * (e.g. an interrupting signal). Clear them, so that
			 * ares_process() only does its timeout handling.
			 */
			FD_ZERO(&read_fds);
			FD_ZERO(&write_fds);
		}
		ares_process(channel, &read_fds, &write_fds);

		gettimeofday(&now, NULL);
	}
}
/*
 * Run the queue of pending hostname lookups on the standard ARES channel.
 * Does nothing if that channel has not been set up.
 */
void flush_dnsqueue(void)
{
	if (!dnscachepreped) prepare_dnscache();

	if (!stdchannelactive) return;
	dns_queue_run(stdchannel);
}
/*
 * Return the IP-address (as text) of a hostname that was previously
 * queued with add_host_to_dns_queue().
 *
 * Pending lookups are flushed first. Returns NULL when hostname is NULL,
 * when the name was never queued (an internal error, which is logged),
 * or when the lookup left a null address ("0.0.0.0") in the cache.
 * The result comes from find_dnscache(), so it is either "hostname"
 * itself or inet_ntoa()'s buffer - the caller must not free it.
 */
char *dnsresolve(char *hostname)
{
	char *ip;

	if (!dnscachepreped) prepare_dnscache();

	if (!hostname) return NULL;

	flush_dnsqueue();
	dns_stats_lookups++;

	ip = find_dnscache(hostname);
	if (ip) {
		/* A null address marks a lookup that did not resolve */
		return (strcmp(ip, "0.0.0.0") != 0) ? ip : NULL;
	}

	errprintf("dnsresolve - internal error, name '%s' not in cache\n", hostname);
	return NULL;
}
/*
 * Test a DNS server by sending it one or more queries.
 *
 * serverip : IP-address of the DNS server to query.
 * hostname : Comma-separated list of lookups. Each item is either a name
 *            (an A-record lookup), or "type:name" where the type is
 *            translated by dns_name_type().
 * banner   : Cleared, then filled with the collected responses and the
 *            time spent. When more than one lookup was requested, each
 *            response is preceded by a line identifying the lookup.
 *
 * Returns 0 if all lookups succeeded, 1 if any of them failed or the
 * test could not be set up.
 *
 * Note: this resets the shared pending_dns_count counter.
 */
int dns_test_server(char *serverip, char *hostname, strbuffer_t *banner)
{
	ares_channel channel;
	struct ares_options options;
	struct in_addr serveraddr;
	int status;
	int setupfailed = 0;
	int multiple;
	struct timeval starttime, endtime;
	struct timeval *tspent;
	char msg[100];
	char *tspec, *tst;
	dns_resp_t *responses = NULL;
	dns_resp_t *walk = NULL;

	if (!dnscachepreped) prepare_dnscache();

	if ((serverip == NULL) || (hostname == NULL)) {
		errprintf("dns_test_server: missing server IP or lookup specification\n");
		return 1;
	}

	if (inet_aton(serverip, &serveraddr) == 0) {
		errprintf("dns_test_server: serverip '%s' not a valid IP\n", serverip);
		return 1;
	}

	memset(&options, 0, sizeof(options));
	options.flags = ARES_FLAG_NOCHECKRESP;
	options.servers = &serveraddr;
	options.nservers = 1;
	status = ares_init_options(&channel, &options, (ARES_OPT_FLAGS | ARES_OPT_SERVERS));
	if (status != ARES_SUCCESS) {
		errprintf("Could not initialize ares channel: %s\n", ares_strerror(status));
		return 1;
	}

	pending_dns_count = 0;

	/* strtok() modifies the string, so work on a private copy */
	tspec = strdup(hostname);
	if (tspec == NULL) {
		errprintf("dns_test_server: out of memory\n");
		ares_destroy(channel);
		return 1;
	}

	gettimeofday(&starttime, NULL);

	/* Start one query for each item in the list */
	for (tst = strtok(tspec, ","); (tst); tst = strtok(NULL, ",")) {
		/* Zero-filled, so msgstatus has a defined value even if the callback were never run */
		dns_resp_t *newtest = calloc(1, sizeof(*newtest));
		char *p, *tlookup;
		int atype = T_A;

		if (newtest == NULL) {
			errprintf("dns_test_server: out of memory\n");
			setupfailed = 1;
			break;
		}

		newtest->msgbuf = newstrbuffer(0);
		newtest->next = NULL;
		if (responses == NULL) responses = newtest; else walk->next = newtest;
		walk = newtest;

		/* "type:name" selects the record type; a plain name means an A-record */
		p = strchr(tst, ':');
		tlookup = (p ? p+1 : tst);
		if (p) { *p = '\0'; atype = dns_name_type(tst); *p = ':'; }

		dbgprintf("ares_search: tlookup='%s', class=%d, type=%d\n", tlookup, C_IN, atype);
		ares_search(channel, tlookup, C_IN, atype, dns_detail_callback, newtest);
		pending_dns_count++;
	}

	if ((responses == NULL) && !setupfailed) {
		errprintf("dns_test_server: no lookups found in '%s'\n", hostname);
		setupfailed = 1;
	}

	dns_queue_run(channel);
	gettimeofday(&endtime, NULL);

	/*
	 * Destroy the channel BEFORE collecting the results. Per the C-ARES
	 * documentation, ares_destroy() invokes the callback of any query
	 * still pending (with ARES_EDESTRUCTION). That must happen while the
	 * response buffers still exist, and it guarantees every response has
	 * been through the callback when we read it below.
	 */
	ares_destroy(channel);

	tspent = tvdiff(&starttime, &endtime, NULL);
	clearstrbuffer(banner); status = ARES_SUCCESS;

	/* Walk the lookup list again, in step with the responses */
	strcpy(tspec, hostname);
	tst = strtok(tspec, ",");
	multiple = ((responses != NULL) && (responses->next != NULL));
	walk = responses;
	while (walk) {
		dns_resp_t *done = walk;

		/* Print an identifying line if more than one query */
		if (multiple) {
			/* Added piecewise - the lookup spec can be longer than any fixed-size buffer */
			addtobuffer(banner, "\n*** DNS lookup of '");
			addtobuffer(banner, (tst ? tst : ""));
			addtobuffer(banner, "' ***\n");
		}
		addtostrbuffer(banner, walk->msgbuf);
		if (walk->msgstatus != ARES_SUCCESS) status = walk->msgstatus;

		xfree(walk->msgbuf);
		walk = walk->next;
		xfree(done);
		tst = strtok(NULL, ",");
	}
	xfree(tspec);

	snprintf(msg, sizeof(msg), "\nSeconds: %u.%03u\n",
		 (unsigned int)tspent->tv_sec, (unsigned int)tspent->tv_usec/1000);
	addtobuffer(banner, msg);

	pending_dns_count = 0;

	return (setupfailed || (status != ARES_SUCCESS));
}