/*
 * httpscan - send HEAD / HTTP/1.0 to servers on stdin, print the results
 *            to stdout.  uses POSIX threads to efficiently query hosts 
 *            in parallel.  Scans a full class B network in about an hour.
 *
 * timeouts are handled with a list & pthread_cancel().  threadid's and 
 * time(NULL) are inserted into a linked list; the list is scanned 
 * periodically & any thread running longer than o_thread_timeout is canceled.
 * a pthread cleanup handler closes the open socket associated w/ that thread
 * & removes it from the linked list.
 *
 * (someone tell me if this is a stupid way to do this :-)
 *
 * the connect() itself is non-blocking (Stevens r0x) because (at least under 
 * Linux 2.2 & 2.4) a connect() running in a thread can't be canceled if it's 
 * in a state other than ESTABLISHED (e.g., SYN_SENT). This makes things 
 * painfully slow when working against an empty network.
 *
 * this program has 2 purposes:  
 *   1) be able to scan networks behind !@# transparent http 
 *      caches/accelerators that accept a connection for the entire network.  
 *      If you nmap it, the whole network will be flagged as supporting HTTP, 
 *      but where are the *real* web servers?)
 *   2) teach my lazy ass how to do threads (..and linked lists..  a fine
 *      reminder that I've forgotten-- if I ever really knew-- how to do 
 *      Real Programming.)
 *
 * compile with [g]cc [-Wall -pedantic] -o httpscan httpscan.c -lpthread
 *
 * 10 Jan 2001  jwa@jammed.com   GPL'd: http://www.gnu.org/copyleft/gpl.html
 *
 * $Id: httpscan.c,v 1.3 2001/05/09 05:31:27 jwa Exp $
 *
 */

// revision-control identification string for this file; usage() prints it
static const char version[] = "$Id: httpscan.c,v 1.3 2001/05/09 05:31:27 jwa Exp $";

// includes

#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <pthread.h>
#include <errno.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include <stdarg.h>

// the list

#define	HOSTLEN		1024	// size of a hostname buffer, terminator included

// One node of the singly linked list of scanner threads.  A node whose
// id is 0 is free and may be reused by insert().
//
// The struct carries the tag "list" so that the `next` member points at a
// complete type; existing `(list *) p->next` casts remain valid but are no
// longer required.
typedef struct list
{
	pthread_t	id;			// thread ID (0 == slot unused)
	time_t		start;			// when the thread started
	int		socket;			// socket fd for shutdown/close
	char 		host[HOSTLEN];		// hostname
	struct list 	*next;			// next node, NULL at the tail
} list;

// protos
//
// scanning: main() reads hosts and starts one getinfo_thread() per host;
// getinfo() performs the HEAD request and extract_info() formats the reply.
// networking: open_tcp() resolves via atoaddr() and connects via
// connect_nonb().  cleanup_thread() is the pthread cleanup handler.
// status()/debug() are printf-style reporting helpers.

int		main(int argc, char *argv[]);
int		getinfo(char *host, int port);
void 		extract_info (char *buf, char *version, size_t sz);
static void 	*getinfo_thread (void *);
void		usage(void);
struct in_addr	*atoaddr(char *address);
int		open_tcp(char *host, int port);
int 		connect_nonb(int sockfd, const struct sockaddr *saptr, socklen_t salen, int nsec);
void		cleanup_thread(void *);
void		status(char *fmt, ...);
void 		debug(int code, char *type, char *fmt, ...);

// list mgmt
//
// all of these operate on the global thread list (thread_l_head/thread_l)

void		free_id (pthread_t id);
list		*walk_and_locate (pthread_t id);
void		walk_and_kill(int expire);
void		insert(pthread_t id);
void		walk_and_print(void);

// global counters
// (shared between the main thread and the scanner threads)

int c_running;		// threads running; updated under runcount_mutex
int c_hits;		// servers found; incremented under successcount_mutex
int c_errors;		// network/socket errors; incremented under errorcount_mutex
int c_cancels;		// threads canceled
int c_fucked;		// threads that we tried to cancel, but couldn't
int c_frees;		// list entries freed (counted in free_id)
int c_opens;		// socket opens (counted in getinfo)
int c_shutdowns;	// socket closes (counted in free_id)
int c_hosts;		// hosts scanned (lines read from stdin)

// global opts
// (set from the command line in main)

int o_debug;		// debug level (see below); -d
int o_thread_timeout;	// seconds before a thread is canceled; also the connect timeout; -t
int o_max_threads;	// maximum number of concurrently running threads; -c
int o_show_errors;	// print some socket-related errors; -e
int o_verbose;		// print a running status line.  argv0 is just as good.  -v

// debug crap
//
// each macro expands to the first TWO arguments of debug(): the numeric
// level and its label.  a message is printed when its level <= o_debug.

#define	SOCKET	1, "SOCKET"	// debug level 1; socket junk
#define THREAD	2, "THREAD"	// level 2; fun with threads
#define LIST	3, "LIST"	// level 3; dinner with lists

// muten

pthread_mutex_t list_mutex = PTHREAD_MUTEX_INITIALIZER;		// guards the thread list
pthread_mutex_t runcount_mutex = PTHREAD_MUTEX_INITIALIZER;	// guards c_running
pthread_mutex_t errorcount_mutex = PTHREAD_MUTEX_INITIALIZER;	// guards c_errors
pthread_mutex_t successcount_mutex = PTHREAD_MUTEX_INITIALIZER;	// guards c_hits
pthread_mutex_t free_list_mutex = PTHREAD_MUTEX_INITIALIZER;	// NOTE(review): no user visible in this file

// the list

list *thread_l;		// last node: an always-empty sentinel that insert() fills and replaces
list *thread_l_head;	// first node; every list walk starts here

// in lieu of a setproctitle() ..

char *argv0ptr;		// points at argv[0]; status() writes the status line through it

// here we go
//
// Parse the command line, then read one host per line from stdin and scan
// each one in its own detached thread, never running more than
// o_max_threads at once.  While waiting for a free slot (and again after
// EOF) the main thread polls once a second and cancels threads that have
// exceeded o_thread_timeout.  A summary line is printed at the end.
//
// Exits 0 on completion and non-zero on usage or resource errors.

int main(int argc, char *argv[]) {
	pthread_t	threadid;
	char		host[HOSTLEN];
	char		*hp;
	int		c;
	int		rc;
	time_t		t_start, t_end;	// elapsed

	// init junk

	argv0ptr = *argv;

	c_running = c_hosts = c_errors = c_hits = c_cancels = c_fucked = 0;

	o_show_errors = o_debug;	// o_debug is still 0 here: errors are off by default
	o_max_threads = 5;	// default # of threads to run
	o_thread_timeout = 7;	// seconds

	// a peer that resets the connection before we write() must produce
	// an EPIPE error in that one thread, not kill the whole scan
	signal(SIGPIPE, SIG_IGN);

	// getopt() reports the end of the options with -1
	while ((c = getopt(argc, argv, "c:t:d:ev")) != -1) {
		switch (c) {
			case 'c':
				o_max_threads = atoi(optarg);
				if (o_max_threads < 1) {
					// with a limit below 1 the throttle loop
					// further down could never terminate
					fprintf(stderr, "thread count must be at least 1\n");
					usage();
					exit(-1);
				}
				fprintf(stderr, "o_max_threads set to %d\n",
					o_max_threads);
				break;
			case 't':
				o_thread_timeout = atoi(optarg);
				if (o_thread_timeout < 0) {
					fprintf(stderr, "timeout must not be negative\n");
					usage();
					exit(-1);
				}
				fprintf(stderr, "o_thread timeout set to %d\n",
					o_thread_timeout);
				break;
			case 'd':
				o_debug = atoi(optarg);
				break;
			case 'e':
				o_show_errors = 1;
				break;
			case 'v':
				o_verbose = 1;
				break;
			default:
				usage();
				exit(-1);
		}
	}

	// init list: a single empty node that is both head and tail

	thread_l = calloc(1, sizeof(list));
	if (thread_l == NULL) {
		perror("calloc");
		exit(-1);
	}
	thread_l_head = thread_l;

	// main read loop

	t_start = time(NULL);

	while (fgets(host, sizeof(host)-1, stdin) != NULL) {
		c_hosts++;
		// cut at the line terminator, if there is one; a final line
		// without a newline must keep its last character
		host[strcspn(host, "\r\n")] = 0;

		while (c_running >= o_max_threads) {
			// idle loop
			status("running");
			walk_and_kill(o_thread_timeout);
			sleep(1);
		}
		// at some point, arg will be a struct that has
		// both hostname & port.  KISS for now.
		// the thread takes ownership of hp and frees it.
		hp = malloc(strlen(host)+1);
		if (hp == NULL) {
			perror("malloc");
			exit(-1);
		}

		strcpy(hp, host);	// malloc'd w/ strlen+1

		// counted before the thread exists so the throttle above
		// can never be overshot
		pthread_mutex_lock(&runcount_mutex);
		c_running++;
		pthread_mutex_unlock(&runcount_mutex);

		// pthread_create() returns the error number; it does not
		// set errno, so perror() would print something unrelated
		rc = pthread_create(&threadid, NULL, &getinfo_thread, hp);
		if (rc != 0) {
			fprintf(stderr, "pthread_create: %s\n", strerror(rc));
			exit(-1);
		}

	}
	// need to wait for all pending threads to finish
	while (c_running != 0) {
		status("waiting for remaining threads");
		walk_and_kill(o_thread_timeout);
		sleep(1);
	}
	t_end = time(NULL);

	printf("%d hosts scanned in %ld seconds; %d hits; %d socket errors; %d canceled threads\n",
		c_hosts, (long) (t_end - t_start), c_hits, c_errors, c_cancels);

	if (c_fucked)
		printf("fucked: %d\n", c_fucked);

	exit(0);
}

// Thread start routine: scan one host.
//
// arg is a malloc'd hostname string; ownership passes to this thread,
// which copies it and frees it.  The thread detaches itself, registers in
// the thread list and installs cleanup_thread() so that the list entry and
// the running count are released however the thread ends (normal exit or
// cancellation).  Always terminates via pthread_exit(NULL).
static void * getinfo_thread (void *arg) {
	char host[HOSTLEN];
	pthread_t tid;

	// strncpy() does not terminate the copy when the source fills it
	strncpy(host, arg, sizeof(host)-1);
	host[sizeof(host)-1] = 0;
	free (arg);

	debug(THREAD, "started");
	tid = pthread_self();

	// NOTE(review): asynchronous cancellation can strike while this
	// thread holds a mutex or is inside stdio; confirm this is acceptable
	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	// install cleanup handler.  this will get called
	// whenever the thread terminates
	pthread_cleanup_push(cleanup_thread, (void *) &tid);

	// "daemonize"
	pthread_detach(tid);

	// push threadid into the list of active threads
	insert(tid);

	if (getinfo(host, 80) != 0) {
		pthread_mutex_lock(&errorcount_mutex);
		c_errors++;
		pthread_mutex_unlock(&errorcount_mutex);
	}

	if (o_verbose)
		fprintf(stderr, "[%ld] hosts=%d  fucked=%d running=%d "
		                "errors=%d  frees=%d  cancels=%d\n", 
				(long) tid, c_hosts, c_fucked, c_running, 
				c_errors, c_frees, c_cancels);


	debug(THREAD, "exiting");
	// pthread_exit() runs the cleanup handler pushed above
	pthread_exit(NULL);

	// not reached: pthread_cleanup_push() must be lexically paired with
	// a pthread_cleanup_pop(), and the function needs a return value
	pthread_cleanup_pop(0);
	return NULL;
}

// pthread cleanup handler for a scanner thread.  arg points at the
// thread's own pthread_t.  Releases the thread's list entry through
// free_id() and then takes one off the running-thread count.
void cleanup_thread (void *arg) {
	const pthread_t self_id = *(pthread_t *) arg;

	debug(THREAD, "cleanup_thread(%ld) called", self_id);

	free_id(self_id);

	// c_running is shared with the main thread
	pthread_mutex_lock(&runcount_mutex);
	c_running -= 1;
	pthread_mutex_unlock(&runcount_mutex);

	debug(THREAD, "cleanup_thread(%ld) exiting", self_id);
}


// Connect to host:port, send "HEAD / HTTP/1.0" and print one result line
// ("host  summary") to stdout.
//
// The connected socket is recorded in this thread's list entry; it is not
// closed here but by the cleanup handler when the thread ends.
//
// Returns -1 if the connection could not be opened, 0 otherwise (a failed
// write or read is reported in the printed line, not in the return value).
int getinfo(char *host, int port) {
	static const char request[] = "HEAD / HTTP/1.0\r\n\r\n";
	const ssize_t request_len = (ssize_t) (sizeof(request) - 1);
	int 	s;
	int	saved_errno;
	char 	sockbuf[1024];
	ssize_t	sz;		// read() returns -1 on error: must be signed
	char	version[1024];
	list	*p;

	s = open_tcp(host, port);
	if (s < 0)
		return -1;

	// update p->socket with our socket
	pthread_mutex_lock(&list_mutex);
	c_opens++;		// shared counter: bump it while we hold a lock
	p = walk_and_locate(pthread_self());
	if (p == NULL) {
		fprintf(stderr, "fatal: can't locate myself?!!\n");
		exit (-1);
	}
	p->socket = s;
	strncpy(p->host, host, HOSTLEN-1);
	p->host[HOSTLEN-1] = 0;
	pthread_mutex_unlock(&list_mutex);

	debug(SOCKET, "HEADing %s", host);

	if (write(s, request, (size_t) request_len) != request_len) {
		// grab errno before any other library call can change it
		saved_errno = errno;
		debug(SOCKET, "write failed (%s)", strerror(saved_errno));
		snprintf(version, sizeof(version), "[%s]", strerror(saved_errno));
	} else {
		debug(SOCKET, "reading");
		// leave room for the terminator extract_info() relies on
		sz = read(s, sockbuf, sizeof(sockbuf) - 1);
		saved_errno = errno;
		debug(SOCKET, "read got %d", (int) sz);
		if (sz < 0) {
			pthread_mutex_lock(&errorcount_mutex);
			c_errors++;
			pthread_mutex_unlock(&errorcount_mutex);
			snprintf(version, sizeof(version), "socket foobar: %s", strerror(saved_errno));
		} else if (sz == 0) {
			// not a very chatty server..
			snprintf(version, sizeof(version), "no data");
		} else {
			sockbuf[sz] = 0;
			extract_info(sockbuf, version, sizeof(version));
			pthread_mutex_lock(&successcount_mutex);
			c_hits++;
			pthread_mutex_unlock(&successcount_mutex);
		}
	}
	fprintf(stdout, "%-15s %s\n", host, version); fflush(stdout);
	// socket cleanup is handled by pthread_cleanup_push
	return 0;
}


// extract the status line and the Server: header from an HTTP response
//
//   buf      NUL-terminated response text; modified in place (line ends
//            are overwritten with NULs)
//   version  output buffer
//   sz       size of the output buffer in bytes
//
// On return version holds "[status line] - [server]".  If the first line
// is empty apart from its line terminator, it holds
// "no data .. deceptagon" instead.  Lines may end in "\r\n" or a bare
// "\n".  When several Server: headers are present the last one wins.
//
// The text is split by hand rather than with strtok(), whose hidden
// static state is shared by every thread.

void extract_info (char *buf, char *version, size_t sz) {
	char response[128] = { 0 };
	char server[128] = { 0 };
	char *line;
	char *eol;
	char *next;
	size_t len;
	int first = 1;

	if (version == NULL || sz == 0)
		return;

	// blank lines ahead of the status line are skipped
	line = (buf != NULL) ? buf + strspn(buf, "\n") : NULL;

	while (line != NULL && *line != '\0') {
		eol = strchr(line, '\n');
		next = (eol != NULL) ? eol + 1 : NULL;
		len = (eol != NULL) ? (size_t) (eol - line) : strlen(line);

		// drop the \r only when there really is one
		if (len > 0 && line[len-1] == '\r')
			len--;
		line[len] = '\0';

		if (first) {
			first = 0;
			if (len == 0) { // dad nabbit
				snprintf(version, sz, "no data .. deceptagon");
				return;
			}
			snprintf(response, sizeof(response), "%s", line);
		}

		if (len >= 7 && !strncasecmp(line, "Server:", 7)) {
			const char *value = line + 7;

			if (*value == ' ')	// one separating blank is not part of the value
				value++;
			snprintf(server, sizeof(server), "%s", value);
		}

		line = next;
	}
	snprintf(version, sz, "[%s] - [%s]", response, server);
}


// open a socket
//
// Resolve host, create a TCP socket and connect it to host:port, waiting
// at most o_thread_timeout seconds for the connection.
//
// Returns the connected descriptor, or -1 if the host cannot be resolved
// or the connection fails.  Exits the program if no socket can be created.
//
// Dotted-quad input is parsed right here into a local variable.  atoaddr()
// hands back a pointer to storage shared by all threads, so it is only
// used for real host names, and its result is copied at once.
// NOTE(review): the host name path still shares that storage between
// threads for a short moment.

int open_tcp(char *host, int port) {
	struct in_addr		resolved;
	struct in_addr		*addr;
	struct sockaddr_in 	address;
	in_addr_t		numeric;
	int 			sock, connected, saved_errno;

	debug(SOCKET, "opening %s:%d", host, port);

	numeric = inet_addr(host);
	if (numeric != INADDR_NONE) {
		resolved.s_addr = numeric;
	} else {
		addr = atoaddr(host);
		if (addr == NULL) {
			if (o_show_errors)
				fprintf(stderr, "%s: bad hostname\n", host);
			return -1;
		}
		resolved = *addr;
	}

	memset(&address, 0, sizeof(address));
	address.sin_family = AF_INET;
	address.sin_port = htons((unsigned short) port);
	address.sin_addr = resolved;

	sock = socket(AF_INET, SOCK_STREAM, 0);
	if (sock == -1) {
		fprintf(stderr, "socket: %s (%d hosts scanned, %d active sessions .. try lowering # of threads)\n",
			strerror(errno), c_hosts, c_running);
		exit(-1);
	}

	connected = connect_nonb(sock, (struct sockaddr *) &address, sizeof(address), o_thread_timeout);
	if (connected != 0) {
		// connect_nonb() closes the descriptor itself when the
		// connection times out or is refused, so it must not be
		// closed a second time here.
		// keep errno: the debug output may change it before perror()
		saved_errno = errno;
		debug(SOCKET, "connect() failed: %s", strerror(saved_errno));
		if (o_show_errors) {		// only if we care
			errno = saved_errno;
			perror("connect");
		}
		return -1;
	}
	debug(SOCKET, "got socket %d", sock);
	return sock;
}


// a non-blocking connect.  pthread_cancel() can't cancel a connect()
// that is in the SYN_SENT phase (linux 2.2, 2.4), so we do a non-blocking
// connect.
//
//   sockfd  unconnected socket
//   saptr   address to connect to, salen bytes long
//   nsec    seconds to wait for the connection; 0 waits without limit
//
// Returns 0 with the socket connected and its original file status flags
// restored.  Returns -1 with errno set on failure; in that case the socket
// has been closed on EVERY path, so the caller must not close it again.

int connect_nonb (int sockfd, const struct sockaddr *saptr, socklen_t salen, int nsec) {
	int			flags, n, error, saved_errno;
	socklen_t		len;
	fd_set			rset, wset;
	struct timeval		tval;

	if (sockfd < 0) {
		errno = EBADF;
		return(-1);
	}
	if (sockfd >= FD_SETSIZE) {
		// an fd_set cannot hold this descriptor; FD_SET() would
		// write outside the set
		errno = EMFILE;
		goto fail;
	}

	flags = fcntl(sockfd, F_GETFL, 0);
	if (flags < 0 || fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) < 0)
		goto fail;

	debug(SOCKET, "non-blocking connect start");
	error = 0;
	n = connect(sockfd, (struct sockaddr *) saptr, salen);
	if (n < 0 && errno != EINPROGRESS)
		goto fail;

	debug(SOCKET, "non-blocking connect return");

	if (n != 0) {	/* n == 0: connect completed immediately */
		FD_ZERO(&rset);
		FD_SET(sockfd, &rset);
		wset = rset;
		tval.tv_sec = nsec;
		tval.tv_usec = 0;

		debug(SOCKET, "select() start");
		n = select(sockfd+1, &rset, &wset, NULL, nsec ? &tval : NULL);
		if (n == 0) {
			debug(SOCKET, "select() timeout");
			errno = ETIMEDOUT;
			goto fail;
		}
		if (n < 0)	/* the sets are meaningless after an error */
			goto fail;
		debug(SOCKET, "select() with I/O (?)");

		if (!FD_ISSET(sockfd, &rset) && !FD_ISSET(sockfd, &wset)) {
			fprintf(stderr, "select error: sockfd not set");
			exit(-1);
		}

		/* readable or writable: ask the socket how the connect went */
		len = sizeof(error);
		if (getsockopt(sockfd, SOL_SOCKET, SO_ERROR, &error, &len) < 0)
			goto fail;	/* Solaris pending error */
	}

	fcntl(sockfd, F_SETFL, flags);	/* restore file status flags */

	if (error) {
		errno = error;
		goto fail;
	}
	return(0);

fail:
	/* close() must not disturb the errno we report */
	saved_errno = errno;
	close(sockfd);
	errno = saved_errno;
	return(-1);
}

// hostname -> struct in_addr magic
//
// address may be a dotted-quad string or a host name.  Returns a pointer
// to the IPv4 address, or NULL if address is NULL or cannot be resolved to
// an IPv4 address.
//
// The result lives in a single static variable that every call overwrites:
// copy it immediately.  Not safe to call from several threads at once.

struct in_addr *atoaddr(char *address) {
	struct hostent *host;
	static struct in_addr saddr;

	if (address == NULL)
		return NULL;

	/* First try it as aaa.bbb.ccc.ddd. */

	saddr.s_addr = inet_addr(address);
	if (saddr.s_addr != INADDR_NONE)
		return &saddr;

	host = gethostbyname(address);

	// only accept an answer that really is an IPv4 address
	if (host == NULL || host->h_addrtype != AF_INET ||
	    host->h_length != (int) sizeof(saddr) ||
	    host->h_addr_list == NULL || host->h_addr_list[0] == NULL)
		return NULL;

	// copy the bytes instead of casting the resolver's char pointer
	memcpy(&saddr, host->h_addr_list[0], sizeof(saddr));
	return &saddr;
}

// Print the command line summary and the version string to stdout, then
// terminate the program with exit status 1.  Does not return.
void usage(void) {
	static const char help_text[] =
		"usage: httpscan [-c thread count] [-t timeout]\n"
		"       [-d debuglevel (1-3)] [-e show net errors] [-v verbose]\n"
		"feed a hostlist to stdin.\n";

	fputs(help_text, stdout);
	fputs(version, stdout);
	fputc('\n', stdout);
	exit(1);
}


// list management stuff

// Register thread id in the thread list, stamped with the current time
// and with no socket (-1) yet.
//
// A previously freed node (id == 0) is reused when there is one.  The last
// node of the list is always an empty sentinel: when the search ends up
// there, the sentinel is filled in and a fresh empty sentinel is appended.
// Exits the program if that node cannot be allocated.
void insert(pthread_t id) {
	list *p;

	pthread_mutex_lock(&list_mutex);

	// walk list looking for a freed entry (0 in id field)
	p = walk_and_locate(0);
	if ((p) && (p->next)) {
		debug(LIST, "inserting %ld into free", (long) id);
		p->id = id;
		p->start = time(NULL);
		p->socket = -1;
	} else {
		debug(LIST, "inserting %ld into new", (long) id);
		// get the new sentinel first: without it the list would be
		// left with a NULL tail that the next append dereferences
		thread_l->next = calloc(1, sizeof(list));
		if (thread_l->next == NULL) {
			perror("calloc");
			exit(-1);
		}
		thread_l->id = id;
		thread_l->start = time(NULL);
		thread_l->socket = -1;
		thread_l = (list *) thread_l->next;
	}
	pthread_mutex_unlock(&list_mutex);
	debug(LIST, "end of insert");
}


// walk thread_l looking for threads that have been running past
// their prime & kill 'em.

void walk_and_kill(int expire) {
	list *p;
	int runtime;
	int n;
	int fucked_state = 0;

        pthread_mutex_lock(&list_mutex);

	debug(THREAD, "walk_and_kill(%d)", expire);

	p = thread_l_head;
	while (p) {
		if (p->id) {
			runtime = time (NULL) - p->start;
			debug(THREAD, "looking at thread %ld (runtime %d)", p->id, runtime);
			if (runtime > expire) {
				debug(THREAD, "canceling thread %ld after %d seconds", p->id, runtime);
				n = pthread_cancel(p->id);

				if (n != 0) {
					if (n == ESRCH) {
						// it may have exited already
						// see BLAH, fuck, etc. below.
						debug(THREAD, "couldn't find thread id %ld to cancel!", p->id);
						exit(-1);
					}	
					else {
						fprintf(stderr, "unknown pthread_cancel(%ld) error: %d\n", p->id, n);
						exit(-1);
					}	
				}		

				// BLAH
				// connect() can get stuck in SYN_SENT
				// & pthread_cancel() won't interrupt it. ?!@#
				// sweet limpin' christ...
/*
				n = pthread_cancel(p->id);
				if (n != ESRCH) {
					debug(THREAD, "yeek; thread %ld still running?!", p->id);
					fucked_state = 1;
					c_fucked++;
				} else if (n != 0) {
					perror("pthread_cancel");
					exit(-1);
				} else {
					debug(THREAD, "%ld successfully canceled", p->id);
					c_cancels++;
				}	
*/				
			}
		}
		p = (list *) p->next;
	}

        if (pthread_mutex_unlock(&list_mutex) != 0)
		perror("pthread_mutex_unlock");

	if (fucked_state) {
		// mother fucker can't be join'd or cancel'd
		// hang out until someone times out
		while (c_running > o_max_threads) {
			status("fucked_state");
			debug(THREAD, "fucked; waiting for (%d > %d)", c_running, o_max_threads);
			sleep(1);
		}
		debug(THREAD, "fucking hooray; a thread self-terminated; returning");
	}	
}

// Search the thread list, starting at thread_l_head, for the first node
// whose id equals the given id.  Returns that node, or NULL when no node
// matches.  An id of 0 finds the first unused node.
// The caller is expected to hold the list mutex.

list *walk_and_locate (pthread_t id) {
	list *node;

	debug(LIST, "walk_and_locate(%ld)", id);

	for (node = thread_l_head; node; node = (list *) node->next)
		if (node->id == id)
			break;

	// NULL here means the loop ran off the end of the list
	return node;
}

// release a thread id from the list
//
// Shuts down and closes the socket recorded for the thread, if one was
// recorded, and marks the list node as unused so that insert() can hand it
// out again.  Runs under the list mutex.
//
// Being asked to free id 0, or an id that is not on the list, is treated
// as an internal error and ends the program with a failure status.

void free_id (pthread_t id) {
	list *p;

	// we must be atomic

	debug(LIST, "free_id(%ld) waiting for mutex", (long) id);
	pthread_mutex_lock(&list_mutex);

	debug(LIST, "freeing id %ld", (long) id);

	if (id == 0) {
		fprintf(stderr, "free_id(0)?? huh?!\n");
		exit(-1);
	}

	p = walk_and_locate(id);
	if (p == NULL) {
		// the entry is only ever removed right here, by the thread
		// that owns it, so it has to be on the list
		fprintf(stderr, "free_id(%ld) couldn't locate id!?\n", (long) id);
		exit(-1);
	}

	// the socket is still -1 when the thread never got connected
	if (p->socket >= 0) {
		debug(SOCKET, "shutting down");
		shutdown(p->socket, SHUT_RDWR);
		close(p->socket);
		c_shutdowns++;
		debug(SOCKET, "free_id(%ld) closed %s %d", (long) id, p->host, p->socket);
	}

	p->id = 0;
	p->start = 0;
	p->socket = -1;		// never leave fd 0 (stdin) in a free slot
	p->host[0] = 0;

	c_frees++;
	pthread_mutex_unlock(&list_mutex);
}


// dump the list (for debugging)
//
// Prints one line per node to stdout: thread id, start time and the
// address of the next node.  Does not take the list mutex itself.

void walk_and_print(void) {
	list *p;

	debug(LIST, "walk_and_print()");

	// the casts make each argument match its conversion: %ld takes a
	// long and %p takes a void pointer
	for (p = thread_l_head; p != NULL; p = (list *) p->next)
		printf("id: %ld  start: %ld  next: %p\n",
			(long) p->id, (long) p->start, (void *) p->next);
}

// printf-style debug output to stderr
//
//   code  level of this message; it is printed only if code <= o_debug
//   type  short label for the message category
//   fmt   printf format, followed by its arguments
//
// code and type are normally supplied together through one of the
// SOCKET / THREAD / LIST macros.  Each line is prefixed with the id of the
// calling thread.  errno is left exactly as it was found, so a debug()
// call may sit between a failing call and the code that reports its errno.

void debug(int code, char *type, char *fmt, ...) {
	va_list ap;
	char buf[1024];
	int saved_errno = errno;

	if (code > o_debug)
		return;

	va_start(ap, fmt);
	vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	fprintf(stderr, "[%ld] %s : %s\n", (long) pthread_self(), type, buf);

	errno = saved_errno;	// stdio may have changed it
}


// make argv0 purty

extern char **environ;

// Number of bytes, counted from argv0ptr and including the terminating
// NUL, that status() may overwrite.  Measured on the first call, before
// anything has been overwritten, and remembered.
//
// The span runs from argv[0] to the end of the last environment string
// located behind it.
// NOTE(review): assumes the argument and environment strings sit back to
// back in memory starting at argv[0], as on Linux -- confirm for other
// platforms.  Without such environment strings only argv[0] itself is
// used.
static size_t status_capacity(void) {
	static size_t capacity;		// 0 until measured
	char *end;
	char **env;

	if (capacity != 0)
		return capacity;

	end = argv0ptr + strlen(argv0ptr) + 1;
	for (env = environ; env != NULL && *env != NULL; env++) {
		char *env_end = *env + strlen(*env) + 1;

		if (*env >= end && env_end > end)
			end = env_end;
	}
	capacity = (size_t) (end - argv0ptr);
	return capacity;
}

// Show a status line in place of the program name (a poor man's
// setproctitle): the printf-style message built from fmt, followed by the
// global counters, is written over argv[0].
//
// The text overwrites the argument strings and, as far as needed, the
// environment strings behind them, but never more than
// status_capacity() bytes: a line that does not fit is cut short.
// Not thread-safe; intended for the main thread.

void status(char *fmt, ...) {
	va_list ap;
	char buf[1024];

	if (argv0ptr == NULL)
		return;

	va_start(ap, fmt);
	vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	snprintf(argv0ptr, status_capacity(),
		"httpscan: %s (scanned=%d fucked=%d active=%d errors=%d hits=%d)",
		buf, c_hosts, c_fucked, c_running, c_errors, c_hits);
}	
