/* http_get - fetch the contents of an http URL
**
** Originally based on a simple version by Al Globus <globus@nas.nasa.gov>.
** Debugged and prettified by Jef Poskanzer <jef@mail.acme.com>.  Also includes
** ifdefs to handle https via OpenSSL.
*/

#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>

#ifdef USE_SSL
#include <openssl/ssl.h>
#include <openssl/err.h>
#endif


/* Forwards. */
static void usage();
static int getURL( char* url, char* referer, char* user_agent, char* auth_token, int ncookies, char** cookies, char* header_name, char* header_value );
static int getURLbyParts( int protocol, char* host, unsigned short port, char* file, char* referer, char* user_agent, char* auth_token, int ncookies, char** cookies, char* header_name, char* header_value );
static int open_client_socket( char* hostname, unsigned short port );
static void show_error( char* cause );
static void sigcatch( int sig );
static int b64_encode( unsigned char* ptr, int len, char* space, int size );


/* Globals. */
/* Program name, taken from argv[0]; prefixes the diagnostics on stderr. */
static char* argv0;
/* Non-zero when -v was given; the response headers are then echoed to
** stdout as they are parsed.
*/
static int verbose;
/* Number of seconds handed to alarm(); defaults to 60, changed with -t. */
static int timeout;
/* The URL argument from the command line; used in error messages. */
static char* url;

/* Protocol symbols. */
#define PROTO_HTTP 0
/* PROTO_HTTPS only exists when the program is built with USE_SSL. */
#ifdef USE_SSL
#define PROTO_HTTPS 1
#endif

/* Header FSM states.  These drive the byte-at-a-time response header parser
** in getURLbyParts().  The LINE1 states pick the numeric status out of the
** first line; the remaining states track line endings so that a blank line
** (end of headers) is recognized for LF, CR, and CRLF conventions.
** NOTE(review): HDST_BOL has a case in the parser, but no transition into it
** is visible in this file.
*/
#define HDST_LINE1_PROTOCOL 0
#define HDST_LINE1_WHITESPACE 1
#define HDST_LINE1_STATUS 2
#define HDST_BOL 10
#define HDST_TEXT 11
#define HDST_LF 12
#define HDST_CR 13
#define HDST_CRLF 14
#define HDST_CRLFCR 15

/* Maximum number of -c cookie options accepted on the command line. */
#define MAX_COOKIES 20


/* Parse the command line, fetch the URL, and turn the HTTP status into an
** exit code: 200 exits 0, a status of 0 (nothing parsed) exits 1, and any
** other status is handed to exit() as-is.
*/
int
main( int argc, char** argv )
    {
    char* cookies[MAX_COOKIES];
    int ncookies = 0;
    char* referer = (char*) 0;
    char* user_agent = "http_get";
    char* auth_token = (char*) 0;
    char* header_name = (char*) 0;
    char* header_value = (char*) 0;
    int argn;
    int status;

    argv0 = argv[0];
    timeout = 60;
    verbose = 0;

    /* Options come first; stop at the first argument that is not of the
    ** form "-x".  A flag that is missing its value(s) falls through to
    ** usage().
    */
    for ( argn = 1; argn < argc; ++argn )
	{
	char* opt = argv[argn];
	int left = argc - argn - 1;	/* arguments following this one */

	if ( opt[0] != '-' || opt[1] == '\0' )
	    break;

	if ( strcmp( opt, "-t" ) == 0 && left >= 1 )
	    timeout = atoi( argv[++argn] );
	else if ( strcmp( opt, "-r" ) == 0 && left >= 1 )
	    referer = argv[++argn];
	else if ( strcmp( opt, "-u" ) == 0 && left >= 1 )
	    user_agent = argv[++argn];
	else if ( strcmp( opt, "-a" ) == 0 && left >= 1 )
	    auth_token = argv[++argn];
	else if ( strcmp( opt, "-c" ) == 0 && left >= 1 )
	    {
	    if ( ncookies >= MAX_COOKIES )
		{
		(void) fprintf( stderr, "%s: too many cookies\n", argv0 );
		exit( 1 );
		}
	    cookies[ncookies] = argv[++argn];
	    ++ncookies;
	    }
	else if ( strcmp( opt, "-h" ) == 0 && left >= 2 )
	    {
	    header_name = argv[++argn];
	    header_value = argv[++argn];
	    }
	else if ( strcmp( opt, "-v" ) == 0 )
	    verbose = 1;
	else
	    usage();
	}

    /* Exactly one argument - the URL - must remain. */
    if ( argn != argc - 1 )
	usage();
    url = argv[argn];

    (void) signal( SIGALRM, sigcatch );
    status = getURL( url, referer, user_agent, auth_token, ncookies, cookies, header_name, header_value );

    switch ( status )
	{
	case 200:
	exit( 0 );
	case 0:
	exit( 1 );
	default:
	exit( status );
	}
    }


/* Print the command-line synopsis on stderr and exit with status 1. */
static void
usage()
    {
    (void) fprintf(
	stderr,
	"usage:  %s [-c cookie] [-t timeout] [-r referer] [-u user-agent]"
	" [-a username:password] [-h header value] [-v] url\n",
	argv0 );
    exit( 1 );
    }


/* Split a URL into protocol, host, port, and file, then fetch it with
** getURLbyParts() and return that function's result (the HTTP status).
**
** URL must be of the form http://host-name[:port]/file-name (https:// is
** also accepted when built with USE_SSL).  If the port is omitted it is 80
** for http and 443 for https; if the file part is omitted it is "/".
**
** A null URL, an unsupported scheme, a host name too long for the local
** buffer, or a port that is not a number in 1..65535 prints a message on
** stderr and exits with status 1.
*/
static int
getURL( char* url, char* referer, char* user_agent, char* auth_token, int ncookies, char** cookies, char* header_name, char* header_value )
    {
    char* s;
    int protocol;
    char host[2000];
    size_t host_len;
    unsigned short port;
    char* file = (char*) 0;
    char* http = "http://";
    size_t http_len = strlen( http );
#ifdef USE_SSL
    char* https = "https://";
    size_t https_len = strlen( https );
#endif /* USE_SSL */
    size_t proto_len;

    if ( url == (char*) 0 )
	{
	(void) fprintf( stderr, "%s: null URL\n", argv0 );
	exit( 1 );
	}
    if ( strncmp( http, url, http_len ) == 0 )
	{
	proto_len = http_len;
	protocol = PROTO_HTTP;
	}
#ifdef USE_SSL
    else if ( strncmp( https, url, https_len ) == 0 )
	{
	proto_len = https_len;
	protocol = PROTO_HTTPS;
	}
#endif /* USE_SSL */
    else
	{
	(void) fprintf( stderr, "%s: non-http URL\n", argv0 );
	exit( 1 );
	}

    /* Get the host name: everything up to the first ':' or '/'. */
    for ( s = url + proto_len; *s != '\0' && *s != ':' && *s != '/'; ++s )
	;
    host_len = (size_t) ( s - ( url + proto_len ) );
    /* The URL comes straight from the command line, so its host part can be
    ** arbitrarily long; refuse it rather than overrun host[].
    */
    if ( host_len >= sizeof(host) )
	{
	(void) fprintf( stderr, "%s: host name too long\n", argv0 );
	exit( 1 );
	}
    (void) memcpy( host, url + proto_len, host_len );
    host[host_len] = '\0';

    /* Get port number. */
    if ( *s == ':' )
	{
	char* end;
	long portnum;

	++s;
	/* Require a leading digit so strtol's optional sign / whitespace
	** are not accepted, and require the number to run up to the file
	** part or the end of the URL.
	*/
	if ( *s < '0' || *s > '9' )
	    end = s;
	else
	    portnum = strtol( s, &end, 10 );
	if ( end == s || ( *end != '\0' && *end != '/' ) ||
	     portnum < 1 || portnum > 65535 )
	    {
	    (void) fprintf( stderr, "%s: bad port number in URL\n", argv0 );
	    exit( 1 );
	    }
	port = (unsigned short) portnum;
	s = end;
	}
    else
	{
#ifdef USE_SSL
	if ( protocol == PROTO_HTTPS )
	    port = 443;
	else
#endif
	    port = 80;
	}

    /* Get the file name. */
    if ( *s == '\0' )
	file = "/";
    else
	file = s;

    return getURLbyParts( protocol, host, port, file, referer, user_agent, auth_token, ncookies, cookies, header_name, header_value );
    }


static int
getURLbyParts( int protocol, char* host, unsigned short port, char* file, char* referer, char* user_agent, char* auth_token, int ncookies, char** cookies, char* header_name, char* header_value )
    {
    int sockfd;
#ifdef USE_SSL
    SSL_CTX* ssl_ctx = (SSL_CTX*) 0;
    SSL* ssl = (SSL*) 0;
#endif
    char buf[20000];
    int i, bytes, b, header_state, status;

    (void) alarm( timeout );
    sockfd = open_client_socket( host, port );

#ifdef USE_SSL
    if ( protocol == PROTO_HTTPS )
	{
	/* Make SSL connection. */
	int r;
	SSL_load_error_strings();
	SSLeay_add_ssl_algorithms();
	ssl_ctx = SSL_CTX_new( SSLv23_client_method() );
	ssl = SSL_new( ssl_ctx );
	SSL_set_fd( ssl, sockfd );
	r = SSL_connect( ssl );
	if ( r <= 0 )
	    {
	    (void) fprintf(
		stderr, "%s: %s - SSL connection failed - %d\n",
		argv0, url, r );
	    ERR_print_errors_fp( stderr );
	    exit( 1 );
	    }
	}
#endif

    /* Build request buffer, starting with the GET. */
    (void) alarm( timeout );
    bytes = snprintf( buf, sizeof(buf), "GET %s HTTP/1.0\r\n", file );
    /* HTTP/1.1 host header - some servers want it even in HTTP/1.0. */
    bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Host: %s\r\n", host );
    if ( referer != (char*) 0 )
	/* Referer. */
	bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Referer: %s\r\n", referer );
    /* User-agent. */
    bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "User-Agent: %s\r\n", user_agent );
    /* Fixed headers. */
    bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Accept: */*\r\n" );
    bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Accept-Language: en\r\n" );
    bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Accept-Charset: iso-8859-1,*,utf-8\r\n" );
    if ( auth_token != (char*) 0 )
	{
	/* Basic Auth info. */
	char token_buf[1000];
	token_buf[b64_encode( (unsigned char*) auth_token, strlen( auth_token ), token_buf, sizeof(token_buf) )] = '\0';
	bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Authorization: Basic %s\r\n", token_buf );
	}
    /* Cookies. */
    for ( i = 0; i < ncookies; ++i )
	bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Cookie: %s\r\n", cookies[i] );
    /* Optional extra header. */
    if ( header_name != (char*) 0 )
	bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "%s: %s\r\n", header_name, header_value );
    /* Blank line. */
    bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "\r\n" );
    /* Now actually send it. */
#ifdef USE_SSL
    if ( protocol == PROTO_HTTPS )
	(void) SSL_write( ssl, buf, bytes );
    else
#endif
	(void) write( sockfd, buf, bytes );

    /* Get lines until a blank one. */
    (void) alarm( timeout );
    header_state = HDST_LINE1_PROTOCOL;
    status = 0;
    for (;;)
	{
#ifdef USE_SSL
	if ( protocol == PROTO_HTTPS )
	    bytes = SSL_read( ssl, buf, sizeof(buf) );
	else
#endif
	    bytes = read( sockfd, buf, sizeof(buf) );
	if ( bytes <= 0 )
	    break;
	for ( b = 0; b < bytes; ++b )
	    {
	    if ( verbose )
		(void) write( 1, &buf[b], 1 );
	    switch ( header_state )
		{
		case HDST_LINE1_PROTOCOL:
		switch ( buf[b] )
		    {
		    case ' ': case '\t':
		    header_state = HDST_LINE1_WHITESPACE; ; break;
		    case '\n': header_state = HDST_LF ; break;
		    case '\r': header_state = HDST_CR; break;
		    }
		break;
		case HDST_LINE1_WHITESPACE:
		switch ( buf[b] )
		    {
		    case '0': case '1': case '2': case '3': case '4':
		    case '5': case '6': case '7': case '8': case '9':
		    status = buf[b] - '0';
		    header_state = HDST_LINE1_STATUS;
		    break;
		    case '\n': header_state = HDST_LF ; break;
		    case '\r': header_state = HDST_CR; break;
		    default: header_state = HDST_TEXT; break;
		    }
		break;
		case HDST_LINE1_STATUS:
		switch ( buf[b] )
		    {
		    case '0': case '1': case '2': case '3': case '4':
		    case '5': case '6': case '7': case '8': case '9':
		    status = status * 10 + buf[b] - '0';
		    break;
		    case '\n': header_state = HDST_LF ; break;
		    case '\r': header_state = HDST_CR; break;
		    default: header_state = HDST_TEXT; break;
		    }
		break;
		case HDST_BOL:
		switch ( buf[b] )
		    {
		    case '\n': header_state = HDST_LF; break;
		    case '\r': header_state = HDST_CR; break;
		    default: header_state = HDST_TEXT; break;
		    }
		break;
		case HDST_TEXT:
		switch ( buf[b] )
		    {
		    case '\n': header_state = HDST_LF; break;
		    case '\r': header_state = HDST_CR; break;
		    }
		break;

		case HDST_LF:
		switch ( buf[b] )
		    {
		    case '\n': goto end_of_headers;
		    case '\r': header_state = HDST_CR; break;
		    default: header_state = HDST_TEXT; break;
		    }
		break;

		case HDST_CR:
		switch ( buf[b] )
		    {
		    case '\n': header_state = HDST_CRLF; break;
		    case '\r': goto end_of_headers;
		    default: header_state = HDST_TEXT; break;
		    }
		break;

		case HDST_CRLF:
		switch ( buf[b] )
		    {
		    case '\n': goto end_of_headers;
		    case '\r': header_state = HDST_CRLFCR; break;
		    default: header_state = HDST_TEXT; break;
		    }
		break;

		case HDST_CRLFCR:
		switch ( buf[b] )
		    {
		    case '\n': case '\r': goto end_of_headers;
		    default: header_state = HDST_TEXT; break;
		    }
		break;
		}
	    }
	}
    end_of_headers:
    /* Dump out the rest of the headers buffer. */
    if ( bytes > 0 )
	{
	++b;
	(void) write( 1, &buf[b], bytes - b );
	}

    /* Copy the data. */
    for (;;)
        {
	(void) alarm( timeout );
#ifdef USE_SSL
	if ( protocol == PROTO_HTTPS )
	    bytes = SSL_read( ssl, buf, sizeof(buf) );
	else
#endif
	    bytes = read( sockfd, buf, sizeof(buf) );
	if ( bytes == 0 )
	    break;
	if ( bytes < 0 )
	    show_error( "read" );
	(void) write( 1, buf, bytes );
        }
#ifdef USE_SSL
    if ( protocol == PROTO_HTTPS )
	{
	SSL_free( ssl );
	SSL_CTX_free( ssl_ctx );
	}
#endif
    (void) close( sockfd );
    return status;
    }


#if defined(AF_INET6) && defined(IN6_IS_ADDR_V4MAPPED)
#define USE_IPV6
#endif

/* Resolve hostname and open a connected stream socket to it on port.
**
** With USE_IPV6 the name is resolved with getaddrinfo(); the first IPv4
** result is used if there is one, otherwise the first IPv6 result.  Without
** USE_IPV6 the name is resolved with gethostbyname().
**
** Returns the connected socket descriptor.  Resolution failures print a
** message and exit; socket() and connect() failures go through show_error().
*/
static int
open_client_socket( char* hostname, unsigned short port )
    {
#ifdef USE_IPV6
    struct addrinfo hints;
    struct addrinfo* results;
    struct addrinfo* cur;
    struct addrinfo* first_v4 = (struct addrinfo*) 0;
    struct addrinfo* first_v6 = (struct addrinfo*) 0;
    struct addrinfo* chosen;
    char portstr[10];
    int gaierr;
    struct sockaddr_in6 sa;
#else /* USE_IPV6 */
    struct hostent* he;
    struct sockaddr_in sa;
#endif /* USE_IPV6 */
    int sa_len, sock_family, sock_type, sock_protocol;
    int sockfd;

    (void) memset( (void*) &sa, 0, sizeof(sa) );

#ifdef USE_IPV6

    (void) memset( &hints, 0, sizeof(hints) );
    hints.ai_family = PF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    (void) snprintf( portstr, sizeof(portstr), "%d", (int) port );
    gaierr = getaddrinfo( hostname, portstr, &hints, &results );
    if ( gaierr != 0 )
	{
	(void) fprintf(
	    stderr, "%s: getaddrinfo %s - %s\n", argv0, hostname,
	    gai_strerror( gaierr ) );
	exit( 1 );
	}

    /* Remember the first entry of each address family. */
    for ( cur = results; cur != (struct addrinfo*) 0; cur = cur->ai_next )
	{
	if ( cur->ai_family == AF_INET )
	    {
	    if ( first_v4 == (struct addrinfo*) 0 )
		first_v4 = cur;
	    }
	else if ( cur->ai_family == AF_INET6 )
	    {
	    if ( first_v6 == (struct addrinfo*) 0 )
		first_v6 = cur;
	    }
	}

    /* IPv4 wins when both families are available. */
    chosen = first_v4 != (struct addrinfo*) 0 ? first_v4 : first_v6;
    if ( chosen == (struct addrinfo*) 0 )
	{
	(void) fprintf(
	    stderr, "%s: no valid address found for host %s\n", argv0,
	    hostname );
	exit( 1 );
	}
    if ( sizeof(sa) < chosen->ai_addrlen )
	{
	(void) fprintf(
	    stderr, "%s - sockaddr too small (%lu < %lu)\n",
	    hostname, (unsigned long) sizeof(sa),
	    (unsigned long) chosen->ai_addrlen );
	exit( 1 );
	}
    sa_len = chosen->ai_addrlen;
    sock_protocol = chosen->ai_protocol;
    sock_type = chosen->ai_socktype;
    sock_family = chosen->ai_family;
    (void) memmove( &sa, chosen->ai_addr, sa_len );

    /* Everything needed has been copied out of the result list. */
    freeaddrinfo( results );

#else /* USE_IPV6 */

    he = gethostbyname( hostname );
    if ( he == (struct hostent*) 0 )
	{
	(void) fprintf( stderr, "%s: unknown host - %s\n", argv0, hostname );
	exit( 1 );
	}
    sa.sin_family = he->h_addrtype;
    sock_family = sa.sin_family;
    sock_type = SOCK_STREAM;
    sock_protocol = 0;
    sa_len = sizeof(sa);
    (void) memmove( &sa.sin_addr, he->h_addr, he->h_length );
    sa.sin_port = htons( port );

#endif /* USE_IPV6 */

    sockfd = socket( sock_family, sock_type, sock_protocol );
    if ( sockfd < 0 )
	show_error( "socket" );

    if ( connect( sockfd, (struct sockaddr*) &sa, sa_len ) < 0 )
	show_error( "connect" );

    return sockfd;
    }


/* Report a failed system call and exit with status 1.
**
** The message is "<program>: <url> - <cause>", followed by perror()'s
** description of the current errno.  cause names the call that failed.
*/
static void
show_error( char* cause )
    {
    char buf[5000];

    /* The URL comes from the command line and can be longer than buf, so
    ** use the bounded formatter: a long URL truncates the prefix instead of
    ** overrunning the stack.
    */
    (void) snprintf( buf, sizeof(buf), "%s: %s - %s", argv0, url, cause );
    perror( buf );
    exit( 1 );
    }


/* SIGALRM handler: report that the fetch timed out and exit with status 1.
**
** The alarm can go off while the main flow is inside library code such as
** the resolver or stdio, so only async-signal-safe calls are used here:
** write() instead of fprintf(), and _exit() instead of exit().  The text
** written is the same "<program>: <url> - timed out" line as before.
*/
static void
sigcatch( int sig )
    {
    static const char sep[] = ": ";
    static const char tail[] = " - timed out\n";

    (void) sig;
    if ( argv0 != (char*) 0 )
	(void) write( 2, argv0, strlen( argv0 ) );
    (void) write( 2, sep, sizeof(sep) - 1 );
    if ( url != (char*) 0 )
	(void) write( 2, url, strlen( url ) );
    (void) write( 2, tail, sizeof(tail) - 1 );
    _exit( 1 );
    }


/* Base-64 encoding.  This encodes binary data as printable ASCII characters.
** Three 8-bit binary bytes are turned into four 6-bit values, like so:
**
**   [11111111]  [22222222]  [33333333]
**
**   [111111] [112222] [222233] [333333]
**
** Then the 6-bit values are represented using the characters "A-Za-z0-9+/".
*/

/* Maps a 6-bit value (0-63) to its base-64 character. */
static const char b64_encode_table[64] = {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',  /* 0-7 */
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',  /* 8-15 */
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',  /* 16-23 */
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',  /* 24-31 */
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',  /* 32-39 */
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',  /* 40-47 */
    'w', 'x', 'y', 'z', '0', '1', '2', '3',  /* 48-55 */
    '4', '5', '6', '7', '8', '9', '+', '/'   /* 56-63 */
    };

/* Do base-64 encoding on a hunk of bytes.
**
** ptr/len is the input.  space/size is the output buffer; at most size
** characters are stored and the output is NOT NUL-terminated.  Returns the
** number of characters stored.
**
** A complete encoding needs 4 * ((len + 2) / 3) characters.  The final
** group is padded with '=' only when the input length is not a multiple of
** three, so three input bytes produce exactly four characters with no
** padding.  If the buffer is smaller than the full encoding, the result is
** a prefix of it; a size of zero or less stores nothing.
*/
static int
b64_encode( unsigned char* ptr, int len, char* space, int size )
    {
    int ptr_idx, space_idx, remaining, take, k;
    unsigned long group;
    char quad[4];

    space_idx = 0;
    ptr_idx = 0;
    while ( ptr_idx < len )
	{
	remaining = len - ptr_idx;
	take = remaining < 3 ? remaining : 3;

	/* Pack up to three input bytes into the top of a 24-bit group;
	** missing bytes count as zero bits.
	*/
	group = (unsigned long) ptr[ptr_idx] << 16;
	if ( take > 1 )
	    group |= (unsigned long) ptr[ptr_idx + 1] << 8;
	if ( take > 2 )
	    group |= (unsigned long) ptr[ptr_idx + 2];

	/* Four 6-bit slices; slices holding no input bits become '='. */
	quad[0] = b64_encode_table[( group >> 18 ) & 0x3f];
	quad[1] = b64_encode_table[( group >> 12 ) & 0x3f];
	quad[2] = take > 1 ? b64_encode_table[( group >> 6 ) & 0x3f] : '=';
	quad[3] = take > 2 ? b64_encode_table[group & 0x3f] : '=';

	for ( k = 0; k < 4; ++k )
	    if ( space_idx < size )
		space[space_idx++] = quad[k];

	ptr_idx += take;
	}
    return space_idx;
    }
