/* googrep.c - search text using a Google-style pattern
**
** Copyright  2007 by Jef Poskanzer <jef@mail.acme.com>.
** All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions
** are met:
** 1. Redistributions of source code must retain the above copyright
**    notice, this list of conditions and the following disclaimer.
** 2. Redistributions in binary form must reproduce the above copyright
**    notice, this list of conditions and the following disclaimer in the
**    documentation and/or other materials provided with the distribution.
**
** THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
** ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
** FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
** DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
** OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
** HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
** LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
** OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
** SUCH DAMAGE.
**
** For commentary on this license please see http://www.acme.com/license.html
*/


#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <regex.h>
#include <string.h>


/* Program name as invoked (argv[0]); used as a prefix in diagnostics. */
static char* argv0;

/* Command-line option flags.  main() clears them all and then sets the
** ones selected by the option letters shown.
*/
static int flag_count;			/* -c: print a per-file count */
static int flag_with_filename;		/* -H: prefix output with the file name */
static int flag_no_filename;		/* -h: never prefix the file name */
static int flag_dont_ignore_case;	/* -I: compile terms case-sensitively */
static int flag_files_without_matches;	/* -L: list files with a zero count */
static int flag_files_with_matches;	/* -l: list files with a nonzero count */
static int flag_line_number;		/* -n: prefix output with the line number */
static int flag_quiet;			/* -q: suppress per-line output */
static int flag_no_messages;		/* -s: suppress file-open error messages */
static int flag_invert_match;		/* -v: report the non-matching lines */

/* Set by main(): nonzero when more than one file argument was given. */
static int show_filenames;

/* Set to 1 by show_match(); decides the exit status of the program. */
static int success;

/* Per-file counters, reset at the start of every googrep() call. */
static int count;
static int line_number;


/* Search one open stream with the compiled terms and print the results. */
static void googrep( char* filename, FILE* fp, regex_t* terms, int nterms );
/* Record a reported line and, unless a summary-only flag is set, print it. */
static void show_match( char* filename, char* line );
/* Copy src to dst with a backslash inserted before regex metacharacters. */
static void escape( char* src, char* dst );


int
main( int argc, char* argv[] )
    {
    int argn, i;
    char c;
    char* pattern;
    char* cp;
    char* escaped;
    char* term;
    int nterms;
    int reg_flags;
    regex_t* terms;
    char* usage = "%s [-cHhILlnqsv] pattern [file ...]\n";

    argv0 = argv[0];

    /* Default flags. */
    flag_count = 0;
    flag_with_filename = 0;
    flag_no_filename = 0;
    flag_dont_ignore_case = 0;
    flag_files_without_matches = 0;
    flag_files_with_matches = 0;
    flag_line_number = 0;
    flag_quiet = 0;
    flag_no_messages = 0;
    flag_invert_match = 0;

    /* Parse the flags. */
    argn = 1;
    while ( argn < argc && argv[argn][0] == '-' && argv[argn][1] != '\0' )
	{
	for ( i = 1; ( c = argv[argn][i] ) != '\0'; ++i )
	    {
	    switch ( c )
		{
		case 'c': flag_count = 1; break;
		case 'H': flag_with_filename = 1; break;
		case 'h': flag_no_filename = 1; break;
		case 'I': flag_dont_ignore_case = 1; break;
		case 'L': flag_files_without_matches = 1; break;
		case 'l': flag_files_with_matches = 1; break;
		case 'n': flag_line_number = 1; break;
		case 'q': flag_quiet = 1; break;
		case 's': flag_no_messages = 1; break;
		case 'v': flag_invert_match = 1; break;
		default:
		(void) fprintf( stderr, usage, argv0 );
		exit( 2 );
		}
	    }
	++argn;
	}

    /* Parse the pattern. */
    if ( argn == argc )
	{
	(void) fprintf( stderr, usage, argv0 );
	exit( 2 );
	}
    pattern = argv[argn];
    /* Allocate space for the regexes. */
    terms = (regex_t*) malloc( sizeof(regex_t) * strlen( pattern ) / 2 );
    if ( terms == (regex_t*) 0 )
	{
	(void) fprintf( stderr, "%s: out of memory\n", argv0 );
	exit( 2 );
	}
    /* Allocate a string to build the escaped terms. */
    escaped = (char*) malloc( strlen( pattern ) * 2 );
    if ( escaped == (char*) 0 )
	{
	(void) fprintf( stderr, "%s: out of memory\n", argv0 );
	exit( 2 );
	}
    /* Allocate a string to build the augmented terms. */
    term = (char*) malloc( strlen( pattern ) + 40 );
    if ( term == (char*) 0 )
	{
	(void) fprintf( stderr, "%s: out of memory\n", argv0 );
	exit( 2 );
	}
    /* Regex flags. */
    reg_flags = REG_EXTENDED|REG_NOSUB;
    if ( ! flag_dont_ignore_case )
	reg_flags |= REG_ICASE;
    nterms = 0;
    for (;;)
	{
	/* Find the term. */
	if ( *pattern == '"' )
	    {
	    ++pattern;
	    cp = strchr( pattern, '"' );
	    }
	else if ( *pattern == '\'' )
	    {
	    ++pattern;
	    cp = strchr( pattern, '\'' );
	    }
	else
	    cp = strchr( pattern, ' ' );
	if ( cp != (char*) 0 )
	    {
	    *cp = '\0';
	    ++cp;
	    }
	/* Build the augmented term. */
	(void) strcpy( term, "(^|[^[:alnum:]_])(" );
	(void) escape( pattern, escaped );
	(void) strcat( term, escaped );
	(void) strcat( term, ")([^[:alnum:]_]|$)" );
	/* And compile it. */
	if ( regcomp( &terms[nterms], term, reg_flags ) != 0 )
	    {
	    (void) fprintf( stderr, "%s: invalid regular expression - %s\n", argv0, pattern );
	    exit( 2 );
	    }
	++nterms;
	if ( cp == (char*) 0 )
	    break;
	while ( *cp == ' ' )
	    ++cp;
	if ( *cp == '\0' )
	    break;
	pattern = cp;
	}
    ++argn;

    /* Go through the files. */
    success = 0;
    if ( argn == argc )
	{
	show_filenames = 0;
	googrep( "stdin", stdin, terms, nterms );
	}
    else
	{
	show_filenames = ( argc - argn > 1 );
	while ( argn < argc )
	    {
	    if ( strcmp( argv[argn], "-" ) == 0 )
		googrep( "stdin", stdin, terms, nterms );
	    else
		{
		FILE* fp = fopen( argv[argn], "r" );
		if ( fp == (FILE*) 0 )
		    {
		    if ( ! flag_no_messages )
			perror( argv[argn] );
		    }
		else
		    {
		    googrep( argv[argn], fp, terms, nterms );
		    (void) fclose( fp );
		    }
		}
	    ++argn;
	    }
	}

    if ( success )
	exit( 0 );
    else
	exit( 1 );
    }


/* Search one input stream and report its lines via show_match().
**
** A line is reported when every term matches it or, if
** flag_invert_match is set, when at least one term does not.  The
** per-file counters count and line_number are reset on entry.  After the
** stream is exhausted the -c count and the -L / -l file name are
** printed as requested.
**
** filename: name to print for this stream.
** fp:       stream to read; it is not closed here.
** terms:    array of compiled regexes.
** nterms:   number of entries in terms.
**
** Input is read through a fixed 10000-byte buffer, so a longer line
** arrives from fgets in several pieces and each piece is handled and
** numbered as a line of its own.
*/
static void
googrep( char* filename, FILE* fp, regex_t* terms, int nterms )
    {
    char buf[10000];
    size_t len;
    int t;
    int all_matched;

    count = 0;
    line_number = 0;

    while ( fgets( buf, sizeof(buf), fp ) != (char*) 0 )
	{
	++line_number;

	/* Strip trailing newlines so regexec never sees them. */
	for ( len = strlen( buf ); len > 0 && buf[len-1] == '\n'; --len )
	    buf[len-1] = '\0';

	/* Try the terms in order, giving up at the first failure. */
	all_matched = 1;
	for ( t = 0; t < nterms && all_matched; ++t )
	    {
	    if ( regexec( &terms[t], buf, 0, (regmatch_t*) 0, 0 ) != 0 )
		all_matched = 0;
	    }

	/* Normally report full matches; with -v report the others. */
	if ( all_matched ? ! flag_invert_match : flag_invert_match )
	    show_match( filename, buf );
	}

    /* Per-file summaries. */
    if ( flag_count )
	{
	if ( ! flag_no_filename && ( show_filenames || flag_with_filename ) )
	    (void) printf( "%s:", filename );
	(void) printf( "%d\n", count );
	}
    /* The two conditions exclude each other, so at most one name prints. */
    if ( ( flag_files_without_matches && count == 0 ) ||
	 ( flag_files_with_matches && count > 0 ) )
	(void) printf( "%s\n", filename );
    }


/* Record one reported line and print it if per-line output is wanted.
**
** Always sets success and increments count.  Nothing is printed when
** any of -q, -c, -L or -l is in effect.  Otherwise the line is printed,
** preceded by "filename:" when file names are being shown and not
** suppressed by -h, and by "line_number:" when -n is set.
**
** filename: name of the stream the line came from.
** line:     the line text, without its trailing newline.
*/
static void
show_match( char* filename, char* line )
    {
    success = 1;
    ++count;

    /* Summary-only and quiet modes produce no per-line output. */
    if ( flag_quiet || flag_count || flag_files_without_matches || flag_files_with_matches )
	return;

    if ( ! flag_no_filename && ( show_filenames || flag_with_filename ) )
	(void) printf( "%s:", filename );
    if ( flag_line_number )
	(void) printf( "%d:", line_number );
    (void) printf( "%s\n", line );
    }


/* Copy src to dst, inserting a backslash in front of every character
** that is special in an extended regular expression, so that the result
** matches the text of src literally.
**
** src: NUL-terminated string to escape; it is not modified.
** dst: output buffer.  Every character may be doubled, so it must have
**      room for 2 * strlen(src) + 1 bytes.
*/
static void
escape( char* src, char* dst )
    {
    /* Backslash itself has to be in the set: copied through unescaped it
    ** would combine with whatever character ends up after it, changing
    ** the meaning of the expression or making it invalid.
    */
    static const char specials[] = "\\|{}()[].*+?^$";

    for ( ; *src != '\0'; ++src )
	{
	if ( strchr( specials, *src ) != (char*) 0 )
	    *dst++ = '\\';
	*dst++ = *src;
	}
    *dst = '\0';
    }
