pcregrep.cpp File Reference

#include <ctype.h>
#include <locale.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <pcre.h>

Go to the source code of this file.

Classes

struct  option_item

Defines

#define FALSE   0
#define TRUE   1
#define MAX_PATTERN_COUNT   100
#define MBUFTHIRD   8192
#define PO_WORD_MATCH   0x0001
#define PO_LINE_MATCH   0x0002
#define PO_FIXED_STRINGS   0x0004
#define N_COLOUR   (-1)
#define N_EXCLUDE   (-2)
#define N_HELP   (-3)
#define N_INCLUDE   (-4)
#define N_LABEL   (-5)
#define N_LOCALE   (-6)
#define N_NULL   (-7)

Typedefs

typedef int BOOL
typedef void directory_type

Enumerations

enum  {
  FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY,
  FN_FORCE
}
enum  { dee_READ, dee_SKIP, dee_RECURSE }
enum  { DEE_READ, DEE_SKIP }
enum  {
  EL_LF, EL_CR, EL_CRLF, EL_ANY,
  EL_ANYCRLF
}
enum  {
  OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER,
  OP_OP_NUMBER, OP_PATLIST
}

Functions

int isdirectory (char *filename)
directory_type * opendirectory (char *filename)
char * readdirectory (directory_type *dir)
void closedirectory (directory_type *dir)
int isregfile (char *filename)
static BOOL is_stdout_tty (void)
char * strerror (int n)
static char * end_of_line (char *p, char *endptr, int *lenptr)
static char * previous_line (char *p, char *startptr)
static void do_after_lines (int lastmatchnumber, char *lastmatchrestart, char *endptr, char *printname)
static int pcregrep (FILE *in, char *printname)
static int grep_or_recurse (char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
static int usage (int rc)
static void help (void)
static int handle_option (int letter, int options)
static char * ordin (int n)
static BOOL compile_single_pattern (char *pattern, int options, char *filename, int count)
static BOOL compile_pattern (char *pattern, int options, char *filename, int count)
int main (int argc, char **argv)

Variables

static int endlinetype
static char * colour_string = (char *)"1;31"
static char * colour_option = NULL
static char * dee_option = NULL
static char * DEE_option = NULL
static char * newline = NULL
static char * pattern_filename = NULL
static char * stdin_name = (char *)"(standard input)"
static char * locale = NULL
static const unsigned char * pcretables = NULL
static int pattern_count = 0
static pcre ** pattern_list = NULL
static pcre_extra ** hints_list = NULL
static char * include_pattern = NULL
static char * exclude_pattern = NULL
static pcre * include_compiled = NULL
static pcre * exclude_compiled = NULL
static int after_context = 0
static int before_context = 0
static int both_context = 0
static int dee_action = dee_READ
static int DEE_action = DEE_READ
static int error_count = 0
static int filenames = FN_DEFAULT
static int process_options = 0
static BOOL count_only = FALSE
static BOOL do_colour = FALSE
static BOOL hyphenpending = FALSE
static BOOL invert = FALSE
static BOOL multiline = FALSE
static BOOL number = FALSE
static BOOL only_matching = FALSE
static BOOL quiet = FALSE
static BOOL silent = FALSE
static BOOL utf8 = FALSE
static option_item optionlist []
static const char * prefix []
static const char * suffix []
const int utf8_table3 [] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}
const char utf8_table4 []
int sys_nerr
char * sys_errlist []


Define Documentation

#define FALSE   0
 

Definition at line 60 of file pcregrep.cpp.

#define MAX_PATTERN_COUNT   100
 

Definition at line 65 of file pcregrep.cpp.

Referenced by compile_single_pattern(), and main().

#define MBUFTHIRD   8192
 

Definition at line 70 of file pcregrep.cpp.

Referenced by compile_pattern(), compile_single_pattern(), and pcregrep().

#define N_COLOUR   (-1)
 

Definition at line 170 of file pcregrep.cpp.

#define N_EXCLUDE   (-2)
 

Definition at line 171 of file pcregrep.cpp.

#define N_HELP   (-3)
 

Definition at line 172 of file pcregrep.cpp.

Referenced by handle_option().

#define N_INCLUDE   (-4)
 

Definition at line 173 of file pcregrep.cpp.

#define N_LABEL   (-5)
 

Definition at line 174 of file pcregrep.cpp.

#define N_LOCALE   (-6)
 

Definition at line 175 of file pcregrep.cpp.

#define N_NULL   (-7)
 

Definition at line 176 of file pcregrep.cpp.

#define PO_FIXED_STRINGS   0x0004
 

Definition at line 88 of file pcregrep.cpp.

Referenced by compile_pattern(), and handle_option().

#define PO_LINE_MATCH   0x0002
 

Definition at line 87 of file pcregrep.cpp.

Referenced by handle_option().

#define PO_WORD_MATCH   0x0001
 

Definition at line 86 of file pcregrep.cpp.

Referenced by handle_option().

#define TRUE   1
 

Definition at line 61 of file pcregrep.cpp.


Typedef Documentation

typedef int BOOL
 

Definition at line 63 of file pcregrep.cpp.

typedef void directory_type
 

Definition at line 441 of file pcregrep.cpp.


Enumeration Type Documentation

anonymous enum
 

Enumerator:
FN_NONE 
FN_DEFAULT 
FN_ONLY 
FN_NOMATCH_ONLY 
FN_FORCE 

Definition at line 77 of file pcregrep.cpp.

anonymous enum
 

Enumerator:
dee_READ 
dee_SKIP 
dee_RECURSE 

Definition at line 81 of file pcregrep.cpp.

anonymous enum
 

Enumerator:
DEE_READ 
DEE_SKIP 

Definition at line 82 of file pcregrep.cpp.

00082 { DEE_READ, DEE_SKIP };

anonymous enum
 

Enumerator:
EL_LF 
EL_CR 
EL_CRLF 
EL_ANY 
EL_ANYCRLF 

Definition at line 92 of file pcregrep.cpp.

anonymous enum
 

Enumerator:
OP_NODATA 
OP_STRING 
OP_OP_STRING 
OP_NUMBER 
OP_OP_NUMBER 
OP_PATLIST 

Definition at line 156 of file pcregrep.cpp.


Function Documentation

void closedirectory (directory_type *dir)
 

Definition at line 446 of file pcregrep.cpp.

Referenced by grep_or_recurse().

00446 {}   /* Empty body: this definition of closedirectory() does nothing with dir. */

static BOOL compile_pattern (char *pattern, int options, char *filename, int count)  [static]
 

Definition at line 1540 of file pcregrep.cpp.

References compile_single_pattern(), end_of_line(), FALSE, MBUFTHIRD, PO_FIXED_STRINGS, and process_options.

01541 {   /* Compile one pattern argument. With PO_FIXED_STRINGS set, each line inside it is compiled as a separate pattern; otherwise it is compiled whole. Returns the BOOL result of compile_single_pattern(), FALSE as soon as one piece fails. */
01542 if ((process_options & PO_FIXED_STRINGS) != 0)
01543   {
01544   char *eop = pattern + strlen(pattern);   /* end of the whole pattern text */
01545   char buffer[MBUFTHIRD];
01546   for(;;)
01547     {
01548     int ellength;
01549     char *p = end_of_line(pattern, eop, &ellength);
01550     if (ellength == 0)   /* no line terminator left: the remainder is the final pattern */
01551       return compile_single_pattern(pattern, options, filename, count);
01552     snprintf(buffer, sizeof(buffer), "%.*s", (int)(p - pattern - ellength), pattern);   /* copy the line without its terminator; bounded so a line of MBUFTHIRD bytes or more is truncated instead of overrunning buffer */
01553     pattern = p;   /* continue with the text after the terminator */
01554     if (!compile_single_pattern(buffer, options, filename, count))
01555       return FALSE;
01556     }
01557   }
01558 else return compile_single_pattern(pattern, options, filename, count);
01559 }

static BOOL compile_single_pattern (char *pattern, int options, char *filename, int count)  [static]
 

Definition at line 1473 of file pcregrep.cpp.

References FALSE, MAX_PATTERN_COUNT, MBUFTHIRD, NULL, ordin(), pattern_count, pattern_list, pcre_compile(), pcretables, prefix, process_options, suffix, and TRUE.

Referenced by compile_pattern().

01474 {
01475 char buffer[MBUFTHIRD + 16];
01476 const char *error;
01477 int errptr;
01478 
01479 if (pattern_count >= MAX_PATTERN_COUNT)
01480   {
01481   fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
01482     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
01483   return FALSE;
01484   }
01485 
01486 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
01487   suffix[process_options]);
01488 pattern_list[pattern_count] =
01489   pcre_compile(buffer, options, &error, &errptr, pcretables);
01490 if (pattern_list[pattern_count] != NULL)
01491   {
01492   pattern_count++;
01493   return TRUE;
01494   }
01495 
01496 /* Handle compile errors */
01497 
01498 errptr -= (int)strlen(prefix[process_options]);
01499 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
01500 
01501 if (filename == NULL)
01502   {
01503   if (count == 0)
01504     fprintf(stderr, "pcregrep: Error in command-line regex "
01505       "at offset %d: %s\n", errptr, error);
01506   else
01507     fprintf(stderr, "pcregrep: Error in %s command-line regex "
01508       "at offset %d: %s\n", ordin(count), errptr, error);
01509   }
01510 else
01511   {
01512   fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
01513     "at offset %d: %s\n", count, filename, errptr, error);
01514   }
01515 
01516 return FALSE;
01517 }

static void do_after_lines (int lastmatchnumber, char *lastmatchrestart, char *endptr, char *printname)  [static]
 

Definition at line 773 of file pcregrep.cpp.

References after_context, end_of_line(), hyphenpending, NULL, number, pp, and TRUE.

00775 {   /* Print up to after_context lines starting at lastmatchrestart (bounded by endptr), then mark a separator as pending. Does nothing unless after_context > 0 and lastmatchnumber > 0. */
00776 if (after_context > 0 && lastmatchnumber > 0)
00777   {
00778   int count = 0;
00779   while (lastmatchrestart < endptr && count++ < after_context)
00780     {
00781     int ellength;
00782     char *pp = lastmatchrestart;
00783     if (printname != NULL) fprintf(stdout, "%s-", printname);   /* optional "name-" prefix */
00784     if (number) fprintf(stdout, "%d-", lastmatchnumber++);   /* optional "lineno-" prefix; the running line number advances only when it is printed */
00785     pp = end_of_line(pp, endptr, &ellength);   /* pp ends up just past this line's terminator, or at endptr */
00786     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);   /* write the line together with its terminator */
00787     lastmatchrestart = pp;
00788     }
00789   hyphenpending = TRUE;   /* set even when the loop printed nothing */
00790   }
00791 }

static char * end_of_line (char *p, char *endptr, int *lenptr)  [static]
 

Definition at line 507 of file pcregrep.cpp.

References EL_CR, EL_CRLF, EL_LF, and endlinetype.

Referenced by compile_pattern(), do_after_lines(), and pcregrep().

00508 {   /* Scan forward from p (never beyond endptr) for the line terminator selected by endlinetype. Returns a pointer just past the terminator and stores its length in bytes in *lenptr; if no terminator is found, returns endptr with *lenptr = 0. */
00509 switch(endlinetype)
00510   {
00511   default:      /* Just in case */
00512   case EL_LF:
00513   while (p < endptr && *p != '\n') p++;
00514   if (p < endptr)
00515     {
00516     *lenptr = 1;
00517     return p + 1;
00518     }
00519   *lenptr = 0;
00520   return endptr;
00521 
00522   case EL_CR:
00523   while (p < endptr && *p != '\r') p++;
00524   if (p < endptr)
00525     {
00526     *lenptr = 1;
00527     return p + 1;
00528     }
00529   *lenptr = 0;
00530   return endptr;
00531 
00532   case EL_CRLF:
00533   for (;;)
00534     {
00535     while (p < endptr && *p != '\r') p++;
00536     if (++p >= endptr)   /* step over the CR; a CR in the last position cannot start a CRLF pair */
00537       {
00538       *lenptr = 0;
00539       return endptr;
00540       }
00541     if (*p == '\n')
00542       {
00543       *lenptr = 2;
00544       return p + 1;
00545       }
00546     }   /* a lone CR is not a terminator here: keep scanning */
00547   break;
00548 
00549   case EL_ANYCRLF:
00550   while (p < endptr)
00551     {
00552     int extra = 0;
00553     register int c = *((unsigned char *)p);
00554 
00555     if (utf8 && c >= 0xc0)   /* lead byte of a multi-byte UTF-8 character: decode it into c */
00556       {
00557       int gcii, gcss;
00558       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
00559       gcss = 6*extra;
00560       c = (c & utf8_table3[extra]) << gcss;
00561       for (gcii = 1; gcii <= extra; gcii++)
00562         {
00563         gcss -= 6;
00564         c |= (p[gcii] & 0x3f) << gcss;   /* NOTE(review): p[gcii] is read without comparing against endptr - confirm callers never pass a truncated sequence */
00565         }
00566       }
00567 
00568     p += 1 + extra;   /* p now points past the character just examined */
00569 
00570     switch (c)
00571       {
00572       case 0x0a:    /* LF */
00573       *lenptr = 1;
00574       return p;
00575 
00576       case 0x0d:    /* CR */
00577       if (p < endptr && *p == 0x0a)   /* CR immediately followed by LF counts as one two-byte terminator */
00578         {
00579         *lenptr = 2;
00580         p++;
00581         }
00582       else *lenptr = 1;
00583       return p;
00584 
00585       default:
00586       break;
00587       }
00588     }   /* End of loop for ANYCRLF case */
00589 
00590   *lenptr = 0;  /* Must have hit the end */
00591   return endptr;
00592 
00593   case EL_ANY:
00594   while (p < endptr)
00595     {
00596     int extra = 0;
00597     register int c = *((unsigned char *)p);
00598 
00599     if (utf8 && c >= 0xc0)   /* same UTF-8 decoding as in the ANYCRLF case */
00600       {
00601       int gcii, gcss;
00602       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
00603       gcss = 6*extra;
00604       c = (c & utf8_table3[extra]) << gcss;
00605       for (gcii = 1; gcii <= extra; gcii++)
00606         {
00607         gcss -= 6;
00608         c |= (p[gcii] & 0x3f) << gcss;   /* NOTE(review): same unchecked read of p[gcii] as in the ANYCRLF case */
00609         }
00610       }
00611 
00612     p += 1 + extra;
00613 
00614     switch (c)
00615       {
00616       case 0x0a:    /* LF */
00617       case 0x0b:    /* VT */
00618       case 0x0c:    /* FF */
00619       *lenptr = 1;
00620       return p;
00621 
00622       case 0x0d:    /* CR */
00623       if (p < endptr && *p == 0x0a)
00624         {
00625         *lenptr = 2;
00626         p++;
00627         }
00628       else *lenptr = 1;
00629       return p;
00630 
00631       case 0x85:    /* NEL */
00632       *lenptr = utf8? 2 : 1;   /* reported as two bytes in UTF-8 mode, one byte otherwise */
00633       return p;
00634 
00635       case 0x2028:  /* LS */
00636       case 0x2029:  /* PS */
00637       *lenptr = 3;   /* these values can only result from the multi-byte decoding above */
00638       return p;
00639 
00640       default:
00641       break;
00642       }
00643     }   /* End of loop for ANY case */
00644 
00645   *lenptr = 0;  /* Must have hit the end */
00646   return endptr;
00647   }     /* End of overall switch */
00648 }

static int grep_or_recurse (char *pathname, BOOL dir_recurse, BOOL only_one_at_top)  [static]
 

Definition at line 1234 of file pcregrep.cpp.

References closedirectory(), DEE_action, dee_action, dee_RECURSE, DEE_SKIP, dee_SKIP, runtests::dir, exclude_compiled, FALSE, filenames, FN_DEFAULT, include_compiled, isdirectory(), isregfile(), NULL, opendirectory(), pcre_exec(), pcregrep(), readdirectory(), silent, stdin_name, and strerror().

01235 {   /* Scan one path. "-" means stdin; a directory is skipped, recursed into or opened like a file according to dee_action; anything else is opened and handed to pcregrep(). Returns pcregrep()'s result, 1 for a skipped path, or 2 when the path cannot be opened. */
01236 int rc = 1;
01237 int sep;
01238 FILE *in;
01239 
01240 /* If the file name is "-" we scan stdin */
01241 
01242 if (strcmp(pathname, "-") == 0)
01243   {
01244   return pcregrep(stdin,
01245     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
01246       stdin_name : NULL);
01247   }
01248 
01249 
01250 /* If the file is a directory, skip if skipping or if we are recursing, scan
01251 each file within it, subject to any include or exclude patterns that were set.
01252 The scanning code is localized so it can be made system-specific. */
01253 
01254 if ((sep = isdirectory(pathname)) != 0)   /* the non-zero result doubles as the path separator character used below */
01255   {
01256   if (dee_action == dee_SKIP) return 1;
01257   if (dee_action == dee_RECURSE)
01258     {
01259     char buffer[1024];
01260     char *nextfile;
01261     directory_type *dir = opendirectory(pathname);
01262 
01263     if (dir == NULL)
01264       {
01265       if (!silent)
01266         fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
01267           strerror(errno));
01268       return 2;
01269       }
01270 
01271     while ((nextfile = readdirectory(dir)) != NULL)
01272       {
01273       int frc, blen;
01274       sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);   /* the precisions cap the result at 641 characters, so it fits in buffer; longer names are truncated */
01275       blen = strlen(buffer);
01276 
01277       if (exclude_compiled != NULL &&
01278           pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
01279         continue;   /* full path matches the exclude pattern */
01280 
01281       if (include_compiled != NULL &&
01282           pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
01283         continue;   /* full path does not match the include pattern */
01284 
01285       frc = grep_or_recurse(buffer, dir_recurse, FALSE);   /* dir_recurse is only passed through; this function does not read it */
01286       if (frc > 1) rc = frc;   /* an error code from any entry replaces the running result */
01287        else if (frc == 0 && rc == 1) rc = 0;   /* otherwise a match anywhere upgrades "no match" to "match" */
01288       }
01289 
01290     closedirectory(dir);
01291     return rc;
01292     }
01293   }
01294 
01295 /* If the file is not a directory and not a regular file, skip it if that's
01296 been requested. */
01297 
01298 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
01299 
01300 /* Control reaches here if we have a regular file, or if we have a directory
01301 and recursion or skipping was not requested, or if we have anything else and
01302 skipping was not requested. The scan proceeds. If this is the first and only
01303 argument at top level, we don't show the file name, unless we are only showing
01304 the file name, or the filename was forced (-H). */
01305 
01306 in = fopen(pathname, "r");
01307 if (in == NULL)
01308   {
01309   if (!silent)
01310     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
01311       strerror(errno));
01312   return 2;
01313   }
01314 
01315 rc = pcregrep(in, (filenames > FN_DEFAULT ||
01316   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);   /* a NULL name tells pcregrep() not to print the file name */
01317 
01318 fclose(in);
01319 return rc;
01320 }

static int handle_option (int letter, int options)  [static]
 

Definition at line 1390 of file pcregrep.cpp.

References count_only, dee_action, dee_RECURSE, filenames, FN_FORCE, FN_NOMATCH_ONLY, FN_NONE, FN_ONLY, help(), invert, multiline, N_HELP, number, only_matching, PCRE_CASELESS, PCRE_FIRSTLINE, PCRE_MULTILINE, PCRE_UTF8, pcre_version(), PO_FIXED_STRINGS, PO_LINE_MATCH, PO_WORD_MATCH, process_options, quiet, silent, TRUE, usage(), and utf8.

01391 {   /* Apply one data-less option identified by letter: most cases set a file-scope flag, a few add PCRE_* bits to options. Returns the possibly updated options. Calls exit() for help, version and unknown letters. */
01392 switch(letter)
01393   {
01394   case N_HELP: help(); exit(0);
01395   case 'c': count_only = TRUE; break;
01396   case 'F': process_options |= PO_FIXED_STRINGS; break;
01397   case 'H': filenames = FN_FORCE; break;
01398   case 'h': filenames = FN_NONE; break;
01399   case 'i': options |= PCRE_CASELESS; break;
01400   case 'l': filenames = FN_ONLY; break;
01401   case 'L': filenames = FN_NOMATCH_ONLY; break;
01402   case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
01403   case 'n': number = TRUE; break;
01404   case 'o': only_matching = TRUE; break;
01405   case 'q': quiet = TRUE; break;
01406   case 'r': dee_action = dee_RECURSE; break;
01407   case 's': silent = TRUE; break;
01408   case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;   /* sets both the PCRE option bit and the file-scope utf8 flag */
01409   case 'v': invert = TRUE; break;
01410   case 'w': process_options |= PO_WORD_MATCH; break;
01411   case 'x': process_options |= PO_LINE_MATCH; break;
01412 
01413   case 'V':
01414   fprintf(stderr, "pcregrep version %s\n", pcre_version());   /* the version string is written to stderr */
01415   exit(0);
01416   break;
01417 
01418   default:
01419   fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
01420   exit(usage(2));   /* exit status is whatever usage(2) returns */
01421   }
01422 
01423 return options;
01424 }

static void help (void)  [static]
 

Definition at line 1351 of file pcregrep.cpp.

References option_item::help_text, option_item::long_name, option_item::one_char, op, and optionlist.

Referenced by handle_option().

01352 {   /* Print the usage summary and one line per optionlist entry (up to the entry whose one_char is 0) to stdout. */
01353 option_item *op;
01354 
01355 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
01356 printf("Search for PATTERN in each FILE or standard input.\n");
01357 printf("PATTERN must be present if neither -e nor -f is used.\n");
01358 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
01359 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
01360 
01361 printf("Options:\n");
01362 
01363 for (op = optionlist; op->one_char != 0; op++)
01364   {
01365   int n;
01366   char s[4];
01367   if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");   /* entries with a non-positive one_char have no short form: pad with blanks */
01368   n = printf("  %s --%s", s, op->long_name);   /* printf's return value is the number of characters written; used instead of the %n conversion, which some C runtimes disable */
01369   n = 30 - n;   /* padding needed to reach the help-text column */
01370   if (n < 1) n = 1;
01371   printf("%.*s%s\n", n, "                    ", op->help_text);   /* the precision can select at most the 20 blanks in the literal */
01372   }
01373 
01374 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
01375 printf("trailing white space is removed and blank lines are ignored.\n");
01376 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
01377 
01378 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
01379 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
01380 }

static BOOL is_stdout_tty (void)  [static]
 

Definition at line 459 of file pcregrep.cpp.

References FALSE.

00460 {   /* This definition performs no terminal check and always answers FALSE. */
00461 return FALSE;   /* consequence visible in main(): the "auto" colour setting leaves do_colour off */
00462 }

int isdirectory (char *filename)
 

Definition at line 443 of file pcregrep.cpp.

Referenced by grep_or_recurse().

00443 { return 0; }   /* Always 0: with this definition no path is ever reported as a directory, so grep_or_recurse() never enters its directory branch. */

int isregfile (char *filename)
 

Definition at line 453 of file pcregrep.cpp.

Referenced by grep_or_recurse().

00453 { return 1; }   /* Always 1: with this definition every path is reported as a regular file, so the DEE_SKIP test in grep_or_recurse() never skips anything. */

int main (int argc, char **argv)
 

Definition at line 1570 of file pcregrep.cpp.

References FALSE, util::threadpool::i, MAX_PATTERN_COUNT, newline, NULL, op, pattern_count, pattern_filename, pcre_config(), PCRE_CONFIG_NEWLINE, and usage().

01571 {
01572 int i, j;
01573 int rc = 1;
01574 int pcre_options = 0;
01575 int cmd_pattern_count = 0;
01576 int hint_count = 0;
01577 int errptr;
01578 BOOL only_one_at_top;
01579 char *patterns[MAX_PATTERN_COUNT];
01580 const char *locale_from = "--locale";
01581 const char *error;
01582 
01583 /* Set the default line ending value from the default in the PCRE library;
01584 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
01585 */
01586 
01587 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
01588 switch(i)
01589   {
01590   default:                 newline = (char *)"lf"; break;
01591   case '\r':               newline = (char *)"cr"; break;
01592   case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
01593   case -1:                 newline = (char *)"any"; break;
01594   case -2:                 newline = (char *)"anycrlf"; break;
01595   }
01596 
01597 /* Process the options */
01598 
01599 for (i = 1; i < argc; i++)
01600   {
01601   option_item *op = NULL;
01602   char *option_data = (char *)"";    /* default to keep compiler happy */
01603   BOOL longop;
01604   BOOL longopwasequals = FALSE;
01605 
01606   if (argv[i][0] != '-') break;
01607 
01608   /* If we hit an argument that is just "-", it may be a reference to STDIN,
01609   but only if we have previously had -e or -f to define the patterns. */
01610 
01611   if (argv[i][1] == 0)
01612     {
01613     if (pattern_filename != NULL || pattern_count > 0) break;
01614       else exit(usage(2));
01615     }
01616 
01617   /* Handle a long name option, or -- to terminate the options */
01618 
01619   if (argv[i][1] == '-')
01620     {
01621     char *arg = argv[i] + 2;
01622     char *argequals = strchr(arg, '=');
01623 
01624     if (*arg == 0)    /* -- terminates options */
01625       {
01626       i++;
01627       break;                /* out of the options-handling loop */
01628       }
01629 
01630     longop = TRUE;
01631 
01632     /* Some long options have data that follows after =, for example file=name.
01633     Some options have variations in the long name spelling: specifically, we
01634     allow "regexp" because GNU grep allows it, though I personally go along
01635     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
01636     These options are entered in the table as "regex(p)". No option is in both
01637     these categories, fortunately. */
01638 
01639     for (op = optionlist; op->one_char != 0; op++)
01640       {
01641       char *opbra = strchr(op->long_name, '(');
01642       char *equals = strchr(op->long_name, '=');
01643       if (opbra == NULL)     /* Not a (p) case */
01644         {
01645         if (equals == NULL)  /* Not thing=data case */
01646           {
01647           if (strcmp(arg, op->long_name) == 0) break;
01648           }
01649         else                 /* Special case xxx=data */
01650           {
01651           int oplen = equals - op->long_name;
01652           int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
01653           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
01654             {
01655             option_data = arg + arglen;
01656             if (*option_data == '=')
01657               {
01658               option_data++;
01659               longopwasequals = TRUE;
01660               }
01661             break;
01662             }
01663           }
01664         }
01665       else                   /* Special case xxxx(p) */
01666         {
01667         char buff1[24];
01668         char buff2[24];
01669         int baselen = opbra - op->long_name;
01670         sprintf(buff1, "%.*s", baselen, op->long_name);
01671         sprintf(buff2, "%s%.*s", buff1,
01672           (int)strlen(op->long_name) - baselen - 2, opbra + 1);
01673         if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
01674           break;
01675         }
01676       }
01677 
01678     if (op->one_char == 0)
01679       {
01680       fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
01681       exit(usage(2));
01682       }
01683     }
01684 
01685 
01686   /* Jeffrey Friedl's debugging harness uses these additional options which
01687   are not in the right form for putting in the option table because they use
01688   only one hyphen, yet are more than one character long. By putting them
01689   separately here, they will not get displayed as part of the help() output,
01690   but I don't think Jeffrey will care about that. */
01691 
01692 #ifdef JFRIEDL_DEBUG
01693   else if (strcmp(argv[i], "-pre") == 0) {
01694           jfriedl_prefix = argv[++i];
01695           continue;
01696   } else if (strcmp(argv[i], "-post") == 0) {
01697           jfriedl_postfix = argv[++i];
01698           continue;
01699   } else if (strcmp(argv[i], "-XT") == 0) {
01700           sscanf(argv[++i], "%d", &jfriedl_XT);
01701           continue;
01702   } else if (strcmp(argv[i], "-XR") == 0) {
01703           sscanf(argv[++i], "%d", &jfriedl_XR);
01704           continue;
01705   }
01706 #endif
01707 
01708 
01709   /* One-char options; many that have no data may be in a single argument; we
01710   continue till we hit the last one or one that needs data. */
01711 
01712   else
01713     {
01714     char *s = argv[i] + 1;
01715     longop = FALSE;
01716     while (*s != 0)
01717       {
01718       for (op = optionlist; op->one_char != 0; op++)
01719         { if (*s == op->one_char) break; }
01720       if (op->one_char == 0)
01721         {
01722         fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
01723           *s, argv[i]);
01724         exit(usage(2));
01725         }
01726       if (op->type != OP_NODATA || s[1] == 0)
01727         {
01728         option_data = s+1;
01729         break;
01730         }
01731       pcre_options = handle_option(*s++, pcre_options);
01732       }
01733     }
01734 
01735   /* At this point we should have op pointing to a matched option. If the type
01736   is NO_DATA, it means that there is no data, and the option might set
01737   something in the PCRE options. */
01738 
01739   if (op->type == OP_NODATA)
01740     {
01741     pcre_options = handle_option(op->one_char, pcre_options);
01742     continue;
01743     }
01744 
01745   /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
01746   either has a value or defaults to something. It cannot have data in a
01747   separate item. At the moment, the only such options are "colo(u)r" and
01748   Jeffrey Friedl's special -S debugging option. */
01749 
01750   if (*option_data == 0 &&
01751       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
01752     {
01753     switch (op->one_char)
01754       {
01755       case N_COLOUR:
01756       colour_option = (char *)"auto";
01757       break;
01758 #ifdef JFRIEDL_DEBUG
01759       case 'S':
01760       S_arg = 0;
01761       break;
01762 #endif
01763       }
01764     continue;
01765     }
01766 
01767   /* Otherwise, find the data string for the option. */
01768 
01769   if (*option_data == 0)
01770     {
01771     if (i >= argc - 1 || longopwasequals)
01772       {
01773       fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
01774       exit(usage(2));
01775       }
01776     option_data = argv[++i];
01777     }
01778 
01779   /* If the option type is OP_PATLIST, it's the -e option, which can be called
01780   multiple times to create a list of patterns. */
01781 
01782   if (op->type == OP_PATLIST)
01783     {
01784     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
01785       {
01786       fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
01787         MAX_PATTERN_COUNT);
01788       return 2;
01789       }
01790     patterns[cmd_pattern_count++] = option_data;
01791     }
01792 
01793   /* Otherwise, deal with single string or numeric data values. */
01794 
01795   else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
01796     {
01797     *((char **)op->dataptr) = option_data;
01798     }
01799   else
01800     {
01801     char *endptr;
01802     int n = strtoul(option_data, &endptr, 10);
01803     if (*endptr != 0)
01804       {
01805       if (longop)
01806         {
01807         char *equals = strchr(op->long_name, '=');
01808         int nlen = (equals == NULL)? (int)strlen(op->long_name) :
01809           equals - op->long_name;
01810         fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
01811           option_data, nlen, op->long_name);
01812         }
01813       else
01814         fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
01815           option_data, op->one_char);
01816       exit(usage(2));
01817       }
01818     *((int *)op->dataptr) = n;
01819     }
01820   }
01821 
01822 /* Options have been decoded. If -C was used, its value is used as a default
01823 for -A and -B. */
01824 
01825 if (both_context > 0)
01826   {
01827   if (after_context == 0) after_context = both_context;
01828   if (before_context == 0) before_context = both_context;
01829   }
01830 
01831 /* If a locale has not been provided as an option, see if the LC_CTYPE or
01832 LC_ALL environment variable is set, and if so, use it. */
01833 
01834 if (locale == NULL)
01835   {
01836   locale = getenv("LC_ALL");
01837   locale_from = "LCC_ALL";
01838   }
01839 
01840 if (locale == NULL)
01841   {
01842   locale = getenv("LC_CTYPE");
01843   locale_from = "LC_CTYPE";
01844   }
01845 
01846 /* If a locale has been provided, set it, and generate the tables the PCRE
01847 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
01848 
01849 if (locale != NULL)
01850   {
01851   if (setlocale(LC_CTYPE, locale) == NULL)
01852     {
01853     fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
01854       locale, locale_from);
01855     return 2;
01856     }
01857   pcretables = pcre_maketables();
01858   }
01859 
01860 /* Sort out colouring */
01861 
01862 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
01863   {
01864   if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
01865   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
01866   else
01867     {
01868     fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
01869       colour_option);
01870     return 2;
01871     }
01872   if (do_colour)
01873     {
01874     char *cs = getenv("PCREGREP_COLOUR");
01875     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
01876     if (cs != NULL) colour_string = cs;
01877     }
01878   }
01879 
01880 /* Interpret the newline type; the default settings are Unix-like. */
01881 
01882 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
01883   {
01884   pcre_options |= PCRE_NEWLINE_CR;
01885   endlinetype = EL_CR;
01886   }
01887 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
01888   {
01889   pcre_options |= PCRE_NEWLINE_LF;
01890   endlinetype = EL_LF;
01891   }
01892 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
01893   {
01894   pcre_options |= PCRE_NEWLINE_CRLF;
01895   endlinetype = EL_CRLF;
01896   }
01897 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
01898   {
01899   pcre_options |= PCRE_NEWLINE_ANY;
01900   endlinetype = EL_ANY;
01901   }
01902 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
01903   {
01904   pcre_options |= PCRE_NEWLINE_ANYCRLF;
01905   endlinetype = EL_ANYCRLF;
01906   }
01907 else
01908   {
01909   fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
01910   return 2;
01911   }
01912 
01913 /* Interpret the text values for -d and -D */
01914 
01915 if (dee_option != NULL)
01916   {
01917   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
01918   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
01919   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
01920   else
01921     {
01922     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
01923     return 2;
01924     }
01925   }
01926 
01927 if (DEE_option != NULL)
01928   {
01929   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
01930   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
01931   else
01932     {
01933     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
01934     return 2;
01935     }
01936   }
01937 
01938 /* Check the values for Jeffrey Friedl's debugging options. */
01939 
01940 #ifdef JFRIEDL_DEBUG
01941 if (S_arg > 9)
01942   {
01943   fprintf(stderr, "pcregrep: bad value for -S option\n");
01944   return 2;
01945   }
01946 if (jfriedl_XT != 0 || jfriedl_XR != 0)
01947   {
01948   if (jfriedl_XT == 0) jfriedl_XT = 1;
01949   if (jfriedl_XR == 0) jfriedl_XR = 1;
01950   }
01951 #endif
01952 
01953 /* Get memory to store the pattern and hints lists. */
01954 
01955 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
01956 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
01957 
01958 if (pattern_list == NULL || hints_list == NULL)
01959   {
01960   fprintf(stderr, "pcregrep: malloc failed\n");
01961   goto EXIT2;
01962   }
01963 
01964 /* If no patterns were provided by -e, and there is no file provided by -f,
01965 the first argument is the one and only pattern, and it must exist. */
01966 
01967 if (cmd_pattern_count == 0 && pattern_filename == NULL)
01968   {
01969   if (i >= argc) return usage(2);
01970   patterns[cmd_pattern_count++] = argv[i++];
01971   }
01972 
01973 /* Compile the patterns that were provided on the command line, either by
01974 multiple uses of -e or as a single unkeyed pattern. */
01975 
01976 for (j = 0; j < cmd_pattern_count; j++)
01977   {
01978   if (!compile_pattern(patterns[j], pcre_options, NULL,
01979        (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
01980     goto EXIT2;
01981   }
01982 
01983 /* Compile the regular expressions that are provided in a file. */
01984 
01985 if (pattern_filename != NULL)
01986   {
01987   int linenumber = 0;
01988   FILE *f;
01989   char *filename;
01990   char buffer[MBUFTHIRD];
01991 
01992   if (strcmp(pattern_filename, "-") == 0)
01993     {
01994     f = stdin;
01995     filename = stdin_name;
01996     }
01997   else
01998     {
01999     f = fopen(pattern_filename, "r");
02000     if (f == NULL)
02001       {
02002       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
02003         strerror(errno));
02004       goto EXIT2;
02005       }
02006     filename = pattern_filename;
02007     }
02008 
02009   while (fgets(buffer, MBUFTHIRD, f) != NULL)
02010     {
02011     char *s = buffer + (int)strlen(buffer);
02012     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
02013     *s = 0;
02014     linenumber++;
02015     if (buffer[0] == 0) continue;   /* Skip blank lines */
02016     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
02017       goto EXIT2;
02018     }
02019 
02020   if (f != stdin) fclose(f);
02021   }
02022 
02023 /* Study the regular expressions, as we will be running them many times */
02024 
02025 for (j = 0; j < pattern_count; j++)
02026   {
02027   hints_list[j] = pcre_study(pattern_list[j], 0, &error);
02028   if (error != NULL)
02029     {
02030     char s[16];
02031     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
02032     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
02033     goto EXIT2;
02034     }
02035   hint_count++;
02036   }
02037 
02038 /* If there are include or exclude patterns, compile them. */
02039 
02040 if (exclude_pattern != NULL)
02041   {
02042   exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
02043     pcretables);
02044   if (exclude_compiled == NULL)
02045     {
02046     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
02047       errptr, error);
02048     goto EXIT2;
02049     }
02050   }
02051 
02052 if (include_pattern != NULL)
02053   {
02054   include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
02055     pcretables);
02056   if (include_compiled == NULL)
02057     {
02058     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
02059       errptr, error);
02060     goto EXIT2;
02061     }
02062   }
02063 
02064 /* If there are no further arguments, do the business on stdin and exit. */
02065 
02066 if (i >= argc)
02067   {
02068   rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
02069   goto EXIT;
02070   }
02071 
02072 /* Otherwise, work through the remaining arguments as files or directories.
02073 Pass in the fact that there is only one argument at top level - this suppresses
02074 the file name if the argument is not a directory and filenames are not
02075 otherwise forced. */
02076 
02077 only_one_at_top = i == argc - 1;   /* Catch initial value of i */
02078 
02079 for (; i < argc; i++)
02080   {
02081   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
02082     only_one_at_top);
02083   if (frc > 1) rc = frc;
02084     else if (frc == 0 && rc == 1) rc = 0;
02085   }
02086 
02087 EXIT:
02088 if (pattern_list != NULL)
02089   {
02090   for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
02091   free(pattern_list);
02092   }
02093 if (hints_list != NULL)
02094   {
02095   for (i = 0; i < hint_count; i++) free(hints_list[i]);
02096   free(hints_list);
02097   }
02098 return rc;
02099 
02100 EXIT2:
02101 rc = 2;
02102 goto EXIT;
02103 }

directory_type* opendirectory (char *filename)
 

Definition at line 444 of file pcregrep.cpp.

Referenced by grep_or_recurse().

00444 { return NULL; }   /* Stub: always hands back a null directory handle */

static char* ordin (int n) [static]
 

Definition at line 1436 of file pcregrep.cpp.

Referenced by compile_single_pattern().

/* Format n as an English ordinal: "1st", "2nd", "3rd", "4th", "11th", "21st",
and so on.

Argument:  n   the number to format
Returns:   pointer to a static buffer holding the text; the buffer is
           overwritten by the next call, so use or copy it before then */

static char *
ordin(int n)
{
/* Room for any int: at most 3 decimal digits per byte, plus a sign, the
two-letter ending and the terminating zero. */

static char buffer[sizeof(int)*3 + 4];
const char *ending = "th";
int last_two = n % 100;
char *p = buffer;

/* 11, 12 and 13 (and 111, 212, ...) take "th" even though their last digit is
1, 2 or 3. Negative values give negative remainders, so they always keep the
default "th". */

if (last_two < 11 || last_two > 13) switch (n%10)
  {
  case 1: ending = "st"; break;
  case 2: ending = "nd"; break;
  case 3: ending = "rd"; break;
  default: break;
  }

p += sprintf(p, "%d", n);   /* sprintf() returns the number of digits written */
strcpy(p, ending);
return buffer;
}

static int pcregrep (FILE *in, char *printname) [static]
 

Definition at line 817 of file pcregrep.cpp.

References buffer, end_of_line(), runtests::end_time, FALSE, hints_list, util::threadpool::i, length, match(), MBUFTHIRD, multiline, NULL, pattern_count, pattern_list, pcre_exec(), runtests::start_time, and TRUE.

Referenced by grep_or_recurse().

/* Scan one open input stream, line by line, against the compiled patterns.

The data is read through a sliding buffer of 3*MBUFTHIRD bytes. Each line is
tried against the entries of pattern_list in turn, stopping at the first that
matches. What happens to a selected line (one where match != invert) depends
on the option variables: it may be counted (count_only), cause just the file
name to be printed (filenames == FN_ONLY), end the scan silently (quiet), have
only its matched part printed (only_matching), or be printed in full with
optional before/after context, line numbers and colouring.

Arguments:
  in          the stream to read
  printname   the name to print in front of output lines, or NULL for none

Returns:      0 if at least one line was selected
              1 if no line was selected
              When filenames == FN_NOMATCH_ONLY the sense is reversed: 1 is
              returned as soon as a line is selected, and 0 (after printing
              printname) if none was.
              The program exits with code 2 after too many pcre_exec() errors. */

00818 {
00819 int rc = 1;                 /* Becomes 0 once any line has been selected */
00820 int linenumber = 1;
00821 int lastmatchnumber = 0;    /* Number of the line after the last selected one; 0 = none */
00822 int count = 0;              /* Selected lines, used only with count_only */
00823 int offsets[99];
00824 char *lastmatchrestart = NULL;
00825 char buffer[3*MBUFTHIRD];
00826 char *ptr = buffer;
00827 char *endptr;
00828 size_t bufflength;
00829 BOOL endhyphenpending = FALSE;
00830 
00831 /* Do the first read into the start of the buffer and set up the pointer to
00832 end of what we have. */
00833 
00834 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
00835 endptr = buffer + bufflength;
00836 
00837 /* Loop while the current pointer is not at the end of the file. For large
00838 files, endptr will be at the end of the buffer when we are in the middle of the
00839 file, but ptr will never get there, because as soon as it gets over 2/3 of the
00840 way, the buffer is shifted left and re-filled. */
00841 
00842 while (ptr < endptr)
00843   {
00844   int i, endlinelength;
00845   int mrc = 0;
00846   BOOL match = FALSE;
00847   char *t = ptr;
00848   size_t length, linelength;
00849 
00850   /* At this point, ptr is at the start of a line. We need to find the length
00851   of the subject string to pass to pcre_exec(). In multiline mode, it is the
00852   length of the remainder of the data in the buffer. Otherwise, it is the
00853   length of the next line. After matching, we always advance by the length of
00854   the next line. In multiline mode the PCRE_FIRSTLINE option is used for
00855   compiling, so that any match is constrained to start in the first line. */
00856 
00857   t = end_of_line(t, endptr, &endlinelength);
00858   linelength = t - ptr - endlinelength;
00859   length = multiline? (size_t)(endptr - ptr) : linelength;
00860 
00861   /* Extra processing for Jeffrey Friedl's debugging. */
00862 
00863 #ifdef JFRIEDL_DEBUG
00864   if (jfriedl_XT || jfriedl_XR)
00865   {
00866       #include <sys/time.h>
00867       #include <time.h>
00868       struct timeval start_time, end_time;
00869       struct timezone dummy;
00870 
00871       if (jfriedl_XT)
00872       {
00873           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
00874           const char *orig = ptr;
00875           ptr = malloc(newlen + 1);
00876           if (!ptr) {
00877                   printf("out of memory");
00878                   exit(2);
00879           }
00880           endptr = ptr;
00881           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
00882           for (i = 0; i < jfriedl_XT; i++) {
00883                   strncpy(endptr, orig,  length);
00884                   endptr += length;
00885           }
00886           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
00887           length = newlen;
00888       }
00889 
00890       if (gettimeofday(&start_time, &dummy) != 0)
00891               perror("bad gettimeofday");
00892 
00893 
00894       for (i = 0; i < jfriedl_XR; i++)
00895           match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
00896 
00897       if (gettimeofday(&end_time, &dummy) != 0)
00898               perror("bad gettimeofday");
00899 
00900       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
00901                       -
00902                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
00903 
00904       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
00905       return 0;
00906   }
00907 #endif
00908 
00909 
00910   /* Run through all the patterns until one matches. Note that we don't include
00911   the final newline in the subject string. */
00912 
00913   for (i = 0; i < pattern_count; i++)
00914     {
00915     mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
00916       offsets, 99);
00917     if (mrc >= 0) { match = TRUE; break; }
00918     if (mrc != PCRE_ERROR_NOMATCH)
00919       {
00920       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
00921       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
00922       fprintf(stderr, "this line:\n");
00923       fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */
00924       fprintf(stderr, "\n");
00925       if (error_count == 0 &&
00926           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
00927         {
00928         fprintf(stderr, "pcregrep: error %d means that a resource limit "
00929           "was exceeded\n", mrc);
00930         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
00931         }
00932       if (error_count++ > 20)
00933         {
00934         fprintf(stderr, "pcregrep: too many errors - abandoned\n");
00935         exit(2);
00936         }
00937       match = invert;    /* No more matching; don't show the line again */
00938       break;
00939       }
00940     }
00941 
00942   /* If it's a match or a not-match (as required), do what's wanted. */
00943 
00944   if (match != invert)
00945     {
00946     BOOL hyphenprinted = FALSE;
00947 
00948     /* We've failed if we want a file that doesn't have any matches. */
00949 
00950     if (filenames == FN_NOMATCH_ONLY) return 1;
00951 
00952     /* Just count if just counting is wanted. */
00953 
00954     if (count_only) count++;
00955 
00956     /* If all we want is a file name, there is no need to scan any more lines
00957     in the file. */
00958 
00959     else if (filenames == FN_ONLY)
00960       {
00961       fprintf(stdout, "%s\n", printname);
00962       return 0;
00963       }
00964 
00965     /* Likewise, if all we want is a yes/no answer. */
00966 
00967     else if (quiet) return 0;
00968 
00969     /* The --only-matching option prints just the substring that matched, and
00970     does not print any context. */
00971 
00972     else if (only_matching)
00973       {
00974       if (printname != NULL) fprintf(stdout, "%s:", printname);
00975       if (number) fprintf(stdout, "%d:", linenumber);
00976       fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
00977       fprintf(stdout, "\n");
00978       }
00979 
00980     /* This is the default case when none of the above options is set. We print
00981     the matching line(s), possibly preceded and/or followed by other lines of
00982     context. */
00983 
00984     else
00985       {
00986       /* See if there is a requirement to print some "after" lines from a
00987       previous match. We never print any overlaps. */
00988 
00989       if (after_context > 0 && lastmatchnumber > 0)
00990         {
00991         int ellength;
00992         int linecount = 0;
00993         char *p = lastmatchrestart;
00994 
00995         while (p < ptr && linecount < after_context)
00996           {
00997           p = end_of_line(p, ptr, &ellength);
00998           linecount++;
00999           }
01000 
01001         /* It is important to advance lastmatchrestart during this printing so
01002         that it interacts correctly with any "before" printing below. Print
01003         each line's data using fwrite() in case there are binary zeroes. */
01004 
01005         while (lastmatchrestart < p)
01006           {
01007           char *pp = lastmatchrestart;
01008           if (printname != NULL) fprintf(stdout, "%s-", printname);
01009           if (number) fprintf(stdout, "%d-", lastmatchnumber++);
01010           pp = end_of_line(pp, endptr, &ellength);
01011           fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
01012           lastmatchrestart = pp;
01013           }
01014         if (lastmatchrestart != ptr) hyphenpending = TRUE;
01015         }
01016 
01017       /* If there were non-contiguous lines printed above, insert hyphens. */
01018 
01019       if (hyphenpending)
01020         {
01021         fprintf(stdout, "--\n");
01022         hyphenpending = FALSE;
01023         hyphenprinted = TRUE;
01024         }
01025 
01026       /* See if there is a requirement to print some "before" lines for this
01027       match. Again, don't print overlaps. */
01028 
01029       if (before_context > 0)
01030         {
01031         int linecount = 0;
01032         char *p = ptr;
01033 
01034         while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
01035                linecount < before_context)
01036           {
01037           linecount++;
01038           p = previous_line(p, buffer);
01039           }
01040 
01041         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
01042           fprintf(stdout, "--\n");
01043 
01044         while (p < ptr)
01045           {
01046           int ellength;
01047           char *pp = p;
01048           if (printname != NULL) fprintf(stdout, "%s-", printname);
01049           if (number) fprintf(stdout, "%d-", linenumber - linecount--);
01050           pp = end_of_line(pp, endptr, &ellength);
01051           fwrite(p, 1, pp - p, stdout);
01052           p = pp;
01053           }
01054         }
01055 
01056       /* Now print the matching line(s); ensure we set hyphenpending at the end
01057       of the file if any context lines are being output. */
01058 
01059       if (after_context > 0 || before_context > 0)
01060         endhyphenpending = TRUE;
01061 
01062       if (printname != NULL) fprintf(stdout, "%s:", printname);
01063       if (number) fprintf(stdout, "%d:", linenumber);
01064 
01065       /* In multiline mode, we want to print to the end of the line in which
01066       the end of the matched string is found, so we adjust linelength and the
01067       line number appropriately, but only when there actually was a match
01068       (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
01069       the match will always be before the first newline sequence. */
01070 
01071       if (multiline)
01072         {
01073         int ellength;
01074         char *endmatch = ptr;
01075         if (!invert)
01076           {
01077           endmatch += offsets[1];
01078           t = ptr;
01079           while (t < endmatch)
01080             {
01081             t = end_of_line(t, endptr, &ellength);
01082             if (t <= endmatch) linenumber++; else break;
01083             }
01084           }
01085         endmatch = end_of_line(endmatch, endptr, &ellength);
01086         linelength = endmatch - ptr - ellength;
01087         }
01088 
01089       /*** NOTE: Use only fwrite() to output the data line, so that binary
01090       zeroes are treated as just another data character. */
01091 
01092       /* This extra option, for Jeffrey Friedl's debugging requirements,
01093       replaces the matched string, or a specific captured string if it exists,
01094       with X. When this happens, colouring is ignored. */
01095 
01096 #ifdef JFRIEDL_DEBUG
01097       if (S_arg >= 0 && S_arg < mrc)
01098         {
01099         int first = S_arg * 2;
01100         int last  = first + 1;
01101         fwrite(ptr, 1, offsets[first], stdout);
01102         fprintf(stdout, "X");
01103         fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
01104         }
01105       else
01106 #endif
01107 
01108       /* We have to split the line(s) up if colouring. */
01109 
01110       if (do_colour)
01111         {
01112         fwrite(ptr, 1, offsets[0], stdout);
01113         fprintf(stdout, "%c[%sm", 0x1b, colour_string);
01114         fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
01115         fprintf(stdout, "%c[00m", 0x1b);
01116         fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
01117         }
01118       else fwrite(ptr, 1, linelength + endlinelength, stdout);
01119       }
01120 
01121     /* End of doing what has to be done for a match */
01122 
01123     rc = 0;    /* Had some success */
01124 
01125     /* Remember where the last match happened for after_context. We remember
01126     where we are about to restart, and that line's number. */
01127 
01128     lastmatchrestart = ptr + linelength + endlinelength;
01129     lastmatchnumber = linenumber + 1;
01130     }
01131 
01132   /* For a match in multiline inverted mode (which of course did not cause
01133   anything to be printed), we have to move on to the end of the match before
01134   proceeding. */
01135 
01136   if (multiline && invert && match)
01137     {
01138     int ellength;
01139     char *endmatch = ptr + offsets[1];
01140     t = ptr;
01141     while (t < endmatch)
01142       {
01143       t = end_of_line(t, endptr, &ellength);
01144       if (t <= endmatch) linenumber++; else break;
01145       }
01146     endmatch = end_of_line(endmatch, endptr, &ellength);
01147     linelength = endmatch - ptr - ellength;
01148     }
01149 
01150   /* Advance to after the newline and increment the line number. */
01151 
01152   ptr += linelength + endlinelength;
01153   linenumber++;
01154 
01155   /* If we haven't yet reached the end of the file (the buffer is full), and
01156   the current point is in the top 1/3 of the buffer, slide the buffer down by
01157   1/3 and refill it. Before we do this, if some unprinted "after" lines are
01158   about to be lost, print them. */
01159 
01160   if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
01161     {
01162     if (after_context > 0 &&
01163         lastmatchnumber > 0 &&
01164         lastmatchrestart < buffer + MBUFTHIRD)
01165       {
01166       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
01167       lastmatchnumber = 0;
01168       }
01169 
01170     /* Now do the shuffle */
01171 
01172     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
01173     ptr -= MBUFTHIRD;
01174     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
01175     endptr = buffer + bufflength;
01176 
01177     /* Adjust any last match point */
01178 
01179     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
01180     }
01181   }     /* Loop through the whole file */
01182 
01183 /* End of file; print final "after" lines if wanted; do_after_lines sets
01184 hyphenpending if it prints something. */
01185 
01186 if (!only_matching && !count_only)
01187   {
01188   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
01189   hyphenpending |= endhyphenpending;
01190   }
01191 
01192 /* Print the file name if we are looking for those without matches and there
01193 were none. If we found a match, we won't have got this far. */
01194 
01195 if (filenames == FN_NOMATCH_ONLY)
01196   {
01197   fprintf(stdout, "%s\n", printname);
01198   return 0;
01199   }
01200 
01201 /* Print the match count if wanted */
01202 
01203 if (count_only)
01204   {
01205   if (printname != NULL) fprintf(stdout, "%s:", printname);
01206   fprintf(stdout, "%d\n", count);
01207   }
01208 
01209 return rc;
01210 }

static char* previous_line (char *p, char *startptr) [static]
 

Definition at line 666 of file pcregrep.cpp.

References EL_ANY, EL_ANYCRLF, EL_CR, EL_CRLF, EL_LF, endlinetype, pp, utf8, utf8_table3, and utf8_table4.

/* Step back from the start of one line to the start of the line before it.

What counts as a line ending is selected by endlinetype. The search never
moves back beyond startptr, so startptr is returned when no earlier line
ending is found.

Arguments:
  p          points at the start of the current line
  startptr   the lowest address that may be examined or returned

Returns:     pointer to the start of the previous line */

00667 {
00668 switch(endlinetype)
00669   {
00670   default:      /* Just in case */
00671   case EL_LF:
00672   p--;          /* Step onto the LF that ends the previous line */
00673   while (p > startptr && p[-1] != '\n') p--;
00674   return p;
00675 
00676   case EL_CR:   /* Lines end in CR here, so CR (not LF) is what to look for */
00677   p--;
00678   while (p > startptr && p[-1] != '\r') p--;
00679   return p;
00680 
00681   case EL_CRLF:
00682   for (;;)
00683     {
00684     p -= 2;     /* Skip back over the CRLF pair that ends the previous line */
00685     while (p > startptr && p[-1] != '\n') p--;
00686     if (p <= startptr + 1 || p[-2] == '\r') return p;   /* A lone LF is not a line end */
00687     }
00688   return p;   /* But control should never get here */
00689 
00690   case EL_ANY:
00691   case EL_ANYCRLF:
00692   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
00693   if (utf8) while ((*p & 0xc0) == 0x80) p--;   /* Back up to the first byte of the character */
00694 
00695   while (p > startptr)
00696     {
00697     register int c;
00698     char *pp = p - 1;
00699 
00700     if (utf8)
00701       {
00702       int extra = 0;
00703       while ((*pp & 0xc0) == 0x80) pp--;
00704       c = *((unsigned char *)pp);
00705       if (c >= 0xc0)   /* Lead byte of a multibyte character: decode it */
00706         {
00707         int gcii, gcss;
00708         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
00709         gcss = 6*extra;
00710         c = (c & utf8_table3[extra]) << gcss;
00711         for (gcii = 1; gcii <= extra; gcii++)
00712           {
00713           gcss -= 6;
00714           c |= (pp[gcii] & 0x3f) << gcss;
00715           }
00716         }
00717       }
00718     else c = *((unsigned char *)pp);
00719 
00720     if (endlinetype == EL_ANYCRLF) switch (c)
00721       {
00722       case 0x0a:    /* LF */
00723       case 0x0d:    /* CR */
00724       return p;
00725 
00726       default:
00727       break;
00728       }
00729 
00730     else switch (c)
00731       {
00732       case 0x0a:    /* LF */
00733       case 0x0b:    /* VT */
00734       case 0x0c:    /* FF */
00735       case 0x0d:    /* CR */
00736       case 0x85:    /* NEL */
00737       case 0x2028:  /* LS */
00738       case 0x2029:  /* PS */
00739       return p;
00740 
00741       default:
00742       break;
00743       }
00744 
00745     p = pp;  /* Back one character */
00746     }        /* End of loop for ANY case */
00747 
00748   return startptr;  /* Hit start of data */
00749   }     /* End of overall switch */
00750 }

char* readdirectory (directory_type *dir)
 

Definition at line 445 of file pcregrep.cpp.

Referenced by grep_or_recurse().

00445 { return NULL; }   /* Stub: never yields a directory entry name */

char* strerror (int n)
 

Definition at line 482 of file pcregrep.cpp.

Referenced by grep_or_recurse(), and main().

00483 {
00484 if (n < 0 || n >= sys_nerr) return "unknown error number";
00485 return sys_errlist[n];
00486 }

static int usage (int rc) [static]
 

Definition at line 1330 of file pcregrep.cpp.

References option_item::one_char, op, and optionlist.

Referenced by handle_option(), and main().

/* Write a short usage summary to stderr, listing every option in optionlist
that has a positive one_char value, and hand back rc unchanged so that the
caller can write "return usage(rc);". The list ends at the first entry whose
one_char is zero. */

01331 {
01332 option_item *entry = optionlist;
01333 fputs("Usage: pcregrep [-", stderr);
01334 for (; entry->one_char != 0; entry++)
01335   {
01336   if (entry->one_char > 0) fprintf(stderr, "%c", entry->one_char);
01337   }
01338 fputs("] [long options] [pattern] [files]\n", stderr);
01339 fputs("Type `pcregrep --help' for more information.\n", stderr);
01340 return rc;
01341 }


Variable Documentation

int after_context = 0 [static]
 

Definition at line 134 of file pcregrep.cpp.

Referenced by do_after_lines().

int before_context = 0 [static]
 

Definition at line 135 of file pcregrep.cpp.

int both_context = 0 [static]
 

Definition at line 136 of file pcregrep.cpp.

char* colour_option = NULL [static]
 

Definition at line 114 of file pcregrep.cpp.

char* colour_string = (char *)"1;31" [static]
 

Definition at line 113 of file pcregrep.cpp.

BOOL count_only = FALSE [static]
 

Definition at line 143 of file pcregrep.cpp.

Referenced by handle_option().

int DEE_action = DEE_READ [static]
 

Definition at line 138 of file pcregrep.cpp.

Referenced by grep_or_recurse().

int dee_action = dee_READ [static]
 

Definition at line 137 of file pcregrep.cpp.

Referenced by grep_or_recurse(), and handle_option().

char* DEE_option = NULL [static]
 

Definition at line 116 of file pcregrep.cpp.

char* dee_option = NULL [static]
 

Definition at line 115 of file pcregrep.cpp.

BOOL do_colour = FALSE [static]
 

Definition at line 144 of file pcregrep.cpp.

int endlinetype [static]
 

Definition at line 111 of file pcregrep.cpp.

Referenced by end_of_line(), and previous_line().

int error_count = 0 [static]
 

Definition at line 139 of file pcregrep.cpp.

pcre* exclude_compiled = NULL [static]
 

Definition at line 132 of file pcregrep.cpp.

Referenced by grep_or_recurse().

char* exclude_pattern = NULL [static]
 

Definition at line 129 of file pcregrep.cpp.

int filenames = FN_DEFAULT [static]
 

Definition at line 140 of file pcregrep.cpp.

Referenced by grep_or_recurse(), and handle_option().

pcre_extra** hints_list = NULL [static]
 

Definition at line 126 of file pcregrep.cpp.

Referenced by pcregrep().

BOOL hyphenpending = FALSE [static]
 

Definition at line 145 of file pcregrep.cpp.

Referenced by do_after_lines().

pcre* include_compiled = NULL [static]
 

Definition at line 131 of file pcregrep.cpp.

Referenced by grep_or_recurse().

char* include_pattern = NULL [static]
 

Definition at line 128 of file pcregrep.cpp.

BOOL invert = FALSE [static]
 

Definition at line 146 of file pcregrep.cpp.

Referenced by handle_option().

char* locale = NULL [static]
 

Definition at line 120 of file pcregrep.cpp.

BOOL multiline = FALSE [static]
 

Definition at line 147 of file pcregrep.cpp.

Referenced by handle_option(), and pcregrep().

char* newline = NULL [static]
 

Definition at line 117 of file pcregrep.cpp.

Referenced by main(), pcrecpp::NewlineMode(), pcre_compile2(), pcre_dfa_exec(), and pcre_exec().

BOOL number = FALSE [static]
 

Definition at line 148 of file pcregrep.cpp.

BOOL only_matching = FALSE [static]
 

Definition at line 149 of file pcregrep.cpp.

Referenced by handle_option().

option_item optionlist[] [static]
 

Definition at line 178 of file pcregrep.cpp.

Referenced by help(), and usage().

int pattern_count = 0 [static]
 

Definition at line 124 of file pcregrep.cpp.

Referenced by compile_single_pattern(), main(), and pcregrep().

char* pattern_filename = NULL [static]
 

Definition at line 118 of file pcregrep.cpp.

Referenced by main().

pcre** pattern_list = NULL [static]
 

Definition at line 125 of file pcregrep.cpp.

Referenced by compile_single_pattern(), and pcregrep().

const unsigned char* pcretables = NULL [static]
 

Definition at line 122 of file pcregrep.cpp.

Referenced by compile_single_pattern().

const char* prefix[] [static]
 

Initial value:

 {
  "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" }

Definition at line 224 of file pcregrep.cpp.

Referenced by compile_single_pattern(), avmplus::ElementE4XNode::CopyAttributesAndNamespaces(), avmplus::E4XNode::FindNamespace(), avmplus::XMLObject::getNamespace(), and avmplus::AvmCore::newNamespace().

int process_options = 0 [static]
 

Definition at line 141 of file pcregrep.cpp.

Referenced by compile_pattern(), compile_single_pattern(), and handle_option().

BOOL quiet = FALSE [static]
 

Definition at line 150 of file pcregrep.cpp.

Referenced by handle_option(), and main().

BOOL silent = FALSE [static]
 

Definition at line 151 of file pcregrep.cpp.

Referenced by grep_or_recurse(), and handle_option().

char* stdin_name = (char *)"(standard input)" [static]
 

Definition at line 119 of file pcregrep.cpp.

Referenced by grep_or_recurse().

const char* suffix[] [static]
 

Initial value:

 {
  "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" }

Definition at line 227 of file pcregrep.cpp.

Referenced by compile_single_pattern(), and extract_superwords().

char* sys_errlist[]
 

int sys_nerr
 

BOOL utf8 = FALSE [static]
 

Definition at line 152 of file pcregrep.cpp.

Referenced by check_escape(), handle_option(), internal_dfa_exec(), match(), pcre_dfa_exec(), and previous_line().

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}
 

Definition at line 232 of file pcregrep.cpp.

Referenced by previous_line().

const char utf8_table4[]
 

Initial value:

 {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }

Definition at line 234 of file pcregrep.cpp.

Referenced by previous_line().


Generated on Sun Oct 12 18:50:48 2008 for Tamarin by  doxygen 1.4.6