pcretest.cpp

Go to the documentation of this file.
00001 /*************************************************
00002 *             PCRE testing program               *
00003 *************************************************/
00004 
00005 /* This program was hacked up as a tester for PCRE. I really should have
00006 written it more tidily in the first place. Will I ever learn? It has grown and
00007 been extended and consequently is now rather, er, *very* untidy in places.
00008 
00009 -----------------------------------------------------------------------------
00010 Redistribution and use in source and binary forms, with or without
00011 modification, are permitted provided that the following conditions are met:
00012 
00013     * Redistributions of source code must retain the above copyright notice,
00014       this list of conditions and the following disclaimer.
00015 
00016     * Redistributions in binary form must reproduce the above copyright
00017       notice, this list of conditions and the following disclaimer in the
00018       documentation and/or other materials provided with the distribution.
00019 
00020     * Neither the name of the University of Cambridge nor the names of its
00021       contributors may be used to endorse or promote products derived from
00022       this software without specific prior written permission.
00023 
00024 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00025 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00026 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00027 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00028 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00029 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00030 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00031 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00032 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00033 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00034 POSSIBILITY OF SUCH DAMAGE.
00035 -----------------------------------------------------------------------------
00036 */
00037 
00038 
00039 #ifdef HAVE_CONFIG_H
00040 #include <config.h>
00041 #endif
00042 
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
00050 
00051 
00052 /* A number of things vary for Windows builds. Originally, pcretest opened its
00053 input and output without "b"; then I was told that "b" was needed in some
00054 environments, so it was added for release 5.0 to both the input and output. (It
00055 makes no difference on Unix-like systems.) Later I was told that it is wrong
00056 for the input on Windows. I've now abstracted the modes into two macros that
00057 are set here, to make it easier to fiddle with them, and removed "b" from the
00058 input mode under Windows. */
00059 
00060 #if defined(_WIN32) || defined(WIN32)
00061 #include <io.h>                /* For _setmode() */
00062 #include <fcntl.h>             /* For _O_BINARY */
00063 #define INPUT_MODE   "r"
00064 #define OUTPUT_MODE  "wb"
00065 
00066 #else
00067 #include <sys/time.h>          /* These two includes are needed */
00068 #include <sys/resource.h>      /* for setrlimit(). */
00069 #define INPUT_MODE   "rb"
00070 #define OUTPUT_MODE  "wb"
00071 #endif
00072 
00073 
00074 /* We have to include pcre_internal.h because we need the internal info for
00075 displaying the results of pcre_study() and we also need to know about the
00076 internal macros, structures, and other internal data values; pcretest has
00077 "inside information" compared to a program that strictly follows the PCRE API.
00078 
00079 Although pcre_internal.h does itself include pcre.h, we explicitly include it
00080 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
00081 appropriately for an application, not for building PCRE. */
00082 
00083 #include "pcre.h"
00084 #include "pcre_internal.h"
00085 
00086 /* We need access to the data tables that PCRE uses. So as not to have to keep
00087 two copies, we include the source file here, changing the names of the external
00088 symbols to prevent clashes. */
00089 
00090 #define _pcre_utf8_table1      utf8_table1
00091 #define _pcre_utf8_table1_size utf8_table1_size
00092 #define _pcre_utf8_table2      utf8_table2
00093 #define _pcre_utf8_table3      utf8_table3
00094 #define _pcre_utf8_table4      utf8_table4
00095 #define _pcre_utt              utt
00096 #define _pcre_utt_size         utt_size
00097 #define _pcre_OP_lengths       OP_lengths
00098 
00099 #include "pcre_tables.c"
00100 
00101 /* We also need the pcre_printint() function for printing out compiled
00102 patterns. This function is in a separate file so that it can be included in
00103 pcre_compile.c when that module is compiled with debugging enabled.
00104 
00105 The definition of the macro PRINTABLE, which determines whether to print an
00106 output character as-is or as a hex value when showing compiled patterns, is
00107 contained in this file. We use it here also, in cases when the locale has not
00108 been explicitly changed, so as to get consistent output from systems that
00109 differ in their output from isprint() even in the "C" locale. */
00110 
00111 #include "pcre_printint.src"
00112 
00113 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
00114 
00115 
00116 /* It is possible to compile this test program without including support for
00117 testing the POSIX interface, though this is not available via the standard
00118 Makefile. */
00119 
00120 #if !defined NOPOSIX
00121 #include "pcreposix.h"
00122 #endif
00123 
00124 /* It is also possible, for the benefit of the version currently imported into
00125 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
00126 interface to the DFA matcher (NODFA), and without the doublecheck of the old
00127 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
00128 UTF8 support if PCRE is built without it. */
00129 
00130 #ifndef SUPPORT_UTF8
00131 #ifndef NOUTF8
00132 #define NOUTF8
00133 #endif
00134 #endif
00135 
00136 
00137 /* Other parameters */
00138 
00139 #ifndef CLOCKS_PER_SEC
00140 #ifdef CLK_TCK
00141 #define CLOCKS_PER_SEC CLK_TCK
00142 #else
00143 #define CLOCKS_PER_SEC 100
00144 #endif
00145 #endif
00146 
00147 /* This is the default loop count for timing. */
00148 
00149 #define LOOPREPEAT 500000
00150 
00151 /* Static variables */
00152 
00153 static FILE *outfile;
00154 static int log_store = 0;
00155 static int callout_count;
00156 static int callout_extra;
00157 static int callout_fail_count;
00158 static int callout_fail_id;
00159 static int debug_lengths;
00160 static int first_callout;
00161 static int locale_set = 0;
00162 static int show_malloc;
00163 static int use_utf8;
00164 static size_t gotten_store;
00165 
00166 /* The buffers grow automatically if very long input lines are encountered. */
00167 
00168 static int buffer_size = 50000;
00169 static uschar *buffer = NULL;
00170 static uschar *dbuffer = NULL;
00171 static uschar *pbuffer = NULL;
00172 
00173 
00174 
00175 /*************************************************
00176 *        Read or extend an input line            *
00177 *************************************************/
00178 
00179 /* Input lines are read into buffer, but both patterns and data lines can be
00180 continued over multiple input lines. In addition, if the buffer fills up, we
00181 want to automatically expand it so as to be able to handle extremely large
00182 lines that are needed for certain stress tests. When the input buffer is
00183 expanded, the other two buffers must also be expanded likewise, and the
00184 contents of pbuffer, which are a copy of the input for callouts, must be
00185 preserved (for when expansion happens for a data line). This is not the most
00186 optimal way of handling this, but hey, this is just a test program!
00187 
00188 Arguments:
00189   f            the file to read
00190   start        where in buffer to start (this *must* be within buffer)
00191 
00192 Returns:       pointer to the start of new data
00193                could be a copy of start, or could be moved
00194                NULL if no data read and EOF reached
00195 */
00196 
00197 static uschar *
00198 extend_inputline(FILE *f, uschar *start)
00199 {
00200 uschar *here = start;
00201 
00202 for (;;)
00203   {
00204   int rlen = buffer_size - (here - buffer);
00205 
00206   if (rlen > 1000)
00207     {
00208     int dlen;
00209     if (fgets((char *)here, rlen,  f) == NULL)
00210       return (here == start)? NULL : start;
00211     dlen = (int)strlen((char *)here);
00212     if (dlen > 0 && here[dlen - 1] == '\n') return start;
00213     here += dlen;
00214     }
00215 
00216   else
00217     {
00218     int new_buffer_size = 2*buffer_size;
00219     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
00220     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
00221     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
00222 
00223     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
00224       {
00225       fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
00226       exit(1);
00227       }
00228 
00229     memcpy(new_buffer, buffer, buffer_size);
00230     memcpy(new_pbuffer, pbuffer, buffer_size);
00231 
00232     buffer_size = new_buffer_size;
00233 
00234     start = new_buffer + (start - buffer);
00235     here = new_buffer + (here - buffer);
00236 
00237     free(buffer);
00238     free(dbuffer);
00239     free(pbuffer);
00240 
00241     buffer = new_buffer;
00242     dbuffer = new_dbuffer;
00243     pbuffer = new_pbuffer;
00244     }
00245   }
00246 
00247 return NULL;  /* Control never gets here */
00248 }
00249 
00250 
00251 
00252 
00253 
00254 
00255 
00256 /*************************************************
00257 *          Read number from string               *
00258 *************************************************/
00259 
00260 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
00261 around with conditional compilation, just do the job by hand. It is only used
00262 for unpicking arguments, so just keep it simple.
00263 
00264 Arguments:
00265   str           string to be converted
00266   endptr        where to put the end pointer
00267 
00268 Returns:        the converted value, as an int
00269 */
00270 
static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Skips leading white space, then converts a run of decimal digits. *endptr
is set to the first character after the digits (or after the white space if
there are none, in which case the result is 0). A number too large for an int
yields INT_MAX instead of overflowing; the remaining digits are still consumed
so that callers testing *endptr see the same end position as before. */

int result = 0;
int saturated = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits in an int only if result <= (INT_MAX - digit)/10 */

  if (saturated || result > (INT_MAX - digit) / 10)
    {
    saturated = 1;
    result = INT_MAX;
    }
  else result = result * 10 + digit;
  }

*endptr = str;
return(result);
}
00280 
00281 
00282 
00283 
00284 /*************************************************
00285 *            Convert UTF-8 string to value       *
00286 *************************************************/
00287 
00288 /* This function takes one or more bytes that represents a UTF-8 character,
00289 and returns the value of the character.
00290 
00291 Argument:
00292   utf8bytes   a pointer to the byte vector
00293   vptr        a pointer to an int to receive the value
00294 
00295 Returns:      >  0 => the number of bytes consumed
00296               -6 to 0 => malformed UTF-8 character at offset = (-return)
00297 */
00298 
00299 #if !defined NOUTF8
00300 
00301 static int
00302 utf82ord(unsigned char *utf8bytes, int *vptr)
00303 {
00304 int c = *utf8bytes++;
00305 int d = c;
00306 int i, j, s;
00307 
00308 for (i = -1; i < 6; i++)               /* i is number of additional bytes */
00309   {
00310   if ((d & 0x80) == 0) break;
00311   d <<= 1;
00312   }
00313 
00314 if (i == -1) { *vptr = c; return 1; }  /* ascii character */
00315 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
00316 
00317 /* i now has a value in the range 1-5 */
00318 
00319 s = 6*i;
00320 d = (c & utf8_table3[i]) << s;
00321 
00322 for (j = 0; j < i; j++)
00323   {
00324   c = *utf8bytes++;
00325   if ((c & 0xc0) != 0x80) return -(j+1);
00326   s -= 6;
00327   d |= (c & 0x3f) << s;
00328   }
00329 
00330 /* Check that encoding was the correct unique one */
00331 
00332 for (j = 0; j < utf8_table1_size; j++)
00333   if (d <= utf8_table1[j]) break;
00334 if (j != i) return -(i+1);
00335 
00336 /* Valid value */
00337 
00338 *vptr = d;
00339 return i+1;
00340 }
00341 
00342 #endif
00343 
00344 
00345 
00346 /*************************************************
00347 *       Convert character value to UTF-8         *
00348 *************************************************/
00349 
00350 /* This function takes an integer value in the range 0 - 0x7fffffff
00351 and encodes it as a UTF-8 character in 1 to 6 bytes.
00352 
00353 Arguments:
00354   cvalue     the character value
00355   utf8bytes  pointer to buffer for result - at least 6 bytes long
00356 
00357 Returns:     number of bytes placed in the buffer
00358 */
00359 
00360 #if !defined NOUTF8
00361 
00362 static int
00363 ord2utf8(int cvalue, uschar *utf8bytes)
00364 {
00365 register int i, j;
00366 for (i = 0; i < utf8_table1_size; i++)
00367   if (cvalue <= utf8_table1[i]) break;
00368 utf8bytes += i;
00369 for (j = i; j > 0; j--)
00370  {
00371  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
00372  cvalue >>= 6;
00373  }
00374 *utf8bytes = utf8_table2[i] | cvalue;
00375 return i + 1;
00376 }
00377 
00378 #endif
00379 
00380 
00381 
00382 /*************************************************
00383 *             Print character string             *
00384 *************************************************/
00385 
00386 /* Character string printing function. Must handle UTF-8 strings in utf8
00387 mode. Yields number of characters printed. If handed a NULL file, just counts
00388 chars without printing. */
00389 
00390 static int pchars(unsigned char *p, int length, FILE *f)
00391 {
00392 int c = 0;
00393 int yield = 0;
00394 
00395 while (length-- > 0)
00396   {
00397 #if !defined NOUTF8
00398   if (use_utf8)
00399     {
00400     int rc = utf82ord(p, &c);
00401 
00402     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
00403       {
00404       length -= rc - 1;
00405       p += rc;
00406       if (PRINTHEX(c))
00407         {
00408         if (f != NULL) fprintf(f, "%c", c);
00409         yield++;
00410         }
00411       else
00412         {
00413         int n = 4;
00414         if (f != NULL) fprintf(f, "\\x{%02x}", c);
00415         yield += (n <= 0x000000ff)? 2 :
00416                  (n <= 0x00000fff)? 3 :
00417                  (n <= 0x0000ffff)? 4 :
00418                  (n <= 0x000fffff)? 5 : 6;
00419         }
00420       continue;
00421       }
00422     }
00423 #endif
00424 
00425    /* Not UTF-8, or malformed UTF-8  */
00426 
00427   c = *p++;
00428   if (PRINTHEX(c))
00429     {
00430     if (f != NULL) fprintf(f, "%c", c);
00431     yield++;
00432     }
00433   else
00434     {
00435     if (f != NULL) fprintf(f, "\\x%02x", c);
00436     yield += 4;
00437     }
00438   }
00439 
00440 return yield;
00441 }
00442 
00443 
00444 
00445 /*************************************************
00446 *              Callout function                  *
00447 *************************************************/
00448 
00449 /* Called from PCRE as a result of the (?C) item. We print out where we are in
00450 the match. Yield zero unless more callouts than the fail count, or the callout
00451 data is not zero. */
00452 
00453 static int callout(pcre_callout_block *cb)
00454 {
00455 FILE *f = (first_callout | callout_extra)? outfile : NULL;
00456 int i, pre_start, post_start, subject_length;
00457 
00458 if (callout_extra)
00459   {
00460   fprintf(f, "Callout %d: last capture = %d\n",
00461     cb->callout_number, cb->capture_last);
00462 
00463   for (i = 0; i < cb->capture_top * 2; i += 2)
00464     {
00465     if (cb->offset_vector[i] < 0)
00466       fprintf(f, "%2d: <unset>\n", i/2);
00467     else
00468       {
00469       fprintf(f, "%2d: ", i/2);
00470       (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
00471         cb->offset_vector[i+1] - cb->offset_vector[i], f);
00472       fprintf(f, "\n");
00473       }
00474     }
00475   }
00476 
00477 /* Re-print the subject in canonical form, the first time or if giving full
00478 datails. On subsequent calls in the same match, we use pchars just to find the
00479 printed lengths of the substrings. */
00480 
00481 if (f != NULL) fprintf(f, "--->");
00482 
00483 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
00484 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
00485   cb->current_position - cb->start_match, f);
00486 
00487 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
00488 
00489 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
00490   cb->subject_length - cb->current_position, f);
00491 
00492 if (f != NULL) fprintf(f, "\n");
00493 
00494 /* Always print appropriate indicators, with callout number if not already
00495 shown. For automatic callouts, show the pattern offset. */
00496 
00497 if (cb->callout_number == 255)
00498   {
00499   fprintf(outfile, "%+3d ", cb->pattern_position);
00500   if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
00501   }
00502 else
00503   {
00504   if (callout_extra) fprintf(outfile, "    ");
00505     else fprintf(outfile, "%3d ", cb->callout_number);
00506   }
00507 
00508 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
00509 fprintf(outfile, "^");
00510 
00511 if (post_start > 0)
00512   {
00513   for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
00514   fprintf(outfile, "^");
00515   }
00516 
00517 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
00518   fprintf(outfile, " ");
00519 
00520 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
00521   pbuffer + cb->pattern_position);
00522 
00523 fprintf(outfile, "\n");
00524 first_callout = 0;
00525 
00526 if (cb->callout_data != NULL)
00527   {
00528   int callout_data = *((int *)(cb->callout_data));
00529   if (callout_data != 0)
00530     {
00531     fprintf(outfile, "Callout data = %d\n", callout_data);
00532     return callout_data;
00533     }
00534   }
00535 
00536 return (cb->callout_number != callout_fail_id)? 0 :
00537        (++callout_count >= callout_fail_count)? 1 : 0;
00538 }
00539 
00540 
00541 /*************************************************
00542 *            Local malloc functions              *
00543 *************************************************/
00544 
00545 /* Alternative malloc function, to test functionality and show the size of the
00546 compiled re. */
00547 
00548 static void *new_malloc(size_t size)
00549 {
00550 void *block = malloc(size);
00551 gotten_store = size;
00552 if (show_malloc)
00553   fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
00554 return block;
00555 }
00556 
00557 static void new_free(void *block)
00558 {
00559 if (show_malloc)
00560   fprintf(outfile, "free             %p\n", block);
00561 free(block);
00562 }
00563 
00564 
00565 /* For recursion malloc/free, to test stacking calls */
00566 
00567 static void *stack_malloc(size_t size)
00568 {
00569 void *block = malloc(size);
00570 if (show_malloc)
00571   fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
00572 return block;
00573 }
00574 
00575 static void stack_free(void *block)
00576 {
00577 if (show_malloc)
00578   fprintf(outfile, "stack_free       %p\n", block);
00579 free(block);
00580 }
00581 
00582 
00583 /*************************************************
00584 *          Call pcre_fullinfo()                  *
00585 *************************************************/
00586 
00587 /* Get one piece of information from the pcre_fullinfo() function */
00588 
00589 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
00590 {
00591 int rc;
00592 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
00593   fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
00594 }
00595 
00596 
00597 
00598 /*************************************************
00599 *         Byte flipping function                 *
00600 *************************************************/
00601 
static unsigned long int
byteflip(unsigned long int value, int n)
{
/* Reverses the byte order of the low two bytes of value when n is 2, and of
the low four bytes for any other n. Bits above those bytes do not appear in
the result. */

unsigned long int b0 = value & 0xff;           /* least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
00611 
00612 
00613 
00614 
00615 /*************************************************
00616 *        Check match or recursion limit          *
00617 *************************************************/
00618 
00619 static int
00620 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
00621   int start_offset, int options, int *use_offsets, int use_size_offsets,
00622   int flag, unsigned long int *limit, int errnumber, const char *msg)
00623 {
00624 int count;
00625 int min = 0;
00626 int mid = 64;
00627 int max = -1;
00628 
00629 extra->flags |= flag;
00630 
00631 for (;;)
00632   {
00633   *limit = mid;
00634 
00635   count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
00636     use_offsets, use_size_offsets);
00637 
00638   if (count == errnumber)
00639     {
00640     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
00641     min = mid;
00642     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
00643     }
00644 
00645   else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
00646                          count == PCRE_ERROR_PARTIAL)
00647     {
00648     if (mid == min + 1)
00649       {
00650       fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
00651       break;
00652       }
00653     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
00654     max = mid;
00655     mid = (min + mid)/2;
00656     }
00657   else break;    /* Some other error */
00658   }
00659 
00660 extra->flags &= ~flag;
00661 return count;
00662 }
00663 
00664 
00665 
00666 /*************************************************
00667 *         Case-independent strncmp() function    *
00668 *************************************************/
00669 
00670 /*
00671 Arguments:
00672   s         first string
00673   t         second string
00674   n         number of characters to compare
00675 
00676 Returns:    < 0, = 0, or > 0, according to the comparison
00677 */
00678 
00679 static int
00680 strncmpic(uschar *s, uschar *t, int n)
00681 {
00682 while (n--)
00683   {
00684   int c = tolower(*s++) - tolower(*t++);
00685   if (c) return c;
00686   }
00687 return 0;
00688 }
00689 
00690 
00691 
00692 /*************************************************
00693 *         Check newline indicator                *
00694 *************************************************/
00695 
00696 /* This is used both at compile and run-time to check for <xxx> escapes, where
00697 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
00698 no match.
00699 
00700 Arguments:
00701   p           points after the leading '<'
00702   f           file for error message
00703 
00704 Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
00705 */
00706 
00707 static int
00708 check_newline(uschar *p, FILE *f)
00709 {
00710 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
00711 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
00712 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
00713 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
00714 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
00715 fprintf(f, "Unknown newline type at: <%s\n", p);
00716 return 0;
00717 }
00718 
00719 
00720 
00721 /*************************************************
00722 *             Usage function                     *
00723 *************************************************/
00724 
static void
usage(void)
{
/* Writes the option summary to stdout. The -dfa and -p lines appear only
when the corresponding support is compiled in. */

fputs("Usage:     pcretest [options] [<input> [<output>]]\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n",
      stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
00750 
00751 
00752 
00753 /*************************************************
00754 *                Main Program                    *
00755 *************************************************/
00756 
00757 /* Read lines from named file or stdin and write to named file or stdout; lines
00758 consist of a regular expression, in delimiters and optionally followed by
00759 options, followed by a set of test data, terminated by an empty line. */
00760 
00761 int main(int argc, char **argv)
00762 {
00763 FILE *infile = stdin;
00764 int options = 0;
00765 int study_options = 0;
00766 int op = 1;
00767 int timeit = 0;
00768 int timeitm = 0;
00769 int showinfo = 0;
00770 int showstore = 0;
00771 int quiet = 0;
00772 int size_offsets = 45;
00773 int size_offsets_max;
00774 int *offsets = NULL;
00775 #if !defined NOPOSIX
00776 int posix = 0;
00777 #endif
00778 int debug = 0;
00779 int done = 0;
00780 int all_use_dfa = 0;
00781 int yield = 0;
00782 int stack_size;
00783 
00784 /* These vectors store, end-to-end, a list of captured substring names. Assume
00785 that 1024 is plenty long enough for the few names we'll be testing. */
00786 
00787 uschar copynames[1024];
00788 uschar getnames[1024];
00789 
00790 uschar *copynamesptr;
00791 uschar *getnamesptr;
00792 
00793 /* Get buffers from malloc() so that Electric Fence will check their misuse
00794 when I am debugging. They grow automatically when very long lines are read. */
00795 
00796 buffer = (unsigned char *)malloc(buffer_size);
00797 dbuffer = (unsigned char *)malloc(buffer_size);
00798 pbuffer = (unsigned char *)malloc(buffer_size);
00799 
00800 /* The outfile variable is static so that new_malloc can use it. */
00801 
00802 outfile = stdout;
00803 
00804 /* The following  _setmode() stuff is some Windows magic that tells its runtime
00805 library to translate CRLF into a single LF character. At least, that's what
00806 I've been told: never having used Windows I take this all on trust. Originally
00807 it set 0x8000, but then I was advised that _O_BINARY was better. */
00808 
00809 #if defined(_WIN32) || defined(WIN32)
00810 _setmode( _fileno( stdout ), _O_BINARY );
00811 #endif
00812 
00813 /* Scan options */
00814 
00815 while (argc > 1 && argv[op][0] == '-')
00816   {
00817   unsigned char *endptr;
00818 
00819   if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
00820     showstore = 1;
00821   else if (strcmp(argv[op], "-q") == 0) quiet = 1;
00822   else if (strcmp(argv[op], "-b") == 0) debug = 1;
00823   else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
00824   else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
00825 #if !defined NODFA
00826   else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
00827 #endif
00828   else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
00829       ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
00830         *endptr == 0))
00831     {
00832     op++;
00833     argc--;
00834     }
00835   else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
00836     {
00837     int both = argv[op][2] == 0;
00838     int temp;
00839     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
00840                      *endptr == 0))
00841       {
00842       timeitm = temp;
00843       op++;
00844       argc--;
00845       }
00846     else timeitm = LOOPREPEAT;
00847     if (both) timeit = timeitm;
00848     }
00849   else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
00850       ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
00851         *endptr == 0))
00852     {
00853 #if defined(_WIN32) || defined(WIN32)
00854     printf("PCRE: -S not supported on this OS\n");
00855     exit(1);
00856 #else
00857     int rc;
00858     struct rlimit rlim;
00859     getrlimit(RLIMIT_STACK, &rlim);
00860     rlim.rlim_cur = stack_size * 1024 * 1024;
00861     rc = setrlimit(RLIMIT_STACK, &rlim);
00862     if (rc != 0)
00863       {
00864     printf("PCRE: setrlimit() failed with error %d\n", rc);
00865     exit(1);
00866       }
00867     op++;
00868     argc--;
00869 #endif
00870     }
00871 #if !defined NOPOSIX
00872   else if (strcmp(argv[op], "-p") == 0) posix = 1;
00873 #endif
00874   else if (strcmp(argv[op], "-C") == 0)
00875     {
00876     int rc;
00877     printf("PCRE version %s\n", pcre_version());
00878     printf("Compiled with\n");
00879     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
00880     printf("  %sUTF-8 support\n", rc? "" : "No ");
00881     (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
00882     printf("  %sUnicode properties support\n", rc? "" : "No ");
00883     (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
00884     printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
00885       (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
00886       (rc == -2)? "ANYCRLF" :
00887       (rc == -1)? "ANY" : "???");
00888     (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
00889     printf("  Internal link size = %d\n", rc);
00890     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
00891     printf("  POSIX malloc threshold = %d\n", rc);
00892     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
00893     printf("  Default match limit = %d\n", rc);
00894     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
00895     printf("  Default recursion depth limit = %d\n", rc);
00896     (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
00897     printf("  Match recursion uses %s\n", rc? "stack" : "heap");
00898     goto EXIT;
00899     }
00900   else if (strcmp(argv[op], "-help") == 0 ||
00901            strcmp(argv[op], "--help") == 0)
00902     {
00903     usage();
00904     goto EXIT;
00905     }
00906   else
00907     {
00908     printf("** Unknown or malformed option %s\n", argv[op]);
00909     usage();
00910     yield = 1;
00911     goto EXIT;
00912     }
00913   op++;
00914   argc--;
00915   }
00916 
00917 /* Get the store for the offsets vector, and remember what it was */
00918 
00919 size_offsets_max = size_offsets;
00920 offsets = (int *)malloc(size_offsets_max * sizeof(int));
00921 if (offsets == NULL)
00922   {
00923   printf("** Failed to get %d bytes of memory for offsets vector\n",
00924     (int)(size_offsets_max * sizeof(int)));
00925   yield = 1;
00926   goto EXIT;
00927   }
00928 
00929 /* Sort out the input and output files */
00930 
00931 if (argc > 1)
00932   {
00933   infile = fopen(argv[op], INPUT_MODE);
00934   if (infile == NULL)
00935     {
00936     printf("** Failed to open %s\n", argv[op]);
00937     yield = 1;
00938     goto EXIT;
00939     }
00940   }
00941 
00942 if (argc > 2)
00943   {
00944   outfile = fopen(argv[op+1], OUTPUT_MODE);
00945   if (outfile == NULL)
00946     {
00947     printf("** Failed to open %s\n", argv[op+1]);
00948     yield = 1;
00949     goto EXIT;
00950     }
00951   }
00952 
00953 /* Set alternative malloc function */
00954 
00955 pcre_malloc = new_malloc;
00956 pcre_free = new_free;
00957 pcre_stack_malloc = stack_malloc;
00958 pcre_stack_free = stack_free;
00959 
00960 /* Heading line unless quiet, then prompt for first regex if stdin */
00961 
00962 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
00963 
00964 /* Main loop */
00965 
00966 while (!done)
00967   {
00968   pcre *re = NULL;
00969   pcre_extra *extra = NULL;
00970 
00971 #if !defined NOPOSIX  /* There are still compilers that require no indent */
00972   regex_t preg;
00973   int do_posix = 0;
00974 #endif
00975 
00976   const char *error;
00977   unsigned char *p, *pp, *ppp;
00978   unsigned char *to_file = NULL;
00979   const unsigned char *tables = NULL;
00980   unsigned long int true_size, true_study_size = 0;
00981   size_t size, regex_gotten_store;
00982   int do_study = 0;
00983   int do_debug = debug;
00984   int do_G = 0;
00985   int do_g = 0;
00986   int do_showinfo = showinfo;
00987   int do_showrest = 0;
00988   int do_flip = 0;
00989   int erroroffset, len, delimiter, poffset;
00990 
00991   use_utf8 = 0;
00992   debug_lengths = 1;
00993 
00994   if (infile == stdin) printf("  re> ");
00995   if (extend_inputline(infile, buffer) == NULL) break;
00996   if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
00997   fflush(outfile);
00998 
00999   p = buffer;
01000   while (isspace(*p)) p++;
01001   if (*p == 0) continue;
01002 
01003   /* See if the pattern is to be loaded pre-compiled from a file. */
01004 
01005   if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
01006     {
01007     unsigned long int magic, get_options;
01008     uschar sbuf[8];
01009     FILE *f;
01010 
01011     p++;
01012     pp = p + (int)strlen((char *)p);
01013     while (isspace(pp[-1])) pp--;
01014     *pp = 0;
01015 
01016     f = fopen((char *)p, "rb");
01017     if (f == NULL)
01018       {
01019       fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
01020       continue;
01021       }
01022 
01023     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
01024 
01025     true_size =
01026       (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
01027     true_study_size =
01028       (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
01029 
01030     re = (real_pcre *)new_malloc(true_size);
01031     regex_gotten_store = gotten_store;
01032 
01033     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
01034 
01035     magic = ((real_pcre *)re)->magic_number;
01036     if (magic != MAGIC_NUMBER)
01037       {
01038       if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
01039         {
01040         do_flip = 1;
01041         }
01042       else
01043         {
01044         fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
01045         fclose(f);
01046         continue;
01047         }
01048       }
01049 
01050     fprintf(outfile, "Compiled regex%s loaded from %s\n",
01051       do_flip? " (byte-inverted)" : "", p);
01052 
01053     /* Need to know if UTF-8 for printing data strings */
01054 
01055     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
01056     use_utf8 = (get_options & PCRE_UTF8) != 0;
01057 
01058     /* Now see if there is any following study data */
01059 
01060     if (true_study_size != 0)
01061       {
01062       pcre_study_data *psd;
01063 
01064       extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
01065       extra->flags = PCRE_EXTRA_STUDY_DATA;
01066 
01067       psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
01068       extra->study_data = psd;
01069 
01070       if (fread(psd, 1, true_study_size, f) != true_study_size)
01071         {
01072         FAIL_READ:
01073         fprintf(outfile, "Failed to read data from %s\n", p);
01074         if (extra != NULL) new_free(extra);
01075         if (re != NULL) new_free(re);
01076         fclose(f);
01077         continue;
01078         }
01079       fprintf(outfile, "Study data loaded from %s\n", p);
01080       do_study = 1;     /* To get the data output if requested */
01081       }
01082     else fprintf(outfile, "No study data\n");
01083 
01084     fclose(f);
01085     goto SHOW_INFO;
01086     }
01087 
01088   /* In-line pattern (the usual case). Get the delimiter and seek the end of
01089   the pattern; if it isn't complete, read more. */
01090 
01091   delimiter = *p++;
01092 
01093   if (isalnum(delimiter) || delimiter == '\\')
01094     {
01095     fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
01096     goto SKIP_DATA;
01097     }
01098 
01099   pp = p;
01100   poffset = p - buffer;
01101 
01102   for(;;)
01103     {
01104     while (*pp != 0)
01105       {
01106       if (*pp == '\\' && pp[1] != 0) pp++;
01107         else if (*pp == delimiter) break;
01108       pp++;
01109       }
01110     if (*pp != 0) break;
01111     if (infile == stdin) printf("    > ");
01112     if ((pp = extend_inputline(infile, pp)) == NULL)
01113       {
01114       fprintf(outfile, "** Unexpected EOF\n");
01115       done = 1;
01116       goto CONTINUE;
01117       }
01118     if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
01119     }
01120 
01121   /* The buffer may have moved while being extended; reset the start of data
01122   pointer to the correct relative point in the buffer. */
01123 
01124   p = buffer + poffset;
01125 
01126   /* If the first character after the delimiter is backslash, make
01127   the pattern end with backslash. This is purely to provide a way
01128   of testing for the error message when a pattern ends with backslash. */
01129 
01130   if (pp[1] == '\\') *pp++ = '\\';
01131 
01132   /* Terminate the pattern at the delimiter, and save a copy of the pattern
01133   for callouts. */
01134 
01135   *pp++ = 0;
01136   strcpy((char *)pbuffer, (char *)p);
01137 
01138   /* Look for options after final delimiter */
01139 
01140   options = 0;
01141   study_options = 0;
01142   log_store = showstore;  /* default from command line */
01143 
01144   while (*pp != 0)
01145     {
01146     switch (*pp++)
01147       {
01148       case 'f': options |= PCRE_FIRSTLINE; break;
01149       case 'g': do_g = 1; break;
01150       case 'i': options |= PCRE_CASELESS; break;
01151       case 'm': options |= PCRE_MULTILINE; break;
01152       case 's': options |= PCRE_DOTALL; break;
01153       case 'x': options |= PCRE_EXTENDED; break;
01154 
01155       case '+': do_showrest = 1; break;
01156       case 'A': options |= PCRE_ANCHORED; break;
01157       case 'B': do_debug = 1; break;
01158       case 'C': options |= PCRE_AUTO_CALLOUT; break;
01159       case 'D': do_debug = do_showinfo = 1; break;
01160       case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
01161       case 'F': do_flip = 1; break;
01162       case 'G': do_G = 1; break;
01163       case 'I': do_showinfo = 1; break;
01164       case 'J': options |= PCRE_DUPNAMES; break;
01165       case 'M': log_store = 1; break;
01166       case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
01167 
01168 #if !defined NOPOSIX
01169       case 'P': do_posix = 1; break;
01170 #endif
01171 
01172       case 'S': do_study = 1; break;
01173       case 'U': options |= PCRE_UNGREEDY; break;
01174       case 'X': options |= PCRE_EXTRA; break;
01175       case 'Z': debug_lengths = 0; break;
01176       case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
01177       case '?': options |= PCRE_NO_UTF8_CHECK; break;
01178 
01179       case 'L':
01180       ppp = pp;
01181       /* The '\r' test here is so that it works on Windows. */
01182       /* The '0' test is just in case this is an unterminated line. */
01183       while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
01184       *ppp = 0;
01185       if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
01186         {
01187         fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
01188         goto SKIP_DATA;
01189         }
01190       locale_set = 1;
01191       tables = pcre_maketables();
01192       pp = ppp;
01193       break;
01194 
01195       case '>':
01196       to_file = pp;
01197       while (*pp != 0) pp++;
01198       while (isspace(pp[-1])) pp--;
01199       *pp = 0;
01200       break;
01201 
01202       case '<':
01203         {
01204         int x = check_newline(pp, outfile);
01205         if (x == 0) goto SKIP_DATA;
01206         options |= x;
01207         while (*pp++ != '>');
01208         }
01209       break;
01210 
01211       case '\r':                      /* So that it works in Windows */
01212       case '\n':
01213       case ' ':
01214       break;
01215 
01216       default:
01217       fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
01218       goto SKIP_DATA;
01219       }
01220     }
01221 
01222   /* Handle compiling via the POSIX interface, which doesn't support the
01223   timing, showing, or debugging options, nor the ability to pass over
01224   local character tables. */
01225 
01226 #if !defined NOPOSIX
01227   if (posix || do_posix)
01228     {
01229     int rc;
01230     int cflags = 0;
01231 
01232     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
01233     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
01234     if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
01235     if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
01236     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
01237 
01238     rc = regcomp(&preg, (char *)p, cflags);
01239 
01240     /* Compilation failed; go back for another re, skipping to blank line
01241     if non-interactive. */
01242 
01243     if (rc != 0)
01244       {
01245       (void)regerror(rc, &preg, (char *)buffer, buffer_size);
01246       fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
01247       goto SKIP_DATA;
01248       }
01249     }
01250 
01251   /* Handle compiling via the native interface */
01252 
01253   else
01254 #endif  /* !defined NOPOSIX */
01255 
01256     {
01257     if (timeit > 0)
01258       {
01259       register int i;
01260       clock_t time_taken;
01261       clock_t start_time = clock();
01262       for (i = 0; i < timeit; i++)
01263         {
01264         re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
01265         if (re != NULL) free(re);
01266         }
01267       time_taken = clock() - start_time;
01268       fprintf(outfile, "Compile time %.4f milliseconds\n",
01269         (((double)time_taken * 1000.0) / (double)timeit) /
01270           (double)CLOCKS_PER_SEC);
01271       }
01272 
01273     re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
01274 
01275     /* Compilation failed; go back for another re, skipping to blank line
01276     if non-interactive. */
01277 
01278     if (re == NULL)
01279       {
01280       fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
01281       SKIP_DATA:
01282       if (infile != stdin)
01283         {
01284         for (;;)
01285           {
01286           if (extend_inputline(infile, buffer) == NULL)
01287             {
01288             done = 1;
01289             goto CONTINUE;
01290             }
01291           len = (int)strlen((char *)buffer);
01292           while (len > 0 && isspace(buffer[len-1])) len--;
01293           if (len == 0) break;
01294           }
01295         fprintf(outfile, "\n");
01296         }
01297       goto CONTINUE;
01298       }
01299 
01300     /* Compilation succeeded; print data if required. There are now two
01301     info-returning functions. The old one has a limited interface and
01302     returns only limited data. Check that it agrees with the newer one. */
01303 
01304     if (log_store)
01305       fprintf(outfile, "Memory allocation (code space): %d\n",
01306         (int)(gotten_store -
01307               sizeof(real_pcre) -
01308               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
01309 
01310     /* Extract the size for possible writing before possibly flipping it,
01311     and remember the store that was got. */
01312 
01313     true_size = ((real_pcre *)re)->size;
01314     regex_gotten_store = gotten_store;
01315 
01316     /* If /S was present, study the regexp to generate additional info to
01317     help with the matching. */
01318 
01319     if (do_study)
01320       {
01321       if (timeit > 0)
01322         {
01323         register int i;
01324         clock_t time_taken;
01325         clock_t start_time = clock();
01326         for (i = 0; i < timeit; i++)
01327           extra = pcre_study(re, study_options, &error);
01328         time_taken = clock() - start_time;
01329         if (extra != NULL) free(extra);
01330         fprintf(outfile, "  Study time %.4f milliseconds\n",
01331           (((double)time_taken * 1000.0) / (double)timeit) /
01332             (double)CLOCKS_PER_SEC);
01333         }
01334       extra = pcre_study(re, study_options, &error);
01335       if (error != NULL)
01336         fprintf(outfile, "Failed to study: %s\n", error);
01337       else if (extra != NULL)
01338         true_study_size = ((pcre_study_data *)(extra->study_data))->size;
01339       }
01340 
01341     /* If the 'F' option was present, we flip the bytes of all the integer
01342     fields in the regex data block and the study block. This is to make it
01343     possible to test PCRE's handling of byte-flipped patterns, e.g. those
01344     compiled on a different architecture. */
01345 
01346     if (do_flip)
01347       {
01348       real_pcre *rre = (real_pcre *)re;
01349       rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
01350       rre->size = byteflip(rre->size, sizeof(rre->size));
01351       rre->options = byteflip(rre->options, sizeof(rre->options));
01352       rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
01353       rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
01354       rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
01355       rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
01356       rre->name_table_offset = byteflip(rre->name_table_offset,
01357         sizeof(rre->name_table_offset));
01358       rre->name_entry_size = byteflip(rre->name_entry_size,
01359         sizeof(rre->name_entry_size));
01360       rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
01361 
01362       if (extra != NULL)
01363         {
01364         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
01365         rsd->size = byteflip(rsd->size, sizeof(rsd->size));
01366         rsd->options = byteflip(rsd->options, sizeof(rsd->options));
01367         }
01368       }
01369 
01370     /* Extract information from the compiled data if required */
01371 
01372     SHOW_INFO:
01373 
01374     if (do_debug)
01375       {
01376       fprintf(outfile, "------------------------------------------------------------------\n");
01377       pcre_printint(re, outfile, debug_lengths);
01378       }
01379 
01380     if (do_showinfo)
01381       {
01382       unsigned long int get_options, all_options;
01383 #if !defined NOINFOCHECK
01384       int old_first_char, old_options, old_count;
01385 #endif
01386       int count, backrefmax, first_char, need_char, okpartial, jchanged,
01387         hascrorlf;
01388       int nameentrysize, namecount;
01389       const uschar *nametable;
01390 
01391       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
01392       new_info(re, NULL, PCRE_INFO_SIZE, &size);
01393       new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
01394       new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
01395       new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
01396       new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
01397       new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
01398       new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
01399       new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
01400       new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
01401       new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
01402       new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
01403 
01404 #if !defined NOINFOCHECK
01405       old_count = pcre_info(re, &old_options, &old_first_char);
01406       if (count < 0) fprintf(outfile,
01407         "Error %d from pcre_info()\n", count);
01408       else
01409         {
01410         if (old_count != count) fprintf(outfile,
01411           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
01412             old_count);
01413 
01414         if (old_first_char != first_char) fprintf(outfile,
01415           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
01416             first_char, old_first_char);
01417 
01418         if (old_options != (int)get_options) fprintf(outfile,
01419           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
01420             get_options, old_options);
01421         }
01422 #endif
01423 
01424       if (size != regex_gotten_store) fprintf(outfile,
01425         "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
01426         (int)size, (int)regex_gotten_store);
01427 
01428       fprintf(outfile, "Capturing subpattern count = %d\n", count);
01429       if (backrefmax > 0)
01430         fprintf(outfile, "Max back reference = %d\n", backrefmax);
01431 
01432       if (namecount > 0)
01433         {
01434         fprintf(outfile, "Named capturing subpatterns:\n");
01435         while (namecount-- > 0)
01436           {
01437           fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
01438             nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
01439             GET2(nametable, 0));
01440           nametable += nameentrysize;
01441           }
01442         }
01443 
01444       if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
01445       if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
01446 
01447       all_options = ((real_pcre *)re)->options;
01448       if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
01449 
01450       if (get_options == 0) fprintf(outfile, "No options\n");
01451         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
01452           ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
01453           ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
01454           ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
01455           ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
01456           ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
01457           ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
01458           ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
01459           ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
01460           ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
01461           ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
01462           ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
01463           ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
01464           ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
01465 
01466       if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
01467 
01468       switch (get_options & PCRE_NEWLINE_BITS)
01469         {
01470         case PCRE_NEWLINE_CR:
01471         fprintf(outfile, "Forced newline sequence: CR\n");
01472         break;
01473 
01474         case PCRE_NEWLINE_LF:
01475         fprintf(outfile, "Forced newline sequence: LF\n");
01476         break;
01477 
01478         case PCRE_NEWLINE_CRLF:
01479         fprintf(outfile, "Forced newline sequence: CRLF\n");
01480         break;
01481 
01482         case PCRE_NEWLINE_ANYCRLF:
01483         fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
01484         break;
01485 
01486         case PCRE_NEWLINE_ANY:
01487         fprintf(outfile, "Forced newline sequence: ANY\n");
01488         break;
01489 
01490         default:
01491         break;
01492         }
01493 
01494       if (first_char == -1)
01495         {
01496         fprintf(outfile, "First char at start or follows newline\n");
01497         }
01498       else if (first_char < 0)
01499         {
01500         fprintf(outfile, "No first char\n");
01501         }
01502       else
01503         {
01504         int ch = first_char & 255;
01505         const char *caseless = ((first_char & REQ_CASELESS) == 0)?
01506           "" : " (caseless)";
01507         if (PRINTHEX(ch))
01508           fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
01509         else
01510           fprintf(outfile, "First char = %d%s\n", ch, caseless);
01511         }
01512 
01513       if (need_char < 0)
01514         {
01515         fprintf(outfile, "No need char\n");
01516         }
01517       else
01518         {
01519         int ch = need_char & 255;
01520         const char *caseless = ((need_char & REQ_CASELESS) == 0)?
01521           "" : " (caseless)";
01522         if (PRINTHEX(ch))
01523           fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
01524         else
01525           fprintf(outfile, "Need char = %d%s\n", ch, caseless);
01526         }
01527 
01528       /* Don't output study size; at present it is in any case a fixed
01529       value, but it varies, depending on the computer architecture, and
01530       so messes up the test suite. (And with the /F option, it might be
01531       flipped.) */
01532 
01533       if (do_study)
01534         {
01535         if (extra == NULL)
01536           fprintf(outfile, "Study returned NULL\n");
01537         else
01538           {
01539           uschar *start_bits = NULL;
01540           new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
01541 
01542           if (start_bits == NULL)
01543             fprintf(outfile, "No starting byte set\n");
01544           else
01545             {
01546             int i;
01547             int c = 24;
01548             fprintf(outfile, "Starting byte set: ");
01549             for (i = 0; i < 256; i++)
01550               {
01551               if ((start_bits[i/8] & (1<<(i&7))) != 0)
01552                 {
01553                 if (c > 75)
01554                   {
01555                   fprintf(outfile, "\n  ");
01556                   c = 2;
01557                   }
01558                 if (PRINTHEX(i) && i != ' ')
01559                   {
01560                   fprintf(outfile, "%c ", i);
01561                   c += 2;
01562                   }
01563                 else
01564                   {
01565                   fprintf(outfile, "\\x%02x ", i);
01566                   c += 5;
01567                   }
01568                 }
01569               }
01570             fprintf(outfile, "\n");
01571             }
01572           }
01573         }
01574       }
01575 
01576     /* If the '>' option was present, we write out the regex to a file, and
01577     that is all. The first 8 bytes of the file are the regex length and then
01578     the study length, in big-endian order. */
01579 
01580     if (to_file != NULL)
01581       {
01582       FILE *f = fopen((char *)to_file, "wb");
01583       if (f == NULL)
01584         {
01585         fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
01586         }
01587       else
01588         {
01589         uschar sbuf[8];
01590         sbuf[0] = (true_size >> 24)  & 255;
01591         sbuf[1] = (true_size >> 16)  & 255;
01592         sbuf[2] = (true_size >>  8)  & 255;
01593         sbuf[3] = (true_size)  & 255;
01594 
01595         sbuf[4] = (true_study_size >> 24)  & 255;
01596         sbuf[5] = (true_study_size >> 16)  & 255;
01597         sbuf[6] = (true_study_size >>  8)  & 255;
01598         sbuf[7] = (true_study_size)  & 255;
01599 
01600         if (fwrite(sbuf, 1, 8, f) < 8 ||
01601             fwrite(re, 1, true_size, f) < true_size)
01602           {
01603           fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
01604           }
01605         else
01606           {
01607           fprintf(outfile, "Compiled regex written to %s\n", to_file);
01608           if (extra != NULL)
01609             {
01610             if (fwrite(extra->study_data, 1, true_study_size, f) <
01611                 true_study_size)
01612               {
01613               fprintf(outfile, "Write error on %s: %s\n", to_file,
01614                 strerror(errno));
01615               }
01616             else fprintf(outfile, "Study data written to %s\n", to_file);
01617 
01618             }
01619           }
01620         fclose(f);
01621         }
01622 
01623       new_free(re);
01624       if (extra != NULL) new_free(extra);
01625       if (tables != NULL) new_free((void *)tables);
01626       continue;  /* With next regex */
01627       }
01628     }        /* End of non-POSIX compile */
01629 
01630   /* Read data lines and test them */
01631 
01632   for (;;)
01633     {
01634     uschar *q;
01635     uschar *bptr;
01636     int *use_offsets = offsets;
01637     int use_size_offsets = size_offsets;
01638     int callout_data = 0;
01639     int callout_data_set = 0;
01640     int count, c;
01641     int copystrings = 0;
01642     int find_match_limit = 0;
01643     int getstrings = 0;
01644     int getlist = 0;
01645     int gmatched = 0;
01646     int start_offset = 0;
01647     int g_notempty = 0;
01648     int use_dfa = 0;
01649 
01650     options = 0;
01651 
01652     *copynames = 0;
01653     *getnames = 0;
01654 
01655     copynamesptr = copynames;
01656     getnamesptr = getnames;
01657 
01658     pcre_callout = callout;
01659     first_callout = 1;
01660     callout_extra = 0;
01661     callout_count = 0;
01662     callout_fail_count = 999999;
01663     callout_fail_id = -1;
01664     show_malloc = 0;
01665 
01666     if (extra != NULL) extra->flags &=
01667       ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
01668 
01669     len = 0;
01670     for (;;)
01671       {
01672       if (infile == stdin) printf("data> ");
01673       if (extend_inputline(infile, buffer + len) == NULL)
01674         {
01675         if (len > 0) break;
01676         done = 1;
01677         goto CONTINUE;
01678         }
01679       if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
01680       len = (int)strlen((char *)buffer);
01681       if (buffer[len-1] == '\n') break;
01682       }
01683 
01684     while (len > 0 && isspace(buffer[len-1])) len--;
01685     buffer[len] = 0;
01686     if (len == 0) break;
01687 
01688     p = buffer;
01689     while (isspace(*p)) p++;
01690 
01691     bptr = q = dbuffer;
01692     while ((c = *p++) != 0)
01693       {
01694       int i = 0;
01695       int n = 0;
01696 
01697       if (c == '\\') switch ((c = *p++))
01698         {
01699         case 'a': c =    7; break;
01700         case 'b': c = '\b'; break;
01701         case 'e': c =   27; break;
01702         case 'f': c = '\f'; break;
01703         case 'n': c = '\n'; break;
01704         case 'r': c = '\r'; break;
01705         case 't': c = '\t'; break;
01706         case 'v': c = '\v'; break;
01707 
01708         case '0': case '1': case '2': case '3':
01709         case '4': case '5': case '6': case '7':
01710         c -= '0';
01711         while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
01712           c = c * 8 + *p++ - '0';
01713 
01714 #if !defined NOUTF8
01715         if (use_utf8 && c > 255)
01716           {
01717           unsigned char buff8[8];
01718           int ii, utn;
01719           utn = ord2utf8(c, buff8);
01720           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
01721           c = buff8[ii];   /* Last byte */
01722           }
01723 #endif
01724         break;
01725 
01726         case 'x':
01727 
01728         /* Handle \x{..} specially - new Perl thing for utf8 */
01729 
01730 #if !defined NOUTF8
01731         if (*p == '{')
01732           {
01733           unsigned char *pt = p;
01734           c = 0;
01735           while (isxdigit(*(++pt)))
01736             c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
01737           if (*pt == '}')
01738             {
01739             unsigned char buff8[8];
01740             int ii, utn;
01741             utn = ord2utf8(c, buff8);
01742             for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
01743             c = buff8[ii];   /* Last byte */
01744             p = pt + 1;
01745             break;
01746             }
01747           /* Not correct form; fall through */
01748           }
01749 #endif
01750 
01751         /* Ordinary \x */
01752 
01753         c = 0;
01754         while (i++ < 2 && isxdigit(*p))
01755           {
01756           c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
01757           p++;
01758           }
01759         break;
01760 
01761         case 0:   /* \ followed by EOF allows for an empty line */
01762         p--;
01763         continue;
01764 
01765         case '>':
01766         while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
01767         continue;
01768 
01769         case 'A':  /* Option setting */
01770         options |= PCRE_ANCHORED;
01771         continue;
01772 
01773         case 'B':
01774         options |= PCRE_NOTBOL;
01775         continue;
01776 
01777         case 'C':
01778         if (isdigit(*p))    /* Set copy string */
01779           {
01780           while(isdigit(*p)) n = n * 10 + *p++ - '0';
01781           copystrings |= 1 << n;
01782           }
01783         else if (isalnum(*p))
01784           {
01785           uschar *npp = copynamesptr;
01786           while (isalnum(*p)) *npp++ = *p++;
01787           *npp++ = 0;
01788           *npp = 0;
01789           n = pcre_get_stringnumber(re, (char *)copynamesptr);
01790           if (n < 0)
01791             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
01792           copynamesptr = npp;
01793           }
01794         else if (*p == '+')
01795           {
01796           callout_extra = 1;
01797           p++;
01798           }
01799         else if (*p == '-')
01800           {
01801           pcre_callout = NULL;
01802           p++;
01803           }
01804         else if (*p == '!')
01805           {
01806           callout_fail_id = 0;
01807           p++;
01808           while(isdigit(*p))
01809             callout_fail_id = callout_fail_id * 10 + *p++ - '0';
01810           callout_fail_count = 0;
01811           if (*p == '!')
01812             {
01813             p++;
01814             while(isdigit(*p))
01815               callout_fail_count = callout_fail_count * 10 + *p++ - '0';
01816             }
01817           }
01818         else if (*p == '*')
01819           {
01820           int sign = 1;
01821           callout_data = 0;
01822           if (*(++p) == '-') { sign = -1; p++; }
01823           while(isdigit(*p))
01824             callout_data = callout_data * 10 + *p++ - '0';
01825           callout_data *= sign;
01826           callout_data_set = 1;
01827           }
01828         continue;
01829 
01830 #if !defined NODFA
01831         case 'D':
01832 #if !defined NOPOSIX
01833         if (posix || do_posix)
01834           printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
01835         else
01836 #endif
01837           use_dfa = 1;
01838         continue;
01839 
01840         case 'F':
01841         options |= PCRE_DFA_SHORTEST;
01842         continue;
01843 #endif
01844 
01845         case 'G':
01846         if (isdigit(*p))
01847           {
01848           while(isdigit(*p)) n = n * 10 + *p++ - '0';
01849           getstrings |= 1 << n;
01850           }
01851         else if (isalnum(*p))
01852           {
01853           uschar *npp = getnamesptr;
01854           while (isalnum(*p)) *npp++ = *p++;
01855           *npp++ = 0;
01856           *npp = 0;
01857           n = pcre_get_stringnumber(re, (char *)getnamesptr);
01858           if (n < 0)
01859             fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
01860           getnamesptr = npp;
01861           }
01862         continue;
01863 
01864         case 'L':
01865         getlist = 1;
01866         continue;
01867 
01868         case 'M':
01869         find_match_limit = 1;
01870         continue;
01871 
01872         case 'N':
01873         options |= PCRE_NOTEMPTY;
01874         continue;
01875 
01876         case 'O':
01877         while(isdigit(*p)) n = n * 10 + *p++ - '0';
01878         if (n > size_offsets_max)
01879           {
01880           size_offsets_max = n;
01881           free(offsets);
01882           use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
01883           if (offsets == NULL)
01884             {
01885             printf("** Failed to get %d bytes of memory for offsets vector\n",
01886               (int)(size_offsets_max * sizeof(int)));
01887             yield = 1;
01888             goto EXIT;
01889             }
01890           }
01891         use_size_offsets = n;
01892         if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
01893         continue;
01894 
01895         case 'P':
01896         options |= PCRE_PARTIAL;
01897         continue;
01898 
01899         case 'Q':
01900         while(isdigit(*p)) n = n * 10 + *p++ - '0';
01901         if (extra == NULL)
01902           {
01903           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
01904           extra->flags = 0;
01905           }
01906         extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
01907         extra->match_limit_recursion = n;
01908         continue;
01909 
01910         case 'q':
01911         while(isdigit(*p)) n = n * 10 + *p++ - '0';
01912         if (extra == NULL)
01913           {
01914           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
01915           extra->flags = 0;
01916           }
01917         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
01918         extra->match_limit = n;
01919         continue;
01920 
01921 #if !defined NODFA
01922         case 'R':
01923         options |= PCRE_DFA_RESTART;
01924         continue;
01925 #endif
01926 
01927         case 'S':
01928         show_malloc = 1;
01929         continue;
01930 
01931         case 'Z':
01932         options |= PCRE_NOTEOL;
01933         continue;
01934 
01935         case '?':
01936         options |= PCRE_NO_UTF8_CHECK;
01937         continue;
01938 
01939         case '<':
01940           {
01941           int x = check_newline(p, outfile);
01942           if (x == 0) goto NEXT_DATA;
01943           options |= x;
01944           while (*p++ != '>');
01945           }
01946         continue;
01947         }
01948       *q++ = c;
01949       }
01950     *q = 0;
01951     len = q - dbuffer;
01952 
01953     if ((all_use_dfa || use_dfa) && find_match_limit)
01954       {
01955       printf("**Match limit not relevant for DFA matching: ignored\n");
01956       find_match_limit = 0;
01957       }
01958 
01959     /* Handle matching via the POSIX interface, which does not
01960     support timing or playing with the match limit or callout data. */
01961 
01962 #if !defined NOPOSIX
01963     if (posix || do_posix)
01964       {
01965       int rc;
01966       int eflags = 0;
01967       regmatch_t *pmatch = NULL;
01968       if (use_size_offsets > 0)
01969         pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
01970       if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
01971       if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
01972 
01973       rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
01974 
01975       if (rc != 0)
01976         {
01977         (void)regerror(rc, &preg, (char *)buffer, buffer_size);
01978         fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
01979         }
01980       else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
01981               != 0)
01982         {
01983         fprintf(outfile, "Matched with REG_NOSUB\n");
01984         }
01985       else
01986         {
01987         size_t i;
01988         for (i = 0; i < (size_t)use_size_offsets; i++)
01989           {
01990           if (pmatch[i].rm_so >= 0)
01991             {
01992             fprintf(outfile, "%2d: ", (int)i);
01993             (void)pchars(dbuffer + pmatch[i].rm_so,
01994               pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
01995             fprintf(outfile, "\n");
01996             if (i == 0 && do_showrest)
01997               {
01998               fprintf(outfile, " 0+ ");
01999               (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
02000                 outfile);
02001               fprintf(outfile, "\n");
02002               }
02003             }
02004           }
02005         }
02006       free(pmatch);
02007       }
02008 
02009     /* Handle matching via the native interface - repeats for /g and /G */
02010 
02011     else
02012 #endif  /* !defined NOPOSIX */
02013 
02014     for (;; gmatched++)    /* Loop for /g or /G */
02015       {
02016       if (timeitm > 0)
02017         {
02018         register int i;
02019         clock_t time_taken;
02020         clock_t start_time = clock();
02021 
02022 #if !defined NODFA
02023         if (all_use_dfa || use_dfa)
02024           {
02025           int workspace[1000];
02026           for (i = 0; i < timeitm; i++)
02027             count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
02028               options | g_notempty, use_offsets, use_size_offsets, workspace,
02029               sizeof(workspace)/sizeof(int));
02030           }
02031         else
02032 #endif
02033 
02034         for (i = 0; i < timeitm; i++)
02035           count = pcre_exec(re, extra, (char *)bptr, len,
02036             start_offset, options | g_notempty, use_offsets, use_size_offsets);
02037 
02038         time_taken = clock() - start_time;
02039         fprintf(outfile, "Execute time %.4f milliseconds\n",
02040           (((double)time_taken * 1000.0) / (double)timeitm) /
02041             (double)CLOCKS_PER_SEC);
02042         }
02043 
02044       /* If find_match_limit is set, we want to do repeated matches with
02045       varying limits in order to find the minimum value for the match limit and
02046       for the recursion limit. */
02047 
02048       if (find_match_limit)
02049         {
02050         if (extra == NULL)
02051           {
02052           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
02053           extra->flags = 0;
02054           }
02055 
02056         (void)check_match_limit(re, extra, bptr, len, start_offset,
02057           options|g_notempty, use_offsets, use_size_offsets,
02058           PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
02059           PCRE_ERROR_MATCHLIMIT, "match()");
02060 
02061         count = check_match_limit(re, extra, bptr, len, start_offset,
02062           options|g_notempty, use_offsets, use_size_offsets,
02063           PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
02064           PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
02065         }
02066 
02067       /* If callout_data is set, use the interface with additional data */
02068 
02069       else if (callout_data_set)
02070         {
02071         if (extra == NULL)
02072           {
02073           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
02074           extra->flags = 0;
02075           }
02076         extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
02077         extra->callout_data = &callout_data;
02078         count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
02079           options | g_notempty, use_offsets, use_size_offsets);
02080         extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
02081         }
02082 
02083       /* The normal case is just to do the match once, with the default
02084       value of match_limit. */
02085 
02086 #if !defined NODFA
02087       else if (all_use_dfa || use_dfa)
02088         {
02089         int workspace[1000];
02090         count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
02091           options | g_notempty, use_offsets, use_size_offsets, workspace,
02092           sizeof(workspace)/sizeof(int));
02093         if (count == 0)
02094           {
02095           fprintf(outfile, "Matched, but too many subsidiary matches\n");
02096           count = use_size_offsets/2;
02097           }
02098         }
02099 #endif
02100 
02101       else
02102         {
02103         count = pcre_exec(re, extra, (char *)bptr, len,
02104           start_offset, options | g_notempty, use_offsets, use_size_offsets);
02105         if (count == 0)
02106           {
02107           fprintf(outfile, "Matched, but too many substrings\n");
02108           count = use_size_offsets/3;
02109           }
02110         }
02111 
02112       /* Matched */
02113 
02114       if (count >= 0)
02115         {
02116         int i, maxcount;
02117 
02118 #if !defined NODFA
02119         if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
02120 #endif
02121           maxcount = use_size_offsets/3;
02122 
02123         /* This is a check against a lunatic return value. */
02124 
02125         if (count > maxcount)
02126           {
02127           fprintf(outfile,
02128             "** PCRE error: returned count %d is too big for offset size %d\n",
02129             count, use_size_offsets);
02130           count = use_size_offsets/3;
02131           if (do_g || do_G)
02132             {
02133             fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
02134             do_g = do_G = FALSE;        /* Break g/G loop */
02135             }
02136           }
02137 
02138         for (i = 0; i < count * 2; i += 2)
02139           {
02140           if (use_offsets[i] < 0)
02141             fprintf(outfile, "%2d: <unset>\n", i/2);
02142           else
02143             {
02144             fprintf(outfile, "%2d: ", i/2);
02145             (void)pchars(bptr + use_offsets[i],
02146               use_offsets[i+1] - use_offsets[i], outfile);
02147             fprintf(outfile, "\n");
02148             if (i == 0)
02149               {
02150               if (do_showrest)
02151                 {
02152                 fprintf(outfile, " 0+ ");
02153                 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
02154                   outfile);
02155                 fprintf(outfile, "\n");
02156                 }
02157               }
02158             }
02159           }
02160 
02161         for (i = 0; i < 32; i++)
02162           {
02163           if ((copystrings & (1 << i)) != 0)
02164             {
02165             char copybuffer[256];
02166             int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
02167               i, copybuffer, sizeof(copybuffer));
02168             if (rc < 0)
02169               fprintf(outfile, "copy substring %d failed %d\n", i, rc);
02170             else
02171               fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
02172             }
02173           }
02174 
02175         for (copynamesptr = copynames;
02176              *copynamesptr != 0;
02177              copynamesptr += (int)strlen((char*)copynamesptr) + 1)
02178           {
02179           char copybuffer[256];
02180           int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
02181             count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
02182           if (rc < 0)
02183             fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
02184           else
02185             fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
02186           }
02187 
02188         for (i = 0; i < 32; i++)
02189           {
02190           if ((getstrings & (1 << i)) != 0)
02191             {
02192             const char *substring;
02193             int rc = pcre_get_substring((char *)bptr, use_offsets, count,
02194               i, &substring);
02195             if (rc < 0)
02196               fprintf(outfile, "get substring %d failed %d\n", i, rc);
02197             else
02198               {
02199               fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
02200               pcre_free_substring(substring);
02201               }
02202             }
02203           }
02204 
02205         for (getnamesptr = getnames;
02206              *getnamesptr != 0;
02207              getnamesptr += (int)strlen((char*)getnamesptr) + 1)
02208           {
02209           const char *substring;
02210           int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
02211             count, (char *)getnamesptr, &substring);
02212           if (rc < 0)
02213             fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
02214           else
02215             {
02216             fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
02217             pcre_free_substring(substring);
02218             }
02219           }
02220 
02221         if (getlist)
02222           {
02223           const char **stringlist;
02224           int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
02225             &stringlist);
02226           if (rc < 0)
02227             fprintf(outfile, "get substring list failed %d\n", rc);
02228           else
02229             {
02230             for (i = 0; i < count; i++)
02231               fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
02232             if (stringlist[i] != NULL)
02233               fprintf(outfile, "string list not terminated by NULL\n");
02234             /* free((void *)stringlist); */
02235             pcre_free_substring_list(stringlist);
02236             }
02237           }
02238         }
02239 
02240       /* There was a partial match */
02241 
02242       else if (count == PCRE_ERROR_PARTIAL)
02243         {
02244         fprintf(outfile, "Partial match");
02245 #if !defined NODFA
02246         if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
02247           fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
02248             bptr + use_offsets[0]);
02249 #endif
02250         fprintf(outfile, "\n");
02251         break;  /* Out of the /g loop */
02252         }
02253 
02254       /* Failed to match. If this is a /g or /G loop and we previously set
02255       g_notempty after a null match, this is not necessarily the end. We want
02256       to advance the start offset, and continue. We won't be at the end of the
02257       string - that was checked before setting g_notempty.
02258 
02259       Complication arises in the case when the newline option is "any" or
02260       "anycrlf". If the previous match was at the end of a line terminated by
02261       CRLF, an advance of one character just passes the \r, whereas we should
02262       prefer the longer newline sequence, as does the code in pcre_exec().
02263       Fudge the offset value to achieve this.
02264 
02265       Otherwise, in the case of UTF-8 matching, the advance must be one
02266       character, not one byte. */
02267 
02268       else
02269         {
02270         if (g_notempty != 0)
02271           {
02272           int onechar = 1;
02273           unsigned int obits = ((real_pcre *)re)->options;
02274           use_offsets[0] = start_offset;
02275           if ((obits & PCRE_NEWLINE_BITS) == 0)
02276             {
02277             int d;
02278             (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
02279             obits = (d == '\r')? PCRE_NEWLINE_CR :
02280                     (d == '\n')? PCRE_NEWLINE_LF :
02281                     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
02282                     (d == -2)? PCRE_NEWLINE_ANYCRLF :
02283                     (d == -1)? PCRE_NEWLINE_ANY : 0;
02284             }
02285           if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
02286                (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
02287               &&
02288               start_offset < len - 1 &&
02289               bptr[start_offset] == '\r' &&
02290               bptr[start_offset+1] == '\n')
02291             onechar++;
02292           else if (use_utf8)
02293             {
02294             while (start_offset + onechar < len)
02295               {
02296               int tb = bptr[start_offset+onechar];
02297               if (tb <= 127) break;
02298               tb &= 0xc0;
02299               if (tb != 0 && tb != 0xc0) onechar++;
02300               }
02301             }
02302           use_offsets[1] = start_offset + onechar;
02303           }
02304         else
02305           {
02306           if (count == PCRE_ERROR_NOMATCH)
02307             {
02308             if (gmatched == 0) fprintf(outfile, "No match\n");
02309             }
02310           else fprintf(outfile, "Error %d\n", count);
02311           break;  /* Out of the /g loop */
02312           }
02313         }
02314 
02315       /* If not /g or /G we are done */
02316 
02317       if (!do_g && !do_G) break;
02318 
02319       /* If we have matched an empty string, first check to see if we are at
02320       the end of the subject. If so, the /g loop is over. Otherwise, mimic
02321       what Perl's /g option does. This turns out to be rather cunning. First
02322       we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
02323       same point. If this fails (picked up above) we advance to the next
02324       character. */
02325 
02326       g_notempty = 0;
02327 
02328       if (use_offsets[0] == use_offsets[1])
02329         {
02330         if (use_offsets[0] == len) break;
02331         g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
02332         }
02333 
02334       /* For /g, update the start offset, leaving the rest alone */
02335 
02336       if (do_g) start_offset = use_offsets[1];
02337 
02338       /* For /G, update the pointer and length */
02339 
02340       else
02341         {
02342         bptr += use_offsets[1];
02343         len -= use_offsets[1];
02344         }
02345       }  /* End of loop for /g and /G */
02346 
02347     NEXT_DATA: continue;
02348     }    /* End of loop for data lines */
02349 
02350   CONTINUE:
02351 
02352 #if !defined NOPOSIX
02353   if (posix || do_posix) regfree(&preg);
02354 #endif
02355 
02356   if (re != NULL) new_free(re);
02357   if (extra != NULL) new_free(extra);
02358   if (tables != NULL)
02359     {
02360     new_free((void *)tables);
02361     setlocale(LC_CTYPE, "C");
02362     locale_set = 0;
02363     }
02364   }
02365 
02366 if (infile == stdin) fprintf(outfile, "\n");
02367 
02368 EXIT:
02369 
02370 if (infile != NULL && infile != stdin) fclose(infile);
02371 if (outfile != NULL && outfile != stdout) fclose(outfile);
02372 
02373 free(buffer);
02374 free(dbuffer);
02375 free(pbuffer);
02376 free(offsets);
02377 
02378 return yield;
02379 }
02380 
02381 /* End of pcretest.c */

Generated on Sun Oct 12 18:50:24 2008 for Tamarin by  doxygen 1.4.6