00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 #ifdef HAVE_CONFIG_H
00040 #include <config.h>
00041 #endif
00042
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060 #if defined(_WIN32) || defined(WIN32)
00061 #include <io.h>
00062 #include <fcntl.h>
00063 #define INPUT_MODE "r"
00064 #define OUTPUT_MODE "wb"
00065
00066 #else
00067 #include <sys/time.h>
00068 #include <sys/resource.h>
00069 #define INPUT_MODE "rb"
00070 #define OUTPUT_MODE "wb"
00071 #endif
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083 #include "pcre.h"
00084 #include "pcre_internal.h"
00085
00086
00087
00088
00089
00090 #define _pcre_utf8_table1 utf8_table1
00091 #define _pcre_utf8_table1_size utf8_table1_size
00092 #define _pcre_utf8_table2 utf8_table2
00093 #define _pcre_utf8_table3 utf8_table3
00094 #define _pcre_utf8_table4 utf8_table4
00095 #define _pcre_utt utt
00096 #define _pcre_utt_size utt_size
00097 #define _pcre_OP_lengths OP_lengths
00098
00099 #include "pcre_tables.c"
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111 #include "pcre_printint.src"
00112
00113 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
00114
00115
00116
00117
00118
00119
00120 #if !defined NOPOSIX
00121 #include "pcreposix.h"
00122 #endif
00123
00124
00125
00126
00127
00128
00129
00130 #ifndef SUPPORT_UTF8
00131 #ifndef NOUTF8
00132 #define NOUTF8
00133 #endif
00134 #endif
00135
00136
00137
00138
00139 #ifndef CLOCKS_PER_SEC
00140 #ifdef CLK_TCK
00141 #define CLOCKS_PER_SEC CLK_TCK
00142 #else
00143 #define CLOCKS_PER_SEC 100
00144 #endif
00145 #endif
00146
00147
00148
00149 #define LOOPREPEAT 500000
00150
00151
00152
/* File-scope state shared by main() and the helper functions below. */

static FILE *outfile;          /* Where results are written; main() points it at stdout or an opened output file */
static int log_store = 0;      /* Non-zero: report memory used by each compiled pattern */
static int callout_count;      /* Incremented in callout() each time the callout number equals callout_fail_id */
static int callout_extra;      /* Non-zero: callout() also prints the captured substrings */
static int callout_fail_count; /* callout() returns 1 once callout_count reaches this value */
static int callout_fail_id;    /* Callout number that is counted towards callout_fail_count */
static int debug_lengths;      /* Passed to pcre_printint() when the compiled code is shown */
static int first_callout;      /* Non-zero until callout() has run once; enables printing of the subject line */
static int locale_set = 0;     /* Non-zero after a successful setlocale(); makes PRINTHEX use isprint() */
static int show_malloc;        /* Non-zero: the allocation wrappers trace every call to outfile */
static int use_utf8;           /* Non-zero: pchars() decodes subjects as UTF-8 */
static size_t gotten_store;    /* Size passed to the most recent new_malloc() call */

/* The three work buffers always share the same size; extend_inputline()
doubles all of them together when an input line does not fit. */

static int buffer_size = 50000;  /* Current size in bytes of each of the buffers below */
static uschar *buffer = NULL;    /* Holds the input line currently being read */
static uschar *dbuffer = NULL;   /* Same size as buffer; its use is not visible in this part of the file */
static uschar *pbuffer = NULL;   /* Copy of the pattern text; callout() prints pattern items from it */
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197 static uschar *
00198 extend_inputline(FILE *f, uschar *start)
00199 {
00200 uschar *here = start;
00201
00202 for (;;)
00203 {
00204 int rlen = buffer_size - (here - buffer);
00205
00206 if (rlen > 1000)
00207 {
00208 int dlen;
00209 if (fgets((char *)here, rlen, f) == NULL)
00210 return (here == start)? NULL : start;
00211 dlen = (int)strlen((char *)here);
00212 if (dlen > 0 && here[dlen - 1] == '\n') return start;
00213 here += dlen;
00214 }
00215
00216 else
00217 {
00218 int new_buffer_size = 2*buffer_size;
00219 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
00220 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
00221 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
00222
00223 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
00224 {
00225 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
00226 exit(1);
00227 }
00228
00229 memcpy(new_buffer, buffer, buffer_size);
00230 memcpy(new_pbuffer, pbuffer, buffer_size);
00231
00232 buffer_size = new_buffer_size;
00233
00234 start = new_buffer + (start - buffer);
00235 here = new_buffer + (here - buffer);
00236
00237 free(buffer);
00238 free(dbuffer);
00239 free(pbuffer);
00240
00241 buffer = new_buffer;
00242 dbuffer = new_dbuffer;
00243 pbuffer = new_pbuffer;
00244 }
00245 }
00246
00247 return NULL;
00248 }
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
/* Read an unsigned decimal number from a string, skipping leading white
space. This is used for command-line values instead of atoi() so that the
caller can see where the number stopped.

Arguments:
  str       the string to read from
  endptr    set to point at the first character after the digits (or at the
              first non-space character if there are no digits)

Returns:    the value read, or 0 if no digits are present. Values too large
            for an int saturate at INT_MAX rather than overflowing, because
            signed overflow is undefined behaviour. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
    {
    int digit = (int)(*str++ - '0');

    /* result*10 + digit fits in an int only if result <= (INT_MAX-digit)/10;
    once saturated, the value stays at INT_MAX for any further digits. */

    if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
    }

  *endptr = str;
  return result;
}
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299 #if !defined NOUTF8
00300
00301 static int
00302 utf82ord(unsigned char *utf8bytes, int *vptr)
00303 {
00304 int c = *utf8bytes++;
00305 int d = c;
00306 int i, j, s;
00307
00308 for (i = -1; i < 6; i++)
00309 {
00310 if ((d & 0x80) == 0) break;
00311 d <<= 1;
00312 }
00313
00314 if (i == -1) { *vptr = c; return 1; }
00315 if (i == 0 || i == 6) return 0;
00316
00317
00318
00319 s = 6*i;
00320 d = (c & utf8_table3[i]) << s;
00321
00322 for (j = 0; j < i; j++)
00323 {
00324 c = *utf8bytes++;
00325 if ((c & 0xc0) != 0x80) return -(j+1);
00326 s -= 6;
00327 d |= (c & 0x3f) << s;
00328 }
00329
00330
00331
00332 for (j = 0; j < utf8_table1_size; j++)
00333 if (d <= utf8_table1[j]) break;
00334 if (j != i) return -(i+1);
00335
00336
00337
00338 *vptr = d;
00339 return i+1;
00340 }
00341
00342 #endif
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360 #if !defined NOUTF8
00361
00362 static int
00363 ord2utf8(int cvalue, uschar *utf8bytes)
00364 {
00365 register int i, j;
00366 for (i = 0; i < utf8_table1_size; i++)
00367 if (cvalue <= utf8_table1[i]) break;
00368 utf8bytes += i;
00369 for (j = i; j > 0; j--)
00370 {
00371 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
00372 cvalue >>= 6;
00373 }
00374 *utf8bytes = utf8_table2[i] | cvalue;
00375 return i + 1;
00376 }
00377
00378 #endif
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390 static int pchars(unsigned char *p, int length, FILE *f)
00391 {
00392 int c = 0;
00393 int yield = 0;
00394
00395 while (length-- > 0)
00396 {
00397 #if !defined NOUTF8
00398 if (use_utf8)
00399 {
00400 int rc = utf82ord(p, &c);
00401
00402 if (rc > 0 && rc <= length + 1)
00403 {
00404 length -= rc - 1;
00405 p += rc;
00406 if (PRINTHEX(c))
00407 {
00408 if (f != NULL) fprintf(f, "%c", c);
00409 yield++;
00410 }
00411 else
00412 {
00413 int n = 4;
00414 if (f != NULL) fprintf(f, "\\x{%02x}", c);
00415 yield += (n <= 0x000000ff)? 2 :
00416 (n <= 0x00000fff)? 3 :
00417 (n <= 0x0000ffff)? 4 :
00418 (n <= 0x000fffff)? 5 : 6;
00419 }
00420 continue;
00421 }
00422 }
00423 #endif
00424
00425
00426
00427 c = *p++;
00428 if (PRINTHEX(c))
00429 {
00430 if (f != NULL) fprintf(f, "%c", c);
00431 yield++;
00432 }
00433 else
00434 {
00435 if (f != NULL) fprintf(f, "\\x%02x", c);
00436 yield += 4;
00437 }
00438 }
00439
00440 return yield;
00441 }
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453 static int callout(pcre_callout_block *cb)
00454 {
00455 FILE *f = (first_callout | callout_extra)? outfile : NULL;
00456 int i, pre_start, post_start, subject_length;
00457
00458 if (callout_extra)
00459 {
00460 fprintf(f, "Callout %d: last capture = %d\n",
00461 cb->callout_number, cb->capture_last);
00462
00463 for (i = 0; i < cb->capture_top * 2; i += 2)
00464 {
00465 if (cb->offset_vector[i] < 0)
00466 fprintf(f, "%2d: <unset>\n", i/2);
00467 else
00468 {
00469 fprintf(f, "%2d: ", i/2);
00470 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
00471 cb->offset_vector[i+1] - cb->offset_vector[i], f);
00472 fprintf(f, "\n");
00473 }
00474 }
00475 }
00476
00477
00478
00479
00480
00481 if (f != NULL) fprintf(f, "--->");
00482
00483 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
00484 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
00485 cb->current_position - cb->start_match, f);
00486
00487 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
00488
00489 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
00490 cb->subject_length - cb->current_position, f);
00491
00492 if (f != NULL) fprintf(f, "\n");
00493
00494
00495
00496
00497 if (cb->callout_number == 255)
00498 {
00499 fprintf(outfile, "%+3d ", cb->pattern_position);
00500 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
00501 }
00502 else
00503 {
00504 if (callout_extra) fprintf(outfile, " ");
00505 else fprintf(outfile, "%3d ", cb->callout_number);
00506 }
00507
00508 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
00509 fprintf(outfile, "^");
00510
00511 if (post_start > 0)
00512 {
00513 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
00514 fprintf(outfile, "^");
00515 }
00516
00517 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
00518 fprintf(outfile, " ");
00519
00520 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
00521 pbuffer + cb->pattern_position);
00522
00523 fprintf(outfile, "\n");
00524 first_callout = 0;
00525
00526 if (cb->callout_data != NULL)
00527 {
00528 int callout_data = *((int *)(cb->callout_data));
00529 if (callout_data != 0)
00530 {
00531 fprintf(outfile, "Callout data = %d\n", callout_data);
00532 return callout_data;
00533 }
00534 }
00535
00536 return (cb->callout_number != callout_fail_id)? 0 :
00537 (++callout_count >= callout_fail_count)? 1 : 0;
00538 }
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548 static void *new_malloc(size_t size)
00549 {
00550 void *block = malloc(size);
00551 gotten_store = size;
00552 if (show_malloc)
00553 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
00554 return block;
00555 }
00556
00557 static void new_free(void *block)
00558 {
00559 if (show_malloc)
00560 fprintf(outfile, "free %p\n", block);
00561 free(block);
00562 }
00563
00564
00565
00566
00567 static void *stack_malloc(size_t size)
00568 {
00569 void *block = malloc(size);
00570 if (show_malloc)
00571 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
00572 return block;
00573 }
00574
00575 static void stack_free(void *block)
00576 {
00577 if (show_malloc)
00578 fprintf(outfile, "stack_free %p\n", block);
00579 free(block);
00580 }
00581
00582
00583
00584
00585
00586
00587
00588
00589 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
00590 {
00591 int rc;
00592 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
00593 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
00594 }
00595
00596
00597
00598
00599
00600
00601
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value   the value to flip
  n       2 to swap the two low-order bytes; any other value reverses the
            four low-order bytes

Returns:  the flipped value; only the bytes taking part in the swap appear
          in the result, everything above them is zero */

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;          /* least significant byte */
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2, b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;
  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
00611
00612
00613
00614
00615
00616
00617
00618
00619 static int
00620 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
00621 int start_offset, int options, int *use_offsets, int use_size_offsets,
00622 int flag, unsigned long int *limit, int errnumber, const char *msg)
00623 {
00624 int count;
00625 int min = 0;
00626 int mid = 64;
00627 int max = -1;
00628
00629 extra->flags |= flag;
00630
00631 for (;;)
00632 {
00633 *limit = mid;
00634
00635 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
00636 use_offsets, use_size_offsets);
00637
00638 if (count == errnumber)
00639 {
00640
00641 min = mid;
00642 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
00643 }
00644
00645 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
00646 count == PCRE_ERROR_PARTIAL)
00647 {
00648 if (mid == min + 1)
00649 {
00650 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
00651 break;
00652 }
00653
00654 max = mid;
00655 mid = (min + mid)/2;
00656 }
00657 else break;
00658 }
00659
00660 extra->flags &= ~flag;
00661 return count;
00662 }
00663
00664
00665
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675
00676
00677
00678
00679 static int
00680 strncmpic(uschar *s, uschar *t, int n)
00681 {
00682 while (n--)
00683 {
00684 int c = tolower(*s++) - tolower(*t++);
00685 if (c) return c;
00686 }
00687 return 0;
00688 }
00689
00690
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707 static int
00708 check_newline(uschar *p, FILE *f)
00709 {
00710 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
00711 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
00712 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
00713 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
00714 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
00715 fprintf(f, "Unknown newline type at: <%s\n", p);
00716 return 0;
00717 }
00718
00719
00720
00721
00722
00723
00724
/* Write the command-line usage summary to the standard output. The -dfa
and -p lines appear only when the corresponding feature is compiled in. */

static void
usage(void)
{
  static const char *const help_lines[] =
    {
    "Usage: pcretest [options] [<input> [<output>]]\n",
    " -b show compiled code (bytecode)\n",
    " -C show PCRE compile-time options and exit\n",
    " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
    " -dfa force DFA matching for all subjects\n",
#endif
    " -help show usage information\n",
    " -i show information about compiled patterns\n",
    " -m output memory used information\n",
    " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
    " -p use POSIX interface\n",
#endif
    " -q quiet: do not output PCRE version number at start\n",
    " -S <n> set stack size to <n> megabytes\n",
    " -s output store (memory) used information\n",
    " -t time compilation and execution\n",
    " -t <n> time compilation and execution, repeating <n> times\n",
    " -tm time execution (matching) only\n",
    " -tm <n> time execution (matching) only, repeating <n> times\n"
    };
  size_t k;

  for (k = 0; k < sizeof(help_lines)/sizeof(help_lines[0]); k++)
    fputs(help_lines[k], stdout);
}
00750
00751
00752
00753
00754
00755
00756
00757
00758
00759
00760
00761 int main(int argc, char **argv)
00762 {
00763 FILE *infile = stdin;
00764 int options = 0;
00765 int study_options = 0;
00766 int op = 1;
00767 int timeit = 0;
00768 int timeitm = 0;
00769 int showinfo = 0;
00770 int showstore = 0;
00771 int quiet = 0;
00772 int size_offsets = 45;
00773 int size_offsets_max;
00774 int *offsets = NULL;
00775 #if !defined NOPOSIX
00776 int posix = 0;
00777 #endif
00778 int debug = 0;
00779 int done = 0;
00780 int all_use_dfa = 0;
00781 int yield = 0;
00782 int stack_size;
00783
00784
00785
00786
00787 uschar copynames[1024];
00788 uschar getnames[1024];
00789
00790 uschar *copynamesptr;
00791 uschar *getnamesptr;
00792
00793
00794
00795
00796 buffer = (unsigned char *)malloc(buffer_size);
00797 dbuffer = (unsigned char *)malloc(buffer_size);
00798 pbuffer = (unsigned char *)malloc(buffer_size);
00799
00800
00801
00802 outfile = stdout;
00803
00804
00805
00806
00807
00808
00809 #if defined(_WIN32) || defined(WIN32)
00810 _setmode( _fileno( stdout ), _O_BINARY );
00811 #endif
00812
00813
00814
00815 while (argc > 1 && argv[op][0] == '-')
00816 {
00817 unsigned char *endptr;
00818
00819 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
00820 showstore = 1;
00821 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
00822 else if (strcmp(argv[op], "-b") == 0) debug = 1;
00823 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
00824 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
00825 #if !defined NODFA
00826 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
00827 #endif
00828 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
00829 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
00830 *endptr == 0))
00831 {
00832 op++;
00833 argc--;
00834 }
00835 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
00836 {
00837 int both = argv[op][2] == 0;
00838 int temp;
00839 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
00840 *endptr == 0))
00841 {
00842 timeitm = temp;
00843 op++;
00844 argc--;
00845 }
00846 else timeitm = LOOPREPEAT;
00847 if (both) timeit = timeitm;
00848 }
00849 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
00850 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
00851 *endptr == 0))
00852 {
00853 #if defined(_WIN32) || defined(WIN32)
00854 printf("PCRE: -S not supported on this OS\n");
00855 exit(1);
00856 #else
00857 int rc;
00858 struct rlimit rlim;
00859 getrlimit(RLIMIT_STACK, &rlim);
00860 rlim.rlim_cur = stack_size * 1024 * 1024;
00861 rc = setrlimit(RLIMIT_STACK, &rlim);
00862 if (rc != 0)
00863 {
00864 printf("PCRE: setrlimit() failed with error %d\n", rc);
00865 exit(1);
00866 }
00867 op++;
00868 argc--;
00869 #endif
00870 }
00871 #if !defined NOPOSIX
00872 else if (strcmp(argv[op], "-p") == 0) posix = 1;
00873 #endif
00874 else if (strcmp(argv[op], "-C") == 0)
00875 {
00876 int rc;
00877 printf("PCRE version %s\n", pcre_version());
00878 printf("Compiled with\n");
00879 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
00880 printf(" %sUTF-8 support\n", rc? "" : "No ");
00881 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
00882 printf(" %sUnicode properties support\n", rc? "" : "No ");
00883 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
00884 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
00885 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
00886 (rc == -2)? "ANYCRLF" :
00887 (rc == -1)? "ANY" : "???");
00888 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
00889 printf(" Internal link size = %d\n", rc);
00890 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
00891 printf(" POSIX malloc threshold = %d\n", rc);
00892 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
00893 printf(" Default match limit = %d\n", rc);
00894 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
00895 printf(" Default recursion depth limit = %d\n", rc);
00896 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
00897 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
00898 goto EXIT;
00899 }
00900 else if (strcmp(argv[op], "-help") == 0 ||
00901 strcmp(argv[op], "--help") == 0)
00902 {
00903 usage();
00904 goto EXIT;
00905 }
00906 else
00907 {
00908 printf("** Unknown or malformed option %s\n", argv[op]);
00909 usage();
00910 yield = 1;
00911 goto EXIT;
00912 }
00913 op++;
00914 argc--;
00915 }
00916
00917
00918
00919 size_offsets_max = size_offsets;
00920 offsets = (int *)malloc(size_offsets_max * sizeof(int));
00921 if (offsets == NULL)
00922 {
00923 printf("** Failed to get %d bytes of memory for offsets vector\n",
00924 (int)(size_offsets_max * sizeof(int)));
00925 yield = 1;
00926 goto EXIT;
00927 }
00928
00929
00930
00931 if (argc > 1)
00932 {
00933 infile = fopen(argv[op], INPUT_MODE);
00934 if (infile == NULL)
00935 {
00936 printf("** Failed to open %s\n", argv[op]);
00937 yield = 1;
00938 goto EXIT;
00939 }
00940 }
00941
00942 if (argc > 2)
00943 {
00944 outfile = fopen(argv[op+1], OUTPUT_MODE);
00945 if (outfile == NULL)
00946 {
00947 printf("** Failed to open %s\n", argv[op+1]);
00948 yield = 1;
00949 goto EXIT;
00950 }
00951 }
00952
00953
00954
00955 pcre_malloc = new_malloc;
00956 pcre_free = new_free;
00957 pcre_stack_malloc = stack_malloc;
00958 pcre_stack_free = stack_free;
00959
00960
00961
00962 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
00963
00964
00965
00966 while (!done)
00967 {
00968 pcre *re = NULL;
00969 pcre_extra *extra = NULL;
00970
00971 #if !defined NOPOSIX
00972 regex_t preg;
00973 int do_posix = 0;
00974 #endif
00975
00976 const char *error;
00977 unsigned char *p, *pp, *ppp;
00978 unsigned char *to_file = NULL;
00979 const unsigned char *tables = NULL;
00980 unsigned long int true_size, true_study_size = 0;
00981 size_t size, regex_gotten_store;
00982 int do_study = 0;
00983 int do_debug = debug;
00984 int do_G = 0;
00985 int do_g = 0;
00986 int do_showinfo = showinfo;
00987 int do_showrest = 0;
00988 int do_flip = 0;
00989 int erroroffset, len, delimiter, poffset;
00990
00991 use_utf8 = 0;
00992 debug_lengths = 1;
00993
00994 if (infile == stdin) printf(" re> ");
00995 if (extend_inputline(infile, buffer) == NULL) break;
00996 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
00997 fflush(outfile);
00998
00999 p = buffer;
01000 while (isspace(*p)) p++;
01001 if (*p == 0) continue;
01002
01003
01004
01005 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
01006 {
01007 unsigned long int magic, get_options;
01008 uschar sbuf[8];
01009 FILE *f;
01010
01011 p++;
01012 pp = p + (int)strlen((char *)p);
01013 while (isspace(pp[-1])) pp--;
01014 *pp = 0;
01015
01016 f = fopen((char *)p, "rb");
01017 if (f == NULL)
01018 {
01019 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
01020 continue;
01021 }
01022
01023 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
01024
01025 true_size =
01026 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
01027 true_study_size =
01028 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
01029
01030 re = (real_pcre *)new_malloc(true_size);
01031 regex_gotten_store = gotten_store;
01032
01033 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
01034
01035 magic = ((real_pcre *)re)->magic_number;
01036 if (magic != MAGIC_NUMBER)
01037 {
01038 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
01039 {
01040 do_flip = 1;
01041 }
01042 else
01043 {
01044 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
01045 fclose(f);
01046 continue;
01047 }
01048 }
01049
01050 fprintf(outfile, "Compiled regex%s loaded from %s\n",
01051 do_flip? " (byte-inverted)" : "", p);
01052
01053
01054
01055 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
01056 use_utf8 = (get_options & PCRE_UTF8) != 0;
01057
01058
01059
01060 if (true_study_size != 0)
01061 {
01062 pcre_study_data *psd;
01063
01064 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
01065 extra->flags = PCRE_EXTRA_STUDY_DATA;
01066
01067 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
01068 extra->study_data = psd;
01069
01070 if (fread(psd, 1, true_study_size, f) != true_study_size)
01071 {
01072 FAIL_READ:
01073 fprintf(outfile, "Failed to read data from %s\n", p);
01074 if (extra != NULL) new_free(extra);
01075 if (re != NULL) new_free(re);
01076 fclose(f);
01077 continue;
01078 }
01079 fprintf(outfile, "Study data loaded from %s\n", p);
01080 do_study = 1;
01081 }
01082 else fprintf(outfile, "No study data\n");
01083
01084 fclose(f);
01085 goto SHOW_INFO;
01086 }
01087
01088
01089
01090
01091 delimiter = *p++;
01092
01093 if (isalnum(delimiter) || delimiter == '\\')
01094 {
01095 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
01096 goto SKIP_DATA;
01097 }
01098
01099 pp = p;
01100 poffset = p - buffer;
01101
01102 for(;;)
01103 {
01104 while (*pp != 0)
01105 {
01106 if (*pp == '\\' && pp[1] != 0) pp++;
01107 else if (*pp == delimiter) break;
01108 pp++;
01109 }
01110 if (*pp != 0) break;
01111 if (infile == stdin) printf(" > ");
01112 if ((pp = extend_inputline(infile, pp)) == NULL)
01113 {
01114 fprintf(outfile, "** Unexpected EOF\n");
01115 done = 1;
01116 goto CONTINUE;
01117 }
01118 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
01119 }
01120
01121
01122
01123
01124 p = buffer + poffset;
01125
01126
01127
01128
01129
01130 if (pp[1] == '\\') *pp++ = '\\';
01131
01132
01133
01134
01135 *pp++ = 0;
01136 strcpy((char *)pbuffer, (char *)p);
01137
01138
01139
01140 options = 0;
01141 study_options = 0;
01142 log_store = showstore;
01143
01144 while (*pp != 0)
01145 {
01146 switch (*pp++)
01147 {
01148 case 'f': options |= PCRE_FIRSTLINE; break;
01149 case 'g': do_g = 1; break;
01150 case 'i': options |= PCRE_CASELESS; break;
01151 case 'm': options |= PCRE_MULTILINE; break;
01152 case 's': options |= PCRE_DOTALL; break;
01153 case 'x': options |= PCRE_EXTENDED; break;
01154
01155 case '+': do_showrest = 1; break;
01156 case 'A': options |= PCRE_ANCHORED; break;
01157 case 'B': do_debug = 1; break;
01158 case 'C': options |= PCRE_AUTO_CALLOUT; break;
01159 case 'D': do_debug = do_showinfo = 1; break;
01160 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
01161 case 'F': do_flip = 1; break;
01162 case 'G': do_G = 1; break;
01163 case 'I': do_showinfo = 1; break;
01164 case 'J': options |= PCRE_DUPNAMES; break;
01165 case 'M': log_store = 1; break;
01166 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
01167
01168 #if !defined NOPOSIX
01169 case 'P': do_posix = 1; break;
01170 #endif
01171
01172 case 'S': do_study = 1; break;
01173 case 'U': options |= PCRE_UNGREEDY; break;
01174 case 'X': options |= PCRE_EXTRA; break;
01175 case 'Z': debug_lengths = 0; break;
01176 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
01177 case '?': options |= PCRE_NO_UTF8_CHECK; break;
01178
01179 case 'L':
01180 ppp = pp;
01181
01182
01183 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
01184 *ppp = 0;
01185 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
01186 {
01187 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
01188 goto SKIP_DATA;
01189 }
01190 locale_set = 1;
01191 tables = pcre_maketables();
01192 pp = ppp;
01193 break;
01194
01195 case '>':
01196 to_file = pp;
01197 while (*pp != 0) pp++;
01198 while (isspace(pp[-1])) pp--;
01199 *pp = 0;
01200 break;
01201
01202 case '<':
01203 {
01204 int x = check_newline(pp, outfile);
01205 if (x == 0) goto SKIP_DATA;
01206 options |= x;
01207 while (*pp++ != '>');
01208 }
01209 break;
01210
01211 case '\r':
01212 case '\n':
01213 case ' ':
01214 break;
01215
01216 default:
01217 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
01218 goto SKIP_DATA;
01219 }
01220 }
01221
01222
01223
01224
01225
01226 #if !defined NOPOSIX
01227 if (posix || do_posix)
01228 {
01229 int rc;
01230 int cflags = 0;
01231
01232 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
01233 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
01234 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
01235 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
01236 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
01237
01238 rc = regcomp(&preg, (char *)p, cflags);
01239
01240
01241
01242
01243 if (rc != 0)
01244 {
01245 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
01246 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
01247 goto SKIP_DATA;
01248 }
01249 }
01250
01251
01252
01253 else
01254 #endif
01255
01256 {
01257 if (timeit > 0)
01258 {
01259 register int i;
01260 clock_t time_taken;
01261 clock_t start_time = clock();
01262 for (i = 0; i < timeit; i++)
01263 {
01264 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
01265 if (re != NULL) free(re);
01266 }
01267 time_taken = clock() - start_time;
01268 fprintf(outfile, "Compile time %.4f milliseconds\n",
01269 (((double)time_taken * 1000.0) / (double)timeit) /
01270 (double)CLOCKS_PER_SEC);
01271 }
01272
01273 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
01274
01275
01276
01277
01278 if (re == NULL)
01279 {
01280 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
01281 SKIP_DATA:
01282 if (infile != stdin)
01283 {
01284 for (;;)
01285 {
01286 if (extend_inputline(infile, buffer) == NULL)
01287 {
01288 done = 1;
01289 goto CONTINUE;
01290 }
01291 len = (int)strlen((char *)buffer);
01292 while (len > 0 && isspace(buffer[len-1])) len--;
01293 if (len == 0) break;
01294 }
01295 fprintf(outfile, "\n");
01296 }
01297 goto CONTINUE;
01298 }
01299
01300
01301
01302
01303
01304 if (log_store)
01305 fprintf(outfile, "Memory allocation (code space): %d\n",
01306 (int)(gotten_store -
01307 sizeof(real_pcre) -
01308 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
01309
01310
01311
01312
01313 true_size = ((real_pcre *)re)->size;
01314 regex_gotten_store = gotten_store;
01315
01316
01317
01318
01319 if (do_study)
01320 {
01321 if (timeit > 0)
01322 {
01323 register int i;
01324 clock_t time_taken;
01325 clock_t start_time = clock();
01326 for (i = 0; i < timeit; i++)
01327 extra = pcre_study(re, study_options, &error);
01328 time_taken = clock() - start_time;
01329 if (extra != NULL) free(extra);
01330 fprintf(outfile, " Study time %.4f milliseconds\n",
01331 (((double)time_taken * 1000.0) / (double)timeit) /
01332 (double)CLOCKS_PER_SEC);
01333 }
01334 extra = pcre_study(re, study_options, &error);
01335 if (error != NULL)
01336 fprintf(outfile, "Failed to study: %s\n", error);
01337 else if (extra != NULL)
01338 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
01339 }
01340
01341
01342
01343
01344
01345
01346 if (do_flip)
01347 {
01348 real_pcre *rre = (real_pcre *)re;
01349 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
01350 rre->size = byteflip(rre->size, sizeof(rre->size));
01351 rre->options = byteflip(rre->options, sizeof(rre->options));
01352 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
01353 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
01354 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
01355 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
01356 rre->name_table_offset = byteflip(rre->name_table_offset,
01357 sizeof(rre->name_table_offset));
01358 rre->name_entry_size = byteflip(rre->name_entry_size,
01359 sizeof(rre->name_entry_size));
01360 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
01361
01362 if (extra != NULL)
01363 {
01364 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
01365 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
01366 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
01367 }
01368 }
01369
01370
01371
01372 SHOW_INFO:
01373
01374 if (do_debug)
01375 {
01376 fprintf(outfile, "------------------------------------------------------------------\n");
01377 pcre_printint(re, outfile, debug_lengths);
01378 }
01379
01380 if (do_showinfo)
01381 {
01382 unsigned long int get_options, all_options;
01383 #if !defined NOINFOCHECK
01384 int old_first_char, old_options, old_count;
01385 #endif
01386 int count, backrefmax, first_char, need_char, okpartial, jchanged,
01387 hascrorlf;
01388 int nameentrysize, namecount;
01389 const uschar *nametable;
01390
01391 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
01392 new_info(re, NULL, PCRE_INFO_SIZE, &size);
01393 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
01394 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
01395 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
01396 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
01397 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
01398 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
01399 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
01400 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
01401 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
01402 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
01403
01404 #if !defined NOINFOCHECK
01405 old_count = pcre_info(re, &old_options, &old_first_char);
01406 if (count < 0) fprintf(outfile,
01407 "Error %d from pcre_info()\n", count);
01408 else
01409 {
01410 if (old_count != count) fprintf(outfile,
01411 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
01412 old_count);
01413
01414 if (old_first_char != first_char) fprintf(outfile,
01415 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
01416 first_char, old_first_char);
01417
01418 if (old_options != (int)get_options) fprintf(outfile,
01419 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
01420 get_options, old_options);
01421 }
01422 #endif
01423
01424 if (size != regex_gotten_store) fprintf(outfile,
01425 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
01426 (int)size, (int)regex_gotten_store);
01427
01428 fprintf(outfile, "Capturing subpattern count = %d\n", count);
01429 if (backrefmax > 0)
01430 fprintf(outfile, "Max back reference = %d\n", backrefmax);
01431
01432 if (namecount > 0)
01433 {
01434 fprintf(outfile, "Named capturing subpatterns:\n");
01435 while (namecount-- > 0)
01436 {
01437 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
01438 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
01439 GET2(nametable, 0));
01440 nametable += nameentrysize;
01441 }
01442 }
01443
01444 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
01445 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
01446
01447 all_options = ((real_pcre *)re)->options;
01448 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
01449
01450 if (get_options == 0) fprintf(outfile, "No options\n");
01451 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
01452 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
01453 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
01454 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
01455 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
01456 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
01457 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
01458 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
01459 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
01460 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
01461 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
01462 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
01463 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
01464 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
01465
01466 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
01467
01468 switch (get_options & PCRE_NEWLINE_BITS)
01469 {
01470 case PCRE_NEWLINE_CR:
01471 fprintf(outfile, "Forced newline sequence: CR\n");
01472 break;
01473
01474 case PCRE_NEWLINE_LF:
01475 fprintf(outfile, "Forced newline sequence: LF\n");
01476 break;
01477
01478 case PCRE_NEWLINE_CRLF:
01479 fprintf(outfile, "Forced newline sequence: CRLF\n");
01480 break;
01481
01482 case PCRE_NEWLINE_ANYCRLF:
01483 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
01484 break;
01485
01486 case PCRE_NEWLINE_ANY:
01487 fprintf(outfile, "Forced newline sequence: ANY\n");
01488 break;
01489
01490 default:
01491 break;
01492 }
01493
01494 if (first_char == -1)
01495 {
01496 fprintf(outfile, "First char at start or follows newline\n");
01497 }
01498 else if (first_char < 0)
01499 {
01500 fprintf(outfile, "No first char\n");
01501 }
01502 else
01503 {
01504 int ch = first_char & 255;
01505 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
01506 "" : " (caseless)";
01507 if (PRINTHEX(ch))
01508 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
01509 else
01510 fprintf(outfile, "First char = %d%s\n", ch, caseless);
01511 }
01512
01513 if (need_char < 0)
01514 {
01515 fprintf(outfile, "No need char\n");
01516 }
01517 else
01518 {
01519 int ch = need_char & 255;
01520 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
01521 "" : " (caseless)";
01522 if (PRINTHEX(ch))
01523 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
01524 else
01525 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
01526 }
01527
01528
01529
01530
01531
01532
01533 if (do_study)
01534 {
01535 if (extra == NULL)
01536 fprintf(outfile, "Study returned NULL\n");
01537 else
01538 {
01539 uschar *start_bits = NULL;
01540 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
01541
01542 if (start_bits == NULL)
01543 fprintf(outfile, "No starting byte set\n");
01544 else
01545 {
01546 int i;
01547 int c = 24;
01548 fprintf(outfile, "Starting byte set: ");
01549 for (i = 0; i < 256; i++)
01550 {
01551 if ((start_bits[i/8] & (1<<(i&7))) != 0)
01552 {
01553 if (c > 75)
01554 {
01555 fprintf(outfile, "\n ");
01556 c = 2;
01557 }
01558 if (PRINTHEX(i) && i != ' ')
01559 {
01560 fprintf(outfile, "%c ", i);
01561 c += 2;
01562 }
01563 else
01564 {
01565 fprintf(outfile, "\\x%02x ", i);
01566 c += 5;
01567 }
01568 }
01569 }
01570 fprintf(outfile, "\n");
01571 }
01572 }
01573 }
01574 }
01575
01576
01577
01578
01579
01580 if (to_file != NULL)
01581 {
01582 FILE *f = fopen((char *)to_file, "wb");
01583 if (f == NULL)
01584 {
01585 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
01586 }
01587 else
01588 {
01589 uschar sbuf[8];
01590 sbuf[0] = (true_size >> 24) & 255;
01591 sbuf[1] = (true_size >> 16) & 255;
01592 sbuf[2] = (true_size >> 8) & 255;
01593 sbuf[3] = (true_size) & 255;
01594
01595 sbuf[4] = (true_study_size >> 24) & 255;
01596 sbuf[5] = (true_study_size >> 16) & 255;
01597 sbuf[6] = (true_study_size >> 8) & 255;
01598 sbuf[7] = (true_study_size) & 255;
01599
01600 if (fwrite(sbuf, 1, 8, f) < 8 ||
01601 fwrite(re, 1, true_size, f) < true_size)
01602 {
01603 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
01604 }
01605 else
01606 {
01607 fprintf(outfile, "Compiled regex written to %s\n", to_file);
01608 if (extra != NULL)
01609 {
01610 if (fwrite(extra->study_data, 1, true_study_size, f) <
01611 true_study_size)
01612 {
01613 fprintf(outfile, "Write error on %s: %s\n", to_file,
01614 strerror(errno));
01615 }
01616 else fprintf(outfile, "Study data written to %s\n", to_file);
01617
01618 }
01619 }
01620 fclose(f);
01621 }
01622
01623 new_free(re);
01624 if (extra != NULL) new_free(extra);
01625 if (tables != NULL) new_free((void *)tables);
01626 continue;
01627 }
01628 }
01629
01630
01631
01632 for (;;)
01633 {
01634 uschar *q;
01635 uschar *bptr;
01636 int *use_offsets = offsets;
01637 int use_size_offsets = size_offsets;
01638 int callout_data = 0;
01639 int callout_data_set = 0;
01640 int count, c;
01641 int copystrings = 0;
01642 int find_match_limit = 0;
01643 int getstrings = 0;
01644 int getlist = 0;
01645 int gmatched = 0;
01646 int start_offset = 0;
01647 int g_notempty = 0;
01648 int use_dfa = 0;
01649
01650 options = 0;
01651
01652 *copynames = 0;
01653 *getnames = 0;
01654
01655 copynamesptr = copynames;
01656 getnamesptr = getnames;
01657
01658 pcre_callout = callout;
01659 first_callout = 1;
01660 callout_extra = 0;
01661 callout_count = 0;
01662 callout_fail_count = 999999;
01663 callout_fail_id = -1;
01664 show_malloc = 0;
01665
01666 if (extra != NULL) extra->flags &=
01667 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
01668
01669 len = 0;
01670 for (;;)
01671 {
01672 if (infile == stdin) printf("data> ");
01673 if (extend_inputline(infile, buffer + len) == NULL)
01674 {
01675 if (len > 0) break;
01676 done = 1;
01677 goto CONTINUE;
01678 }
01679 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
01680 len = (int)strlen((char *)buffer);
01681 if (buffer[len-1] == '\n') break;
01682 }
01683
01684 while (len > 0 && isspace(buffer[len-1])) len--;
01685 buffer[len] = 0;
01686 if (len == 0) break;
01687
01688 p = buffer;
01689 while (isspace(*p)) p++;
01690
01691 bptr = q = dbuffer;
01692 while ((c = *p++) != 0)
01693 {
01694 int i = 0;
01695 int n = 0;
01696
01697 if (c == '\\') switch ((c = *p++))
01698 {
01699 case 'a': c = 7; break;
01700 case 'b': c = '\b'; break;
01701 case 'e': c = 27; break;
01702 case 'f': c = '\f'; break;
01703 case 'n': c = '\n'; break;
01704 case 'r': c = '\r'; break;
01705 case 't': c = '\t'; break;
01706 case 'v': c = '\v'; break;
01707
01708 case '0': case '1': case '2': case '3':
01709 case '4': case '5': case '6': case '7':
01710 c -= '0';
01711 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
01712 c = c * 8 + *p++ - '0';
01713
01714 #if !defined NOUTF8
01715 if (use_utf8 && c > 255)
01716 {
01717 unsigned char buff8[8];
01718 int ii, utn;
01719 utn = ord2utf8(c, buff8);
01720 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
01721 c = buff8[ii];
01722 }
01723 #endif
01724 break;
01725
01726 case 'x':
01727
01728
01729
01730 #if !defined NOUTF8
01731 if (*p == '{')
01732 {
01733 unsigned char *pt = p;
01734 c = 0;
01735 while (isxdigit(*(++pt)))
01736 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
01737 if (*pt == '}')
01738 {
01739 unsigned char buff8[8];
01740 int ii, utn;
01741 utn = ord2utf8(c, buff8);
01742 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
01743 c = buff8[ii];
01744 p = pt + 1;
01745 break;
01746 }
01747
01748 }
01749 #endif
01750
01751
01752
01753 c = 0;
01754 while (i++ < 2 && isxdigit(*p))
01755 {
01756 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
01757 p++;
01758 }
01759 break;
01760
01761 case 0:
01762 p--;
01763 continue;
01764
01765 case '>':
01766 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
01767 continue;
01768
01769 case 'A':
01770 options |= PCRE_ANCHORED;
01771 continue;
01772
01773 case 'B':
01774 options |= PCRE_NOTBOL;
01775 continue;
01776
01777 case 'C':
01778 if (isdigit(*p))
01779 {
01780 while(isdigit(*p)) n = n * 10 + *p++ - '0';
01781 copystrings |= 1 << n;
01782 }
01783 else if (isalnum(*p))
01784 {
01785 uschar *npp = copynamesptr;
01786 while (isalnum(*p)) *npp++ = *p++;
01787 *npp++ = 0;
01788 *npp = 0;
01789 n = pcre_get_stringnumber(re, (char *)copynamesptr);
01790 if (n < 0)
01791 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
01792 copynamesptr = npp;
01793 }
01794 else if (*p == '+')
01795 {
01796 callout_extra = 1;
01797 p++;
01798 }
01799 else if (*p == '-')
01800 {
01801 pcre_callout = NULL;
01802 p++;
01803 }
01804 else if (*p == '!')
01805 {
01806 callout_fail_id = 0;
01807 p++;
01808 while(isdigit(*p))
01809 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
01810 callout_fail_count = 0;
01811 if (*p == '!')
01812 {
01813 p++;
01814 while(isdigit(*p))
01815 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
01816 }
01817 }
01818 else if (*p == '*')
01819 {
01820 int sign = 1;
01821 callout_data = 0;
01822 if (*(++p) == '-') { sign = -1; p++; }
01823 while(isdigit(*p))
01824 callout_data = callout_data * 10 + *p++ - '0';
01825 callout_data *= sign;
01826 callout_data_set = 1;
01827 }
01828 continue;
01829
01830 #if !defined NODFA
01831 case 'D':
01832 #if !defined NOPOSIX
01833 if (posix || do_posix)
01834 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
01835 else
01836 #endif
01837 use_dfa = 1;
01838 continue;
01839
01840 case 'F':
01841 options |= PCRE_DFA_SHORTEST;
01842 continue;
01843 #endif
01844
01845 case 'G':
01846 if (isdigit(*p))
01847 {
01848 while(isdigit(*p)) n = n * 10 + *p++ - '0';
01849 getstrings |= 1 << n;
01850 }
01851 else if (isalnum(*p))
01852 {
01853 uschar *npp = getnamesptr;
01854 while (isalnum(*p)) *npp++ = *p++;
01855 *npp++ = 0;
01856 *npp = 0;
01857 n = pcre_get_stringnumber(re, (char *)getnamesptr);
01858 if (n < 0)
01859 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
01860 getnamesptr = npp;
01861 }
01862 continue;
01863
01864 case 'L':
01865 getlist = 1;
01866 continue;
01867
01868 case 'M':
01869 find_match_limit = 1;
01870 continue;
01871
01872 case 'N':
01873 options |= PCRE_NOTEMPTY;
01874 continue;
01875
01876 case 'O':
01877 while(isdigit(*p)) n = n * 10 + *p++ - '0';
01878 if (n > size_offsets_max)
01879 {
01880 size_offsets_max = n;
01881 free(offsets);
01882 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
01883 if (offsets == NULL)
01884 {
01885 printf("** Failed to get %d bytes of memory for offsets vector\n",
01886 (int)(size_offsets_max * sizeof(int)));
01887 yield = 1;
01888 goto EXIT;
01889 }
01890 }
01891 use_size_offsets = n;
01892 if (n == 0) use_offsets = NULL;
01893 continue;
01894
01895 case 'P':
01896 options |= PCRE_PARTIAL;
01897 continue;
01898
01899 case 'Q':
01900 while(isdigit(*p)) n = n * 10 + *p++ - '0';
01901 if (extra == NULL)
01902 {
01903 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
01904 extra->flags = 0;
01905 }
01906 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
01907 extra->match_limit_recursion = n;
01908 continue;
01909
01910 case 'q':
01911 while(isdigit(*p)) n = n * 10 + *p++ - '0';
01912 if (extra == NULL)
01913 {
01914 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
01915 extra->flags = 0;
01916 }
01917 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
01918 extra->match_limit = n;
01919 continue;
01920
01921 #if !defined NODFA
01922 case 'R':
01923 options |= PCRE_DFA_RESTART;
01924 continue;
01925 #endif
01926
01927 case 'S':
01928 show_malloc = 1;
01929 continue;
01930
01931 case 'Z':
01932 options |= PCRE_NOTEOL;
01933 continue;
01934
01935 case '?':
01936 options |= PCRE_NO_UTF8_CHECK;
01937 continue;
01938
01939 case '<':
01940 {
01941 int x = check_newline(p, outfile);
01942 if (x == 0) goto NEXT_DATA;
01943 options |= x;
01944 while (*p++ != '>');
01945 }
01946 continue;
01947 }
01948 *q++ = c;
01949 }
01950 *q = 0;
01951 len = q - dbuffer;
01952
01953 if ((all_use_dfa || use_dfa) && find_match_limit)
01954 {
01955 printf("**Match limit not relevant for DFA matching: ignored\n");
01956 find_match_limit = 0;
01957 }
01958
01959
01960
01961
01962 #if !defined NOPOSIX
01963 if (posix || do_posix)
01964 {
01965 int rc;
01966 int eflags = 0;
01967 regmatch_t *pmatch = NULL;
01968 if (use_size_offsets > 0)
01969 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
01970 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
01971 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
01972
01973 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
01974
01975 if (rc != 0)
01976 {
01977 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
01978 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
01979 }
01980 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
01981 != 0)
01982 {
01983 fprintf(outfile, "Matched with REG_NOSUB\n");
01984 }
01985 else
01986 {
01987 size_t i;
01988 for (i = 0; i < (size_t)use_size_offsets; i++)
01989 {
01990 if (pmatch[i].rm_so >= 0)
01991 {
01992 fprintf(outfile, "%2d: ", (int)i);
01993 (void)pchars(dbuffer + pmatch[i].rm_so,
01994 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
01995 fprintf(outfile, "\n");
01996 if (i == 0 && do_showrest)
01997 {
01998 fprintf(outfile, " 0+ ");
01999 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
02000 outfile);
02001 fprintf(outfile, "\n");
02002 }
02003 }
02004 }
02005 }
02006 free(pmatch);
02007 }
02008
02009
02010
02011 else
02012 #endif
02013
02014 for (;; gmatched++)
02015 {
02016 if (timeitm > 0)
02017 {
02018 register int i;
02019 clock_t time_taken;
02020 clock_t start_time = clock();
02021
02022 #if !defined NODFA
02023 if (all_use_dfa || use_dfa)
02024 {
02025 int workspace[1000];
02026 for (i = 0; i < timeitm; i++)
02027 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
02028 options | g_notempty, use_offsets, use_size_offsets, workspace,
02029 sizeof(workspace)/sizeof(int));
02030 }
02031 else
02032 #endif
02033
02034 for (i = 0; i < timeitm; i++)
02035 count = pcre_exec(re, extra, (char *)bptr, len,
02036 start_offset, options | g_notempty, use_offsets, use_size_offsets);
02037
02038 time_taken = clock() - start_time;
02039 fprintf(outfile, "Execute time %.4f milliseconds\n",
02040 (((double)time_taken * 1000.0) / (double)timeitm) /
02041 (double)CLOCKS_PER_SEC);
02042 }
02043
02044
02045
02046
02047
02048 if (find_match_limit)
02049 {
02050 if (extra == NULL)
02051 {
02052 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
02053 extra->flags = 0;
02054 }
02055
02056 (void)check_match_limit(re, extra, bptr, len, start_offset,
02057 options|g_notempty, use_offsets, use_size_offsets,
02058 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
02059 PCRE_ERROR_MATCHLIMIT, "match()");
02060
02061 count = check_match_limit(re, extra, bptr, len, start_offset,
02062 options|g_notempty, use_offsets, use_size_offsets,
02063 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
02064 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
02065 }
02066
02067
02068
02069 else if (callout_data_set)
02070 {
02071 if (extra == NULL)
02072 {
02073 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
02074 extra->flags = 0;
02075 }
02076 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
02077 extra->callout_data = &callout_data;
02078 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
02079 options | g_notempty, use_offsets, use_size_offsets);
02080 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
02081 }
02082
02083
02084
02085
02086 #if !defined NODFA
02087 else if (all_use_dfa || use_dfa)
02088 {
02089 int workspace[1000];
02090 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
02091 options | g_notempty, use_offsets, use_size_offsets, workspace,
02092 sizeof(workspace)/sizeof(int));
02093 if (count == 0)
02094 {
02095 fprintf(outfile, "Matched, but too many subsidiary matches\n");
02096 count = use_size_offsets/2;
02097 }
02098 }
02099 #endif
02100
02101 else
02102 {
02103 count = pcre_exec(re, extra, (char *)bptr, len,
02104 start_offset, options | g_notempty, use_offsets, use_size_offsets);
02105 if (count == 0)
02106 {
02107 fprintf(outfile, "Matched, but too many substrings\n");
02108 count = use_size_offsets/3;
02109 }
02110 }
02111
02112
02113
02114 if (count >= 0)
02115 {
02116 int i, maxcount;
02117
02118 #if !defined NODFA
02119 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
02120 #endif
02121 maxcount = use_size_offsets/3;
02122
02123
02124
02125 if (count > maxcount)
02126 {
02127 fprintf(outfile,
02128 "** PCRE error: returned count %d is too big for offset size %d\n",
02129 count, use_size_offsets);
02130 count = use_size_offsets/3;
02131 if (do_g || do_G)
02132 {
02133 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
02134 do_g = do_G = FALSE;
02135 }
02136 }
02137
02138 for (i = 0; i < count * 2; i += 2)
02139 {
02140 if (use_offsets[i] < 0)
02141 fprintf(outfile, "%2d: <unset>\n", i/2);
02142 else
02143 {
02144 fprintf(outfile, "%2d: ", i/2);
02145 (void)pchars(bptr + use_offsets[i],
02146 use_offsets[i+1] - use_offsets[i], outfile);
02147 fprintf(outfile, "\n");
02148 if (i == 0)
02149 {
02150 if (do_showrest)
02151 {
02152 fprintf(outfile, " 0+ ");
02153 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
02154 outfile);
02155 fprintf(outfile, "\n");
02156 }
02157 }
02158 }
02159 }
02160
02161 for (i = 0; i < 32; i++)
02162 {
02163 if ((copystrings & (1 << i)) != 0)
02164 {
02165 char copybuffer[256];
02166 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
02167 i, copybuffer, sizeof(copybuffer));
02168 if (rc < 0)
02169 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
02170 else
02171 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
02172 }
02173 }
02174
02175 for (copynamesptr = copynames;
02176 *copynamesptr != 0;
02177 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
02178 {
02179 char copybuffer[256];
02180 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
02181 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
02182 if (rc < 0)
02183 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
02184 else
02185 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
02186 }
02187
02188 for (i = 0; i < 32; i++)
02189 {
02190 if ((getstrings & (1 << i)) != 0)
02191 {
02192 const char *substring;
02193 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
02194 i, &substring);
02195 if (rc < 0)
02196 fprintf(outfile, "get substring %d failed %d\n", i, rc);
02197 else
02198 {
02199 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
02200 pcre_free_substring(substring);
02201 }
02202 }
02203 }
02204
02205 for (getnamesptr = getnames;
02206 *getnamesptr != 0;
02207 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
02208 {
02209 const char *substring;
02210 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
02211 count, (char *)getnamesptr, &substring);
02212 if (rc < 0)
02213 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
02214 else
02215 {
02216 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
02217 pcre_free_substring(substring);
02218 }
02219 }
02220
02221 if (getlist)
02222 {
02223 const char **stringlist;
02224 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
02225 &stringlist);
02226 if (rc < 0)
02227 fprintf(outfile, "get substring list failed %d\n", rc);
02228 else
02229 {
02230 for (i = 0; i < count; i++)
02231 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
02232 if (stringlist[i] != NULL)
02233 fprintf(outfile, "string list not terminated by NULL\n");
02234
02235 pcre_free_substring_list(stringlist);
02236 }
02237 }
02238 }
02239
02240
02241
02242 else if (count == PCRE_ERROR_PARTIAL)
02243 {
02244 fprintf(outfile, "Partial match");
02245 #if !defined NODFA
02246 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
02247 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
02248 bptr + use_offsets[0]);
02249 #endif
02250 fprintf(outfile, "\n");
02251 break;
02252 }
02253
02254
02255
02256
02257
02258
02259
02260
02261
02262
02263
02264
02265
02266
02267
02268 else
02269 {
02270 if (g_notempty != 0)
02271 {
02272 int onechar = 1;
02273 unsigned int obits = ((real_pcre *)re)->options;
02274 use_offsets[0] = start_offset;
02275 if ((obits & PCRE_NEWLINE_BITS) == 0)
02276 {
02277 int d;
02278 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
02279 obits = (d == '\r')? PCRE_NEWLINE_CR :
02280 (d == '\n')? PCRE_NEWLINE_LF :
02281 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
02282 (d == -2)? PCRE_NEWLINE_ANYCRLF :
02283 (d == -1)? PCRE_NEWLINE_ANY : 0;
02284 }
02285 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
02286 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
02287 &&
02288 start_offset < len - 1 &&
02289 bptr[start_offset] == '\r' &&
02290 bptr[start_offset+1] == '\n')
02291 onechar++;
02292 else if (use_utf8)
02293 {
02294 while (start_offset + onechar < len)
02295 {
02296 int tb = bptr[start_offset+onechar];
02297 if (tb <= 127) break;
02298 tb &= 0xc0;
02299 if (tb != 0 && tb != 0xc0) onechar++;
02300 }
02301 }
02302 use_offsets[1] = start_offset + onechar;
02303 }
02304 else
02305 {
02306 if (count == PCRE_ERROR_NOMATCH)
02307 {
02308 if (gmatched == 0) fprintf(outfile, "No match\n");
02309 }
02310 else fprintf(outfile, "Error %d\n", count);
02311 break;
02312 }
02313 }
02314
02315
02316
02317 if (!do_g && !do_G) break;
02318
02319
02320
02321
02322
02323
02324
02325
02326 g_notempty = 0;
02327
02328 if (use_offsets[0] == use_offsets[1])
02329 {
02330 if (use_offsets[0] == len) break;
02331 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
02332 }
02333
02334
02335
02336 if (do_g) start_offset = use_offsets[1];
02337
02338
02339
02340 else
02341 {
02342 bptr += use_offsets[1];
02343 len -= use_offsets[1];
02344 }
02345 }
02346
02347 NEXT_DATA: continue;
02348 }
02349
02350 CONTINUE:
02351
02352 #if !defined NOPOSIX
02353 if (posix || do_posix) regfree(&preg);
02354 #endif
02355
02356 if (re != NULL) new_free(re);
02357 if (extra != NULL) new_free(extra);
02358 if (tables != NULL)
02359 {
02360 new_free((void *)tables);
02361 setlocale(LC_CTYPE, "C");
02362 locale_set = 0;
02363 }
02364 }
02365
02366 if (infile == stdin) fprintf(outfile, "\n");
02367
02368 EXIT:
02369
02370 if (infile != NULL && infile != stdin) fclose(infile);
02371 if (outfile != NULL && outfile != stdout) fclose(outfile);
02372
02373 free(buffer);
02374 free(dbuffer);
02375 free(pbuffer);
02376 free(offsets);
02377
02378 return yield;
02379 }
02380
02381