pcre.h File Reference

#include <stdlib.h>

Go to the source code of this file.

Classes

struct  pcre_extra
struct  pcre_callout_block

Defines

#define PCRE_MAJOR   7
#define PCRE_MINOR   3
#define PCRE_PRERELEASE
#define PCRE_DATE   2007-08-28
#define PCRE_EXP_DECL   extern
#define EXPORT
#define PCRE_CASELESS   0x00000001
#define PCRE_MULTILINE   0x00000002
#define PCRE_DOTALL   0x00000004
#define PCRE_EXTENDED   0x00000008
#define PCRE_ANCHORED   0x00000010
#define PCRE_DOLLAR_ENDONLY   0x00000020
#define PCRE_EXTRA   0x00000040
#define PCRE_NOTBOL   0x00000080
#define PCRE_NOTEOL   0x00000100
#define PCRE_UNGREEDY   0x00000200
#define PCRE_NOTEMPTY   0x00000400
#define PCRE_UTF8   0x00000800
#define PCRE_NO_AUTO_CAPTURE   0x00001000
#define PCRE_NO_UTF8_CHECK   0x00002000
#define PCRE_AUTO_CALLOUT   0x00004000
#define PCRE_PARTIAL   0x00008000
#define PCRE_DFA_SHORTEST   0x00010000
#define PCRE_DFA_RESTART   0x00020000
#define PCRE_FIRSTLINE   0x00040000
#define PCRE_DUPNAMES   0x00080000
#define PCRE_NEWLINE_CR   0x00100000
#define PCRE_NEWLINE_LF   0x00200000
#define PCRE_NEWLINE_CRLF   0x00300000
#define PCRE_NEWLINE_ANY   0x00400000
#define PCRE_NEWLINE_ANYCRLF   0x00500000
#define PCRE_ERROR_NOMATCH   (-1)
#define PCRE_ERROR_NULL   (-2)
#define PCRE_ERROR_BADOPTION   (-3)
#define PCRE_ERROR_BADMAGIC   (-4)
#define PCRE_ERROR_UNKNOWN_OPCODE   (-5)
#define PCRE_ERROR_UNKNOWN_NODE   (-5)
#define PCRE_ERROR_NOMEMORY   (-6)
#define PCRE_ERROR_NOSUBSTRING   (-7)
#define PCRE_ERROR_MATCHLIMIT   (-8)
#define PCRE_ERROR_CALLOUT   (-9)
#define PCRE_ERROR_BADUTF8   (-10)
#define PCRE_ERROR_BADUTF8_OFFSET   (-11)
#define PCRE_ERROR_PARTIAL   (-12)
#define PCRE_ERROR_BADPARTIAL   (-13)
#define PCRE_ERROR_INTERNAL   (-14)
#define PCRE_ERROR_BADCOUNT   (-15)
#define PCRE_ERROR_DFA_UITEM   (-16)
#define PCRE_ERROR_DFA_UCOND   (-17)
#define PCRE_ERROR_DFA_UMLIMIT   (-18)
#define PCRE_ERROR_DFA_WSSIZE   (-19)
#define PCRE_ERROR_DFA_RECURSE   (-20)
#define PCRE_ERROR_RECURSIONLIMIT   (-21)
#define PCRE_ERROR_NOTUSED   (-22)
#define PCRE_ERROR_BADNEWLINE   (-23)
#define PCRE_INFO_OPTIONS   0
#define PCRE_INFO_SIZE   1
#define PCRE_INFO_CAPTURECOUNT   2
#define PCRE_INFO_BACKREFMAX   3
#define PCRE_INFO_FIRSTBYTE   4
#define PCRE_INFO_FIRSTCHAR   4
#define PCRE_INFO_FIRSTTABLE   5
#define PCRE_INFO_LASTLITERAL   6
#define PCRE_INFO_NAMEENTRYSIZE   7
#define PCRE_INFO_NAMECOUNT   8
#define PCRE_INFO_NAMETABLE   9
#define PCRE_INFO_STUDYSIZE   10
#define PCRE_INFO_DEFAULT_TABLES   11
#define PCRE_INFO_OKPARTIAL   12
#define PCRE_INFO_JCHANGED   13
#define PCRE_INFO_HASCRORLF   14
#define PCRE_CONFIG_UTF8   0
#define PCRE_CONFIG_NEWLINE   1
#define PCRE_CONFIG_LINK_SIZE   2
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD   3
#define PCRE_CONFIG_MATCH_LIMIT   4
#define PCRE_CONFIG_STACKRECURSE   5
#define PCRE_CONFIG_UNICODE_PROPERTIES   6
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION   7
#define PCRE_EXTRA_STUDY_DATA   0x0001
#define PCRE_EXTRA_MATCH_LIMIT   0x0002
#define PCRE_EXTRA_CALLOUT_DATA   0x0004
#define PCRE_EXTRA_TABLES   0x0008
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION   0x0010
#define PCRE_SPTR   const char *

Typedefs

typedef struct real_pcre pcre

Functions

PCRE_EXP_DECL pcre * pcre_compile (const char *, int, const char **, int *, const unsigned char *)
PCRE_EXP_DECL pcre * pcre_compile2 (const char *, int, int *, const char **, int *, const unsigned char *)
PCRE_EXP_DECL int pcre_config (int, void *)
PCRE_EXP_DECL int pcre_copy_named_substring (const pcre *, const char *, int *, int, const char *, char *, int)
PCRE_EXP_DECL int pcre_copy_substring (const char *, int *, int, int, char *, int)
PCRE_EXP_DECL int pcre_dfa_exec (const pcre *, const pcre_extra *, const char *, int, int, int, int *, int, int *, int)
PCRE_EXP_DECL int pcre_exec (const pcre *, const pcre_extra *, PCRE_SPTR, int, int, int, int *, int)
PCRE_EXP_DECL void pcre_free_substring (const char *)
PCRE_EXP_DECL void pcre_free_substring_list (const char **)
PCRE_EXP_DECL int pcre_fullinfo (const pcre *, const pcre_extra *, int, void *)
PCRE_EXP_DECL int pcre_get_named_substring (const pcre *, const char *, int *, int, const char *, const char **)
PCRE_EXP_DECL int pcre_get_stringnumber (const pcre *, const char *)
PCRE_EXP_DECL int pcre_get_stringtable_entries (const pcre *, const char *, char **, char **)
PCRE_EXP_DECL int pcre_get_substring (const char *, int *, int, int, const char **)
PCRE_EXP_DECL int pcre_get_substring_list (const char *, int *, int, const char ***)
PCRE_EXP_DECL int pcre_info (const pcre *, int *, int *)
PCRE_EXP_DECL const unsigned char * pcre_maketables (void)
PCRE_EXP_DECL int pcre_refcount (pcre *, int)
PCRE_EXP_DECL pcre_extra * pcre_study (const pcre *, int, const char **)
PCRE_EXP_DECL const char * pcre_version (void)

Variables

PCRE_EXP_DECL void *(* pcre_malloc )(size_t)
PCRE_EXP_DECL void(* pcre_free )(void *)
PCRE_EXP_DECL void *(* pcre_stack_malloc )(size_t)
PCRE_EXP_DECL void(* pcre_stack_free )(void *)
PCRE_EXP_DECL int(* pcre_callout )(pcre_callout_block *)


Define Documentation

#define EXPORT
 

Definition at line 88 of file pcre.h.

#define PCRE_ANCHORED   0x00000010
 

Definition at line 107 of file pcre.h.

Referenced by pcre_compile2(), pcre_dfa_exec(), pcre_exec(), and pcre_study().

#define PCRE_AUTO_CALLOUT   0x00004000
 

Definition at line 117 of file pcre.h.

#define PCRE_CASELESS   0x00000001
 

Definition at line 103 of file pcre.h.

Referenced by handle_option(), match_ref(), pcre_study(), regcomp(), avmplus::RegExpObject::RegExpObject(), set_start_bits(), and Test_all_options().

#define PCRE_CONFIG_LINK_SIZE   2
 

Definition at line 180 of file pcre.h.

Referenced by main(), and pcre_config().

#define PCRE_CONFIG_MATCH_LIMIT   4
 

Definition at line 182 of file pcre.h.

Referenced by main(), and pcre_config().

#define PCRE_CONFIG_MATCH_LIMIT_RECURSION   7
 

Definition at line 185 of file pcre.h.

Referenced by main(), and pcre_config().

#define PCRE_CONFIG_NEWLINE   1
 

Definition at line 179 of file pcre.h.

Referenced by main(), pcrecpp::NewlineMode(), and pcre_config().

#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD   3
 

Definition at line 181 of file pcre.h.

Referenced by main(), and pcre_config().

#define PCRE_CONFIG_STACKRECURSE   5
 

Definition at line 183 of file pcre.h.

Referenced by main(), and pcre_config().

#define PCRE_CONFIG_UNICODE_PROPERTIES   6
 

Definition at line 184 of file pcre.h.

Referenced by main(), and pcre_config().

#define PCRE_CONFIG_UTF8   0
 

Definition at line 178 of file pcre.h.

Referenced by main(), and pcre_config().

#define PCRE_DATE   2007-08-28
 

Definition at line 47 of file pcre.h.

Referenced by pcre_version().

#define PCRE_DFA_RESTART   0x00020000
 

Definition at line 120 of file pcre.h.

Referenced by pcre_dfa_exec().

#define PCRE_DFA_SHORTEST   0x00010000
 

Definition at line 119 of file pcre.h.

#define PCRE_DOLLAR_ENDONLY   0x00000020
 

Definition at line 108 of file pcre.h.

Referenced by pcre_exec().

#define PCRE_DOTALL   0x00000004
 

Definition at line 105 of file pcre.h.

Referenced by is_anchored(), regcomp(), avmplus::RegExpObject::RegExpObject(), and Test_all_options().

#define PCRE_DUPNAMES   0x00080000
 

Definition at line 122 of file pcre.h.

Referenced by get_first_set().

#define PCRE_ERROR_BADCOUNT   (-15)
 

Definition at line 146 of file pcre.h.

Referenced by pcre_dfa_exec(), and pcre_exec().

#define PCRE_ERROR_BADMAGIC   (-4)
 

Definition at line 134 of file pcre.h.

Referenced by pcre_dfa_exec(), pcre_exec(), pcre_fullinfo(), and pcre_info().

#define PCRE_ERROR_BADNEWLINE   (-23)
 

Definition at line 154 of file pcre.h.

Referenced by pcre_dfa_exec(), and pcre_exec().

#define PCRE_ERROR_BADOPTION   (-3)
 

Definition at line 133 of file pcre.h.

Referenced by pcre_config(), pcre_dfa_exec(), pcre_exec(), and pcre_fullinfo().

#define PCRE_ERROR_BADPARTIAL   (-13)
 

Definition at line 144 of file pcre.h.

#define PCRE_ERROR_BADUTF8   (-10)
 

Definition at line 141 of file pcre.h.

#define PCRE_ERROR_BADUTF8_OFFSET   (-11)
 

Definition at line 142 of file pcre.h.

#define PCRE_ERROR_CALLOUT   (-9)
 

Definition at line 140 of file pcre.h.

#define PCRE_ERROR_DFA_RECURSE   (-20)
 

Definition at line 151 of file pcre.h.

#define PCRE_ERROR_DFA_UCOND   (-17)
 

Definition at line 148 of file pcre.h.

#define PCRE_ERROR_DFA_UITEM   (-16)
 

Definition at line 147 of file pcre.h.

#define PCRE_ERROR_DFA_UMLIMIT   (-18)
 

Definition at line 149 of file pcre.h.

Referenced by pcre_dfa_exec().

#define PCRE_ERROR_DFA_WSSIZE   (-19)
 

Definition at line 150 of file pcre.h.

Referenced by pcre_dfa_exec().

#define PCRE_ERROR_INTERNAL   (-14)
 

Definition at line 145 of file pcre.h.

#define PCRE_ERROR_MATCHLIMIT   (-8)
 

Definition at line 139 of file pcre.h.

Referenced by match().

#define PCRE_ERROR_NOMATCH   (-1)
 

Definition at line 131 of file pcre.h.

Referenced by check_match_limit(), and internal_dfa_exec().

#define PCRE_ERROR_NOMEMORY   (-6)
 

Definition at line 137 of file pcre.h.

Referenced by pcre_copy_substring(), pcre_get_substring(), and pcre_get_substring_list().

#define PCRE_ERROR_NOSUBSTRING   (-7)
 

Definition at line 138 of file pcre.h.

Referenced by pcre_copy_substring(), pcre_get_stringnumber(), pcre_get_stringtable_entries(), and pcre_get_substring().

#define PCRE_ERROR_NOTUSED   (-22)
 

Definition at line 153 of file pcre.h.

#define PCRE_ERROR_NULL   (-2)
 

Definition at line 132 of file pcre.h.

Referenced by pcre_dfa_exec(), pcre_exec(), pcre_fullinfo(), pcre_info(), and pcre_refcount().

#define PCRE_ERROR_PARTIAL   (-12)
 

Definition at line 143 of file pcre.h.

Referenced by check_match_limit().

#define PCRE_ERROR_RECURSIONLIMIT   (-21)
 

Definition at line 152 of file pcre.h.

Referenced by match().

#define PCRE_ERROR_UNKNOWN_NODE   (-5)
 

Definition at line 136 of file pcre.h.

#define PCRE_ERROR_UNKNOWN_OPCODE   (-5)
 

Definition at line 135 of file pcre.h.

#define PCRE_EXP_DECL   extern
 

Definition at line 75 of file pcre.h.

#define PCRE_EXTENDED   0x00000008
 

Definition at line 106 of file pcre.h.

Referenced by check_auto_possessive(), avmplus::RegExpObject::RegExpObject(), and Test_all_options().

#define PCRE_EXTRA   0x00000040
 

Definition at line 109 of file pcre.h.

#define PCRE_EXTRA_CALLOUT_DATA   0x0004
 

Definition at line 192 of file pcre.h.

Referenced by pcre_dfa_exec(), and pcre_exec().

#define PCRE_EXTRA_MATCH_LIMIT   0x0002
 

Definition at line 191 of file pcre.h.

Referenced by pcre_dfa_exec(), and pcre_exec().

#define PCRE_EXTRA_MATCH_LIMIT_RECURSION   0x0010
 

Definition at line 194 of file pcre.h.

Referenced by pcre_dfa_exec(), and pcre_exec().

#define PCRE_EXTRA_STUDY_DATA   0x0001
 

Definition at line 190 of file pcre.h.

Referenced by main(), pcre_dfa_exec(), pcre_exec(), pcre_fullinfo(), and pcre_study().

#define PCRE_EXTRA_TABLES   0x0008
 

Definition at line 193 of file pcre.h.

Referenced by pcre_dfa_exec(), and pcre_exec().

#define PCRE_FIRSTLINE   0x00040000
 

Definition at line 121 of file pcre.h.

Referenced by handle_option(), and pcre_exec().

#define PCRE_INFO_BACKREFMAX   3
 

Definition at line 161 of file pcre.h.

Referenced by pcre_fullinfo().

#define PCRE_INFO_CAPTURECOUNT   2
 

Definition at line 160 of file pcre.h.

Referenced by pcre_fullinfo().

#define PCRE_INFO_DEFAULT_TABLES   11
 

Definition at line 170 of file pcre.h.

Referenced by pcre_fullinfo(), and pcre_study().

#define PCRE_INFO_FIRSTBYTE   4
 

Definition at line 162 of file pcre.h.

Referenced by pcre_fullinfo().

#define PCRE_INFO_FIRSTCHAR   4
 

Definition at line 163 of file pcre.h.

#define PCRE_INFO_FIRSTTABLE   5
 

Definition at line 164 of file pcre.h.

Referenced by pcre_fullinfo().

#define PCRE_INFO_HASCRORLF   14
 

Definition at line 173 of file pcre.h.

Referenced by pcre_fullinfo().

#define PCRE_INFO_JCHANGED   13
 

Definition at line 172 of file pcre.h.

Referenced by pcre_fullinfo().

#define PCRE_INFO_LASTLITERAL   6
 

Definition at line 165 of file pcre.h.

Referenced by pcre_fullinfo().

#define PCRE_INFO_NAMECOUNT   8
 

Definition at line 167 of file pcre.h.

Referenced by pcre_fullinfo(), pcre_get_stringnumber(), and pcre_get_stringtable_entries().

#define PCRE_INFO_NAMEENTRYSIZE   7
 

Definition at line 166 of file pcre.h.

Referenced by pcre_fullinfo(), pcre_get_stringnumber(), and pcre_get_stringtable_entries().

#define PCRE_INFO_NAMETABLE   9
 

Definition at line 168 of file pcre.h.

Referenced by pcre_fullinfo(), pcre_get_stringnumber(), and pcre_get_stringtable_entries().

#define PCRE_INFO_OKPARTIAL   12
 

Definition at line 171 of file pcre.h.

Referenced by pcre_fullinfo().

#define PCRE_INFO_OPTIONS   0
 

Definition at line 158 of file pcre.h.

Referenced by main(), and pcre_fullinfo().

#define PCRE_INFO_SIZE   1
 

Definition at line 159 of file pcre.h.

Referenced by pcre_fullinfo().

#define PCRE_INFO_STUDYSIZE   10
 

Definition at line 169 of file pcre.h.

Referenced by pcre_fullinfo().

#define PCRE_MAJOR   7
 

Definition at line 44 of file pcre.h.

Referenced by pcre_version().

#define PCRE_MINOR   3
 

Definition at line 45 of file pcre.h.

#define PCRE_MULTILINE   0x00000002
 

Definition at line 104 of file pcre.h.

Referenced by handle_option(), regcomp(), avmplus::RegExpObject::RegExpObject(), and Test_all_options().

#define PCRE_NEWLINE_ANY   0x00400000
 

Definition at line 126 of file pcre.h.

Referenced by check_newline(), pcre_compile2(), pcre_dfa_exec(), and pcre_exec().

#define PCRE_NEWLINE_ANYCRLF   0x00500000
 

Definition at line 127 of file pcre.h.

Referenced by check_newline(), pcre_compile2(), pcre_dfa_exec(), and pcre_exec().

#define PCRE_NEWLINE_CR   0x00100000
 

Definition at line 123 of file pcre.h.

Referenced by check_newline(), pcrecpp::NewlineMode(), pcre_compile2(), pcre_dfa_exec(), pcre_exec(), and TestReplace().

#define PCRE_NEWLINE_CRLF   0x00300000
 

Definition at line 125 of file pcre.h.

Referenced by check_newline(), pcrecpp::NewlineMode(), and TestReplace().

#define PCRE_NEWLINE_LF   0x00200000
 

Definition at line 124 of file pcre.h.

Referenced by check_newline(), pcrecpp::NewlineMode(), pcre_compile2(), pcre_dfa_exec(), pcre_exec(), and TestReplace().

#define PCRE_NO_AUTO_CAPTURE   0x00001000
 

Definition at line 115 of file pcre.h.

Referenced by regcomp(), and regexec().

#define PCRE_NO_UTF8_CHECK   0x00002000
 

Definition at line 116 of file pcre.h.

Referenced by avmplus::RegExpObject::exec(), pcre_compile2(), and avmplus::RegExpObject::replace().

#define PCRE_NOTBOL   0x00000080
 

Definition at line 110 of file pcre.h.

Referenced by pcre_exec(), and regexec().

#define PCRE_NOTEMPTY   0x00000400
 

Definition at line 113 of file pcre.h.

Referenced by pcre_exec().

#define PCRE_NOTEOL   0x00000100
 

Definition at line 111 of file pcre.h.

Referenced by pcre_exec(), and regexec().

#define PCRE_PARTIAL   0x00008000
 

Definition at line 118 of file pcre.h.

Referenced by pcre_exec().

#define PCRE_PRERELEASE
 

Definition at line 46 of file pcre.h.

Referenced by pcre_version().

#define PCRE_SPTR   const char *
 

Definition at line 206 of file pcre.h.

#define PCRE_UNGREEDY   0x00000200
 

Definition at line 112 of file pcre.h.

Referenced by compile_branch().

#define PCRE_UTF8   0x00000800
 

Definition at line 114 of file pcre.h.

Referenced by check_escape(), compile_branch(), find_fixedlength(), handle_option(), internal_dfa_exec(), main(), pcre_compile2(), pcre_dfa_exec(), pcre_exec(), pcre_study(), regcomp(), and avmplus::RegExpObject::RegExpObject().


Typedef Documentation

typedef struct real_pcre pcre
 

Definition at line 199 of file pcre.h.


Function Documentation

PCRE_EXP_DECL pcre* pcre_compile (const char *,
int,
const char **,
int *,
const unsigned char *)
 

Definition at line 5787 of file pcre_compile.cpp.

References NULL, and pcre_compile2().

Referenced by compile_single_pattern(), and avmplus::RegExpObject::RegExpObject().

05789 {
05790 return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
05791 }

PCRE_EXP_DECL pcre* pcre_compile2 (const char *,
int,
int *,
const char **,
int *,
const unsigned char *)
 

Definition at line 5795 of file pcre_compile.cpp.

References _pcre_default_tables, _pcre_valid_utf8(), compile_data::backref_map, compile_data::bracount, compile_data::cbits, cbits_offset, compile_regex(), COMPILE_WORK_SIZE, compile_data::ctypes, ctypes_offset, DPRINTF, real_pcre::dummy1, compile_data::end_pattern, ERR0, ERR15, ERR16, ERR17, ERR20, ERR21, ERR22, ERR23, ERR32, ERR44, ERR53, ERR56, compile_data::external_options, FALSE, compile_data::fcc, fcc_offset, find_bracket(), find_firstassertedchar(), real_pcre::first_byte, compile_data::had_accept, compile_data::hwm, is_anchored(), is_startline(), compile_data::lcc, lcc_offset, real_pcre::magic_number, MAGIC_NUMBER, real_pcre::name_count, compile_data::name_entry_size, real_pcre::name_entry_size, compile_data::name_table, real_pcre::name_table_offset, compile_data::names_found, newline, NEWLINE, compile_data::nl, compile_data::nllen, compile_data::nltype, NLTYPE_ANY, NLTYPE_ANYCRLF, NLTYPE_FIXED, compile_data::nopartial, NULL, real_pcre::nullpad, offset, OP_BRA, OP_END, real_pcre::options, PCRE_ANCHORED, PCRE_FIRSTSET, pcre_free, pcre_malloc, PCRE_NEWLINE_ANY, PCRE_NEWLINE_ANYCRLF, PCRE_NEWLINE_BITS, PCRE_NEWLINE_CR, PCRE_NEWLINE_LF, PCRE_NO_UTF8_CHECK, PCRE_NOPARTIAL, PCRE_REQCHSET, PCRE_STARTLINE, PCRE_UTF8, PUBLIC_OPTIONS, real_pcre::ref_count, real_pcre::req_byte, REQ_CASELESS, REQ_VARY, compile_data::req_varyopt, real_pcre::size, compile_data::start_code, compile_data::start_pattern, compile_data::start_workspace, real_pcre::tables, compile_data::top_backref, real_pcre::top_backref, real_pcre::top_bracket, and TRUE.

Referenced by pcre_compile(), and regcomp().

05797 {
05798 real_pcre *re;
05799 int length = 1;  /* For final END opcode */
05800 int firstbyte, reqbyte, newline;
05801 int errorcode = 0;
05802 int skipatstart = 0;
05803 #ifdef SUPPORT_UTF8
05804 BOOL utf8;
05805 #endif
05806 size_t size;
05807 uschar *code;
05808 const uschar *codestart;
05809 const uschar *ptr;
05810 compile_data compile_block;
05811 compile_data *cd = &compile_block;
05812 
05813 /* This space is used for "compiling" into during the first phase, when we are
05814 computing the amount of memory that is needed. Compiled items are thrown away
05815 as soon as possible, so that a fairly large buffer should be sufficient for
05816 this purpose. The same space is used in the second phase for remembering where
05817 to fill in forward references to subpatterns. */
05818 
05819 uschar cworkspace[COMPILE_WORK_SIZE];
05820 
05821 
05822 /* Set this early so that early errors get offset 0. */
05823 
05824 ptr = (const uschar *)pattern;
05825 
05826 /* We can't pass back an error message if errorptr is NULL; I guess the best we
05827 can do is just return NULL, but we can set a code value if there is a code
05828 pointer. */
05829 
05830 if (errorptr == NULL)
05831   {
05832   if (errorcodeptr != NULL) *errorcodeptr = 99;
05833   return NULL;
05834   }
05835 
05836 *errorptr = NULL;
05837 if (errorcodeptr != NULL) *errorcodeptr = ERR0;
05838 
05839 /* However, we can give a message for this error */
05840 
05841 if (erroroffset == NULL)
05842   {
05843   errorcode = ERR16;
05844   goto PCRE_EARLY_ERROR_RETURN2;
05845   }
05846 
05847 *erroroffset = 0;
05848 
05849 /* Can't support UTF8 unless PCRE has been compiled to include the code. */
05850 
05851 #ifdef SUPPORT_UTF8
05852 utf8 = (options & PCRE_UTF8) != 0;
05853 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
05854      (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
05855   {
05856   errorcode = ERR44;
05857   goto PCRE_EARLY_ERROR_RETURN2;
05858   }
05859 #else
05860 if ((options & PCRE_UTF8) != 0)
05861   {
05862   errorcode = ERR32;
05863   goto PCRE_EARLY_ERROR_RETURN;
05864   }
05865 #endif
05866 
05867 if ((options & ~PUBLIC_OPTIONS) != 0)
05868   {
05869   errorcode = ERR17;
05870   goto PCRE_EARLY_ERROR_RETURN;
05871   }
05872 
05873 /* Set up pointers to the individual character tables */
05874 
05875 if (tables == NULL) tables = _pcre_default_tables;
05876 cd->lcc = tables + lcc_offset;
05877 cd->fcc = tables + fcc_offset;
05878 cd->cbits = tables + cbits_offset;
05879 cd->ctypes = tables + ctypes_offset;
05880 
05881 /* Check for newline settings at the start of the pattern, and remember the
05882 offset for later. */
05883 
05884 if (ptr[0] == '(' && ptr[1] == '*')
05885   {
05886   int newnl = 0;
05887   if (strncmp((char *)(ptr+2), "CR)", 3) == 0)
05888     { skipatstart = 5; newnl = PCRE_NEWLINE_CR; }
05889   else if (strncmp((char *)(ptr+2), "LF)", 3)  == 0)
05890     { skipatstart = 5; newnl = PCRE_NEWLINE_LF; }
05891   else if (strncmp((char *)(ptr+2), "CRLF)", 5)  == 0)
05892     { skipatstart = 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
05893   else if (strncmp((char *)(ptr+2), "ANY)", 4) == 0)
05894     { skipatstart = 6; newnl = PCRE_NEWLINE_ANY; }
05895   else if (strncmp((char *)(ptr+2), "ANYCRLF)", 8)  == 0)
05896     { skipatstart = 10; newnl = PCRE_NEWLINE_ANYCRLF; }
05897   if (skipatstart > 0)
05898     options = (options & ~PCRE_NEWLINE_BITS) | newnl;
05899   }
05900 
05901 /* Handle different types of newline. The three bits give seven cases. The
05902 current code allows for fixed one- or two-byte sequences, plus "any" and
05903 "anycrlf". */
05904 
05905 switch (options & PCRE_NEWLINE_BITS)
05906   {
05907   case 0: newline = NEWLINE; break;   /* Build-time default */
05908   case PCRE_NEWLINE_CR: newline = '\r'; break;
05909   case PCRE_NEWLINE_LF: newline = '\n'; break;
05910   case PCRE_NEWLINE_CR+
05911        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
05912   case PCRE_NEWLINE_ANY: newline = -1; break;
05913   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
05914   default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
05915   }
05916 
05917 if (newline == -2)
05918   {
05919   cd->nltype = NLTYPE_ANYCRLF;
05920   }
05921 else if (newline < 0)
05922   {
05923   cd->nltype = NLTYPE_ANY;
05924   }
05925 else
05926   {
05927   cd->nltype = NLTYPE_FIXED;
05928   if (newline > 255)
05929     {
05930     cd->nllen = 2;
05931     cd->nl[0] = (newline >> 8) & 255;
05932     cd->nl[1] = newline & 255;
05933     }
05934   else
05935     {
05936     cd->nllen = 1;
05937     cd->nl[0] = newline;
05938     }
05939   }
05940 
05941 /* Maximum back reference and backref bitmap. The bitmap records up to 31 back
05942 references to help in deciding whether (.*) can be treated as anchored or not.
05943 */
05944 
05945 cd->top_backref = 0;
05946 cd->backref_map = 0;
05947 
05948 /* Reflect pattern for debugging output */
05949 
05950 DPRINTF(("------------------------------------------------------------------\n"));
05951 DPRINTF(("%s\n", pattern));
05952 
05953 /* Pretend to compile the pattern while actually just accumulating the length
05954 of memory required. This behaviour is triggered by passing a non-NULL final
05955 argument to compile_regex(). We pass a block of workspace (cworkspace) for it
05956 to compile parts of the pattern into; the compiled code is discarded when it is
05957 no longer needed, so hopefully this workspace will never overflow, though there
05958 is a test for its doing so. */
05959 
05960 cd->bracount = 0;
05961 cd->names_found = 0;
05962 cd->name_entry_size = 0;
05963 cd->name_table = NULL;
05964 cd->start_workspace = cworkspace;
05965 cd->start_code = cworkspace;
05966 cd->hwm = cworkspace;
05967 cd->start_pattern = (const uschar *)pattern;
05968 cd->end_pattern = (const uschar *)(pattern + strlen(pattern));
05969 cd->req_varyopt = 0;
05970 cd->nopartial = FALSE;
05971 cd->external_options = options;
05972 
05973 /* Now do the pre-compile. On error, errorcode will be set non-zero, so we
05974 don't need to look at the result of the function here. The initial options have
05975 been put into the cd block so that they can be changed if an option setting is
05976 found within the regex right at the beginning. Bringing initial option settings
05977 outside can help speed up starting point checks. */
05978 
05979 ptr += skipatstart;
05980 code = cworkspace;
05981 *code = OP_BRA;
05982 (void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,
05983   &code, &ptr, &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd,
05984   &length);
05985 if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
05986 
05987 DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
05988   cd->hwm - cworkspace));
05989 
05990 if (length > MAX_PATTERN_SIZE)
05991   {
05992   errorcode = ERR20;
05993   goto PCRE_EARLY_ERROR_RETURN;
05994   }
05995 
05996 /* Compute the size of data block needed and get it, either from malloc or
05997 externally provided function. Integer overflow should no longer be possible
05998 because nowadays we limit the maximum value of cd->names_found and
05999 cd->name_entry_size. */
06000 
06001 size = length + sizeof(real_pcre) + cd->names_found * (cd->name_entry_size + 3);
06002 re = (real_pcre *)(pcre_malloc)(size);
06003 
06004 if (re == NULL)
06005   {
06006   errorcode = ERR21;
06007   goto PCRE_EARLY_ERROR_RETURN;
06008   }
06009 
06010 /* Put in the magic number, and save the sizes, initial options, and character
06011 table pointer. NULL is used for the default character tables. The nullpad field
06012 is at the end; it's there to help in the case when a regex compiled on a system
06013 with 4-byte pointers is run on another with 8-byte pointers. */
06014 
06015 re->magic_number = MAGIC_NUMBER;
06016 re->size = (pcre_uint32)size;
06017 re->options = cd->external_options;
06018 re->dummy1 = 0;
06019 re->first_byte = 0;
06020 re->req_byte = 0;
06021 re->name_table_offset = sizeof(real_pcre);
06022 re->name_entry_size = cd->name_entry_size;
06023 re->name_count = cd->names_found;
06024 re->ref_count = 0;
06025 re->tables = (tables == _pcre_default_tables)? NULL : tables;
06026 re->nullpad = NULL;
06027 
06028 /* The starting points of the name/number translation table and of the code are
06029 passed around in the compile data block. The start/end pattern and initial
06030 options are already set from the pre-compile phase, as is the name_entry_size
06031 field. Reset the bracket count and the names_found field. Also reset the hwm
06032 field; this time it's used for remembering forward references to subpatterns.
06033 */
06034 
06035 cd->bracount = 0;
06036 cd->names_found = 0;
06037 cd->name_table = (uschar *)re + re->name_table_offset;
06038 codestart = cd->name_table + re->name_entry_size * re->name_count;
06039 cd->start_code = codestart;
06040 cd->hwm = cworkspace;
06041 cd->req_varyopt = 0;
06042 cd->nopartial = FALSE;
06043 cd->had_accept = FALSE;
06044 
06045 /* Set up a starting, non-extracting bracket, then compile the expression. On
06046 error, errorcode will be set non-zero, so we don't need to look at the result
06047 of the function here. */
06048 
06049 ptr = (const uschar *)pattern + skipatstart;
06050 code = (uschar *)codestart;
06051 *code = OP_BRA;
06052 (void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,
06053   &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
06054 re->top_bracket = cd->bracount;
06055 re->top_backref = cd->top_backref;
06056 
06057 if (cd->nopartial) re->options |= PCRE_NOPARTIAL;
06058 if (cd->had_accept) reqbyte = -1;   /* Must disable after (*ACCEPT) */
06059 
06060 /* If not reached end of pattern on success, there's an excess bracket. */
06061 
06062 if (errorcode == 0 && *ptr != 0) errorcode = ERR22;
06063 
06064 /* Fill in the terminating state and check for disastrous overflow, but
06065 if debugging, leave the test till after things are printed out. */
06066 
06067 *code++ = OP_END;
06068 
06069 #ifndef PCRE_DEBUG
06070 if (code - codestart > length) errorcode = ERR23;
06071 #endif
06072 
06073 /* Fill in any forward references that are required. */
06074 
06075 while (errorcode == 0 && cd->hwm > cworkspace)
06076   {
06077   int offset, recno;
06078   const uschar *groupptr;
06079   cd->hwm -= LINK_SIZE;
06080   offset = GET(cd->hwm, 0);
06081   recno = GET(codestart, offset);
06082   groupptr = find_bracket(codestart, (re->options & PCRE_UTF8) != 0, recno);
06083   if (groupptr == NULL) errorcode = ERR53;
06084     else PUT(((uschar *)codestart), offset, groupptr - codestart);
06085   }
06086 
06087 /* Give an error if there's back reference to a non-existent capturing
06088 subpattern. */
06089 
06090 if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
06091 
06092 /* Failed to compile, or error while post-processing */
06093 
06094 if (errorcode != 0)
06095   {
06096   (pcre_free)(re);
06097   PCRE_EARLY_ERROR_RETURN:
06098   *erroroffset = ptr - (const uschar *)pattern;
06099   PCRE_EARLY_ERROR_RETURN2:
06100   *errorptr = error_texts[errorcode];
06101   if (errorcodeptr != NULL) *errorcodeptr = errorcode;
06102   return NULL;
06103   }
06104 
06105 /* If the anchored option was not passed, set the flag if we can determine that
06106 the pattern is anchored by virtue of ^ characters or \A or anything else (such
06107 as starting with .* when DOTALL is set).
06108 
06109 Otherwise, if we know what the first byte has to be, save it, because that
06110 speeds up unanchored matches no end. If not, see if we can set the
06111 PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
06112 start with ^. and also when all branches start with .* for non-DOTALL matches.
06113 */
06114 
06115 if ((re->options & PCRE_ANCHORED) == 0)
06116   {
06117   int temp_options = re->options;   /* May get changed during these scans */
06118   if (is_anchored(codestart, &temp_options, 0, cd->backref_map))
06119     re->options |= PCRE_ANCHORED;
06120   else
06121     {
06122     if (firstbyte < 0)
06123       firstbyte = find_firstassertedchar(codestart, &temp_options, FALSE);
06124     if (firstbyte >= 0)   /* Remove caseless flag for non-caseable chars */
06125       {
06126       int ch = firstbyte & 255;
06127       re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&
06128          cd->fcc[ch] == ch)? ch : firstbyte;
06129       re->options |= PCRE_FIRSTSET;
06130       }
06131     else if (is_startline(codestart, 0, cd->backref_map))
06132       re->options |= PCRE_STARTLINE;
06133     }
06134   }
06135 
06136 /* For an anchored pattern, we use the "required byte" only if it follows a
06137 variable length item in the regex. Remove the caseless flag for non-caseable
06138 bytes. */
06139 
06140 if (reqbyte >= 0 &&
06141      ((re->options & PCRE_ANCHORED) == 0 || (reqbyte & REQ_VARY) != 0))
06142   {
06143   int ch = reqbyte & 255;
06144   re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&
06145     cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;
06146   re->options |= PCRE_REQCHSET;
06147   }
06148 
06149 /* Print out the compiled data if debugging is enabled. This is never the
06150 case when building a production library. */
06151 
06152 #ifdef PCRE_DEBUG
06153 
06154 printf("Length = %d top_bracket = %d top_backref = %d\n",
06155   length, re->top_bracket, re->top_backref);
06156 
06157 printf("Options=%08x\n", re->options);
06158 
06159 if ((re->options & PCRE_FIRSTSET) != 0)
06160   {
06161   int ch = re->first_byte & 255;
06162   const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)?
06163     "" : " (caseless)";
06164   if (isprint(ch)) printf("First char = %c%s\n", ch, caseless);
06165     else printf("First char = \\x%02x%s\n", ch, caseless);
06166   }
06167 
06168 if ((re->options & PCRE_REQCHSET) != 0)
06169   {
06170   int ch = re->req_byte & 255;
06171   const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)?
06172     "" : " (caseless)";
06173   if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless);
06174     else printf("Req char = \\x%02x%s\n", ch, caseless);
06175   }
06176 
06177 pcre_printint(re, stdout, TRUE);
06178 
06179 /* This check is done here in the debugging case so that the code that
06180 was compiled can be seen. */
06181 
06182 if (code - codestart > length)
06183   {
06184   (pcre_free)(re);
06185   *errorptr = error_texts[ERR23];
06186   *erroroffset = ptr - (uschar *)pattern;
06187   if (errorcodeptr != NULL) *errorcodeptr = ERR23;
06188   return NULL;
06189   }
06190 #endif   /* DEBUG */
06191 
06192 return (pcre *)re;
06193 }

PCRE_EXP_DECL int pcre_config (int,
void *)
 

Definition at line 74 of file pcre_config.cpp.

References LINK_SIZE, MATCH_LIMIT, MATCH_LIMIT_RECURSION, NEWLINE, PCRE_CONFIG_LINK_SIZE, PCRE_CONFIG_MATCH_LIMIT, PCRE_CONFIG_MATCH_LIMIT_RECURSION, PCRE_CONFIG_NEWLINE, PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, PCRE_CONFIG_STACKRECURSE, PCRE_CONFIG_UNICODE_PROPERTIES, PCRE_CONFIG_UTF8, PCRE_ERROR_BADOPTION, and POSIX_MALLOC_THRESHOLD.

Referenced by main(), and pcrecpp::NewlineMode().

/* Body of pcre_config(): reports one build-time setting of the library.
`what` selects the setting and the answer is stored through `where`, which is
written as an int for most items and as an unsigned int for the two match
limits. `where` is dereferenced without a NULL check. Yields 0 on success, or
PCRE_ERROR_BADOPTION when `what` is not one of the cases listed below. */
00075 {
00076 switch (what)
00077   {
      /* 1 if SUPPORT_UTF8 was defined at build time, otherwise 0 */
00078   case PCRE_CONFIG_UTF8:
00079 #ifdef SUPPORT_UTF8
00080   *((int *)where) = 1;
00081 #else
00082   *((int *)where) = 0;
00083 #endif
00084   break;
00085 
      /* 1 if SUPPORT_UCP was defined at build time, otherwise 0 */
00086   case PCRE_CONFIG_UNICODE_PROPERTIES:
00087 #ifdef SUPPORT_UCP
00088   *((int *)where) = 1;
00089 #else
00090   *((int *)where) = 0;
00091 #endif
00092   break;
00093 
      /* The next three items simply hand back the corresponding build macro */
00094   case PCRE_CONFIG_NEWLINE:
00095   *((int *)where) = NEWLINE;
00096   break;
00097 
00098   case PCRE_CONFIG_LINK_SIZE:
00099   *((int *)where) = LINK_SIZE;
00100   break;
00101 
00102   case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
00103   *((int *)where) = POSIX_MALLOC_THRESHOLD;
00104   break;
00105 
      /* The two limits are stored as unsigned int, unlike every other item */
00106   case PCRE_CONFIG_MATCH_LIMIT:
00107   *((unsigned int *)where) = MATCH_LIMIT;
00108   break;
00109 
00110   case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
00111   *((unsigned int *)where) = MATCH_LIMIT_RECURSION;
00112   break;
00113 
      /* Note the inverted sense: 0 when NO_RECURSE is defined, otherwise 1 */
00114   case PCRE_CONFIG_STACKRECURSE:
00115 #ifdef NO_RECURSE
00116   *((int *)where) = 0;
00117 #else
00118   *((int *)where) = 1;
00119 #endif
00120   break;
00121 
      /* Unknown request: nothing is written through `where` */
00122   default: return PCRE_ERROR_BADOPTION;
00123   }
00124 
00125 return 0;
00126 }

PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector, int stringcount, const char *stringname, char *buffer, int size)
 

Definition at line 280 of file pcre_get.cpp.

References get_first_set(), and pcre_copy_substring().

/* Body of pcre_copy_named_substring(): copies a captured substring chosen by
name rather than by number. The name is turned into a group number by
get_first_set(); the copy itself is delegated to pcre_copy_substring(), whose
result is returned unchanged. */
00282 {
00283 int n = get_first_set(code, stringname, ovector);
      /* A non-positive result is passed straight back to the caller
      (presumably an error code from get_first_set() - confirm there). */
00284 if (n <= 0) return n;
00285 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
00286 }

PCRE_EXP_DECL int pcre_copy_substring(const char *subject, int *ovector, int stringcount, int stringnumber, char *buffer, int size)
 

Definition at line 235 of file pcre_get.cpp.

References PCRE_ERROR_NOMEMORY, and PCRE_ERROR_NOSUBSTRING.

Referenced by pcre_copy_named_substring().

/* Body of pcre_copy_substring(): copies captured substring number
`stringnumber` out of `subject` into `buffer` and terminates it with a zero
byte. The substring is delimited by the offset pair ovector[2*stringnumber]
and ovector[2*stringnumber+1].

Returns the length of the copied substring (terminator not counted),
PCRE_ERROR_NOSUBSTRING if `stringnumber` is outside 0..stringcount-1, or
PCRE_ERROR_NOMEMORY if `size` cannot hold the substring plus terminator. */
00237 {
00238 int yield;
00239 if (stringnumber < 0 || stringnumber >= stringcount)
00240   return PCRE_ERROR_NOSUBSTRING;
      /* Each capture occupies two consecutive ints, so convert to an index */
00241 stringnumber *= 2;
00242 yield = ovector[stringnumber+1] - ovector[stringnumber];
      /* One extra byte is needed for the terminator written below */
00243 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
      /* NOTE(review): the offsets themselves are not range-checked here;
      confirm what the pair holds for a group that did not participate. */
00244 memcpy(buffer, subject + ovector[stringnumber], yield);
00245 buffer[yield] = 0;
00246 return yield;
00247 }

PCRE_EXP_DECL int pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data, const char *subject, int length, int start_offset, int options, int *offsets, int offsetcount, int *workspace, int wscount)
 

Definition at line 2488 of file pcre_dfa_exec.cpp.

References _pcre_try_flipped(), dfa_match_data::callout_data, pcre_extra::callout_data, dfa_match_data::end_subject, FALSE, flags, pcre_extra::flags, real_pcre::magic_number, MAGIC_NUMBER, dfa_match_data::moptions, real_pcre::name_count, real_pcre::name_entry_size, real_pcre::name_table_offset, newline, NEWLINE, NULL, real_pcre::options, PCRE_ANCHORED, PCRE_DFA_RESTART, PCRE_ERROR_BADCOUNT, PCRE_ERROR_BADMAGIC, PCRE_ERROR_BADNEWLINE, PCRE_ERROR_BADOPTION, PCRE_ERROR_DFA_UMLIMIT, PCRE_ERROR_DFA_WSSIZE, PCRE_ERROR_NULL, PCRE_EXTRA_CALLOUT_DATA, PCRE_EXTRA_MATCH_LIMIT, PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_TABLES, PCRE_NEWLINE_ANY, PCRE_NEWLINE_ANYCRLF, PCRE_NEWLINE_BITS, PCRE_NEWLINE_CR, PCRE_NEWLINE_LF, PCRE_UTF8, dfa_match_data::poptions, PUBLIC_DFA_EXEC_OPTIONS, dfa_match_data::start_code, dfa_match_data::start_subject, pcre_extra::study_data, dfa_match_data::tables, real_pcre::tables, pcre_extra::tables, and utf8.

/* Body of pcre_dfa_exec(): top-level driver for the DFA matcher. It validates
the arguments, collects optional data from `extra_data`, copes with a compiled
pattern of the opposite byte order, sets up the fixed match block, and then
runs internal_dfa_exec() at successive start positions (the "bumpalong" loop)
until something other than PCRE_ERROR_NOMATCH comes back, the pattern is
anchored, or the subject is exhausted.

Names used from the enclosing function's parameter list:
  argument_re   the compiled pattern
  extra_data    optional study data, callout data and tables (may be NULL)
  subject       the subject bytes; `length` is their count
  start_offset  where in the subject to begin
  options       run-time option bits
  offsets       result vector with `offsetcount` entries
  workspace     working vector with `wscount` entries (at least 20)

Returns the value produced by internal_dfa_exec(), or one of the PCRE_ERROR_*
codes returned directly below: BADOPTION, NULL, BADCOUNT, DFA_WSSIZE,
DFA_UMLIMIT, BADMAGIC, BADNEWLINE, BADUTF8, BADUTF8_OFFSET, NOMATCH. */
02491 {
02492 real_pcre *re = (real_pcre *)argument_re;
02493 dfa_match_data match_block;
02494 dfa_match_data *md = &match_block;
02495 BOOL utf8, anchored, startline, firstline;
02496 const uschar *current_subject, *end_subject, *lcc;
02497 
02498 pcre_study_data internal_study;
02499 const pcre_study_data *study = NULL;
02500 real_pcre internal_re;
02501 
02502 const uschar *req_byte_ptr;
02503 const uschar *start_bits = NULL;
02504 BOOL first_byte_caseless = FALSE;
02505 BOOL req_byte_caseless = FALSE;
02506 int first_byte = -1;
02507 int req_byte = -1;
02508 int req_byte2 = -1;
02509 int newline;
02510 
02511 /* Plausibility checks */
02512 
02513 if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
02514 if (re == NULL || subject == NULL || workspace == NULL ||
02515    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
02516 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
02517 if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
02518 
02519 /* We need to find the pointer to any study data before we test for byte
02520 flipping, so we scan the extra_data block first. This may set two fields in the
02521 match block, so we must initialize them beforehand. However, the other fields
02522 in the match block must not be set until after the byte flipping. */
02523 
02524 md->tables = re->tables;
02525 md->callout_data = NULL;
02526 
02527 if (extra_data != NULL)
02528   {
02529   unsigned int flags = extra_data->flags;
02530   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
02531     study = (const pcre_study_data *)extra_data->study_data;
      /* Either match-limit flag is rejected outright by this function */
02532   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
02533   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
02534     return PCRE_ERROR_DFA_UMLIMIT;
02535   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
02536     md->callout_data = extra_data->callout_data;
02537   if ((flags & PCRE_EXTRA_TABLES) != 0)
02538     md->tables = extra_data->tables;
02539   }
02540 
02541 /* Check that the first field in the block is the magic number. If it is not,
02542 test for a regex that was compiled on a host of opposite endianness. If this is
02543 the case, flipped values are put in internal_re and internal_study if there was
02544 study data too. */
02545 
02546 if (re->magic_number != MAGIC_NUMBER)
02547   {
02548   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
02549   if (re == NULL) return PCRE_ERROR_BADMAGIC;
02550   if (study != NULL) study = &internal_study;
02551   }
02552 
02553 /* Set some local values */
02554 
      /* req_byte_ptr starts one byte before the first start position so that
      the "p > req_byte_ptr" test in the main loop succeeds the first time. */
02555 current_subject = (const unsigned char *)subject + start_offset;
02556 end_subject = (const unsigned char *)subject + length;
02557 req_byte_ptr = current_subject - 1;
02558 
02559 #ifdef SUPPORT_UTF8
02560 utf8 = (re->options & PCRE_UTF8) != 0;
02561 #else
02562 utf8 = FALSE;
02563 #endif
02564 
      /* A restart (PCRE_DFA_RESTART) is treated as anchored as well */
02565 anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
02566   (re->options & PCRE_ANCHORED) != 0;
02567 
02568 /* The remaining fixed data for passing around. */
02569 
      /* The base address is the caller's block (argument_re), while the
      offsets are read through `re`, which may have been replaced by the
      result of _pcre_try_flipped() above. */
02570 md->start_code = (const uschar *)argument_re +
02571     re->name_table_offset + re->name_count * re->name_entry_size;
02572 md->start_subject = (const unsigned char *)subject;
02573 md->end_subject = end_subject;
02574 md->moptions = options;
02575 md->poptions = re->options;
02576 
02577 /* Handle different types of newline. The three bits give eight cases. If
02578 nothing is set at run time, whatever was used at compile time applies. */
02579 
02580 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
02581          PCRE_NEWLINE_BITS)
02582   {
02583   case 0: newline = NEWLINE; break;   /* Compile-time default */
02584   case PCRE_NEWLINE_CR: newline = '\r'; break;
02585   case PCRE_NEWLINE_LF: newline = '\n'; break;
02586   case PCRE_NEWLINE_CR+
02587        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
02588   case PCRE_NEWLINE_ANY: newline = -1; break;
02589   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
02590   default: return PCRE_ERROR_BADNEWLINE;
02591   }
02592 
      /* Translate the numeric code into the match block: -2 and -1 select the
      ANYCRLF and ANY types; a non-negative value is a fixed newline of one
      byte, or two bytes when the value exceeds 255 (high byte first). */
02593 if (newline == -2)
02594   {
02595   md->nltype = NLTYPE_ANYCRLF;
02596   }
02597 else if (newline < 0)
02598   {
02599   md->nltype = NLTYPE_ANY;
02600   }
02601 else
02602   {
02603   md->nltype = NLTYPE_FIXED;
02604   if (newline > 255)
02605     {
02606     md->nllen = 2;
02607     md->nl[0] = (newline >> 8) & 255;
02608     md->nl[1] = newline & 255;
02609     }
02610   else
02611     {
02612     md->nllen = 1;
02613     md->nl[0] = newline;
02614     }
02615   }
02616 
02617 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
02618 back the character offset. */
02619 
02620 #ifdef SUPPORT_UTF8
02621 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
02622   {
02623   if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
02624     return PCRE_ERROR_BADUTF8;
02625   if (start_offset > 0 && start_offset < length)
02626     {
02627     int tb = ((uschar *)subject)[start_offset];
02628     if (tb > 127)
02629       {
          /* Reject a start offset whose byte has top bits 10 (0x80) */
02630       tb &= 0xc0;
02631       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
02632       }
02633     }
02634   }
02635 #endif
02636 
02637 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
02638 is a feature that makes it possible to save compiled regex and re-use them
02639 in other programs later. */
02640 
02641 if (md->tables == NULL) md->tables = _pcre_default_tables;
02642 
02643 /* The lower casing table and the "must be at the start of a line" flag are
02644 used in a loop when finding where to start. */
02645 
02646 lcc = md->tables + lcc_offset;
02647 startline = (re->options & PCRE_STARTLINE) != 0;
02648 firstline = (re->options & PCRE_FIRSTLINE) != 0;
02649 
02650 /* Set up the first character to match, if available. The first_byte value is
02651 never set for an anchored regular expression, but the anchoring may be forced
02652 at run time, so we have to test for anchoring. The first char may be unset for
02653 an unanchored pattern, of course. If there's no first char and the pattern was
02654 studied, there may be a bitmap of possible first characters. */
02655 
02656 if (!anchored)
02657   {
02658   if ((re->options & PCRE_FIRSTSET) != 0)
02659     {
02660     first_byte = re->first_byte & 255;
02661     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
02662       first_byte = lcc[first_byte];
02663     }
02664   else
02665     {
        /* NOTE(review): start_bits is assigned only when startline is TRUE,
        yet the bumpalong loop below consults start_bits in an else-if that
        follows "else if (startline)", so that branch cannot be reached with a
        non-NULL start_bits as written. pcre_exec() tests !startline at the
        equivalent point. Confirm which condition is intended. */
02666     if (startline && study != NULL &&
02667          (study->options & PCRE_STUDY_MAPPED) != 0)
02668       start_bits = study->start_bits;
02669     }
02670   }
02671 
02672 /* For anchored or unanchored matches, there may be a "last known required
02673 character" set. */
02674 
02675 if ((re->options & PCRE_REQCHSET) != 0)
02676   {
02677   req_byte = re->req_byte & 255;
02678   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
02679   req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */
02680   }
02681 
02682 /* Call the main matching function, looping for a non-anchored regex after a
02683 failed match. Unless restarting, optimize by moving to the first match
02684 character if possible, when not anchored. Then unless wanting a partial match,
02685 check for a required later character. */
02686 
02687 for (;;)
02688   {
02689   int rc;
02690 
02691   if ((options & PCRE_DFA_RESTART) == 0)
02692     {
02693     const uschar *save_end_subject = end_subject;
02694 
02695     /* Advance to a unique first char if possible. If firstline is TRUE, the
02696     start of the match is constrained to the first line of a multiline string.
02697     Implement this by temporarily adjusting end_subject so that we stop
02698     scanning at a newline. If the match fails at the newline, later code breaks
02699     this loop. */
02700 
02701     if (firstline)
02702       {
02703       const uschar *t = current_subject;
02704       while (t < md->end_subject && !IS_NEWLINE(t)) t++;
02705       end_subject = t;
02706       }
02707 
02708     if (first_byte >= 0)
02709       {
02710       if (first_byte_caseless)
02711         while (current_subject < end_subject &&
02712                lcc[*current_subject] != first_byte)
02713           current_subject++;
02714       else
02715         while (current_subject < end_subject && *current_subject != first_byte)
02716           current_subject++;
02717       }
02718 
02719     /* Or to just after a linebreak for a multiline match if possible */
02720 
02721     else if (startline)
02722       {
02723       if (current_subject > md->start_subject + start_offset)
02724         {
02725         while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
02726           current_subject++;
02727 
02728         /* If we have just passed a CR and the newline option is ANY or
02729         ANYCRLF, and we are now at a LF, advance the match position by one more
02730         character. */
02731 
02732         if (current_subject[-1] == '\r' &&
02733              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
02734              current_subject < end_subject &&
02735              *current_subject == '\n')
02736           current_subject++;
02737         }
02738       }
02739 
02740     /* Or to a non-unique first char after study */
02741 
02742     else if (start_bits != NULL)
02743       {
02744       while (current_subject < end_subject)
02745         {
            /* start_bits is indexed as a bitmap: byte c/8, bit c&7 */
02746         register unsigned int c = *current_subject;
02747         if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
02748           else break;
02749         }
02750       }
02751 
02752     /* Restore fudged end_subject */
02753 
02754     end_subject = save_end_subject;
02755     }
02756 
02757   /* If req_byte is set, we know that that character must appear in the subject
02758   for the match to succeed. If the first character is set, req_byte must be
02759   later in the subject; otherwise the test starts at the match point. This
02760   optimization can save a huge amount of work in patterns with nested unlimited
02761   repeats that aren't going to match. Writing separate code for cased/caseless
02762   versions makes it go faster, as does using an autoincrement and backing off
02763   on a match.
02764 
02765   HOWEVER: when the subject string is very, very long, searching to its end can
02766   take a long time, and give bad performance on quite ordinary patterns. This
02767   showed up when somebody was matching /^C/ on a 32-megabyte string... so we
02768   don't do this when the string is sufficiently long.
02769 
02770   ALSO: this processing is disabled when partial matching is requested.
02771   */
02772 
02773   if (req_byte >= 0 &&
02774       end_subject - current_subject < REQ_BYTE_MAX &&
02775       (options & PCRE_PARTIAL) == 0)
02776     {
02777     register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
02778 
02779     /* We don't need to repeat the search if we haven't yet reached the
02780     place we found it at last time. */
02781 
02782     if (p > req_byte_ptr)
02783       {
02784       if (req_byte_caseless)
02785         {
02786         while (p < end_subject)
02787           {
02788           register int pp = *p++;
02789           if (pp == req_byte || pp == req_byte2) { p--; break; }
02790           }
02791         }
02792       else
02793         {
02794         while (p < end_subject)
02795           {
02796           if (*p++ == req_byte) { p--; break; }
02797           }
02798         }
02799 
02800       /* If we can't find the required character, break the matching loop,
02801       which will cause a return of PCRE_ERROR_NOMATCH. */
02802 
02803       if (p >= end_subject) break;
02804 
02805       /* If we have found the required character, save the point where we
02806       found it, so that we don't search again next time round the loop if
02807       the start hasn't passed this character yet. */
02808 
02809       req_byte_ptr = p;
02810       }
02811     }
02812 
02813   /* OK, now we can do the business */
02814 
02815   rc = internal_dfa_exec(
02816     md,                                /* fixed match data */
02817     md->start_code,                    /* this subexpression's code */
02818     current_subject,                   /* where we currently are */
02819     start_offset,                      /* start offset in subject */
02820     offsets,                           /* offset vector */
02821     offsetcount,                       /* size of same */
02822     workspace,                         /* workspace vector */
02823     wscount,                           /* size of same */
02824     re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
02825     0,                                 /* function recurse level */
02826     0);                                /* regex recurse level */
02827 
02828   /* Anything other than "no match" means we are done, always; otherwise, carry
02829   on only if not anchored. */
02830 
02831   if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
02832 
02833   /* Advance to the next subject character unless we are at the end of a line
02834   and firstline is set. */
02835 
02836   if (firstline && IS_NEWLINE(current_subject)) break;
02837   current_subject++;
02838   if (utf8)
02839     {
        /* Skip bytes of the form 10xxxxxx so the next start position does not
        fall on one of them */
02840     while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
02841       current_subject++;
02842     }
02843   if (current_subject > end_subject) break;
02844 
02845   /* If we have just passed a CR and we are now at a LF, and the pattern does
02846   not contain any explicit matches for \r or \n, and the newline option is CRLF
02847   or ANY or ANYCRLF, advance the match position by one more character. */
02848 
02849   if (current_subject[-1] == '\r' &&
02850       current_subject < end_subject &&
02851       *current_subject == '\n' &&
02852       (re->options & PCRE_HASCRORLF) == 0 &&
02853         (md->nltype == NLTYPE_ANY ||
02854          md->nltype == NLTYPE_ANYCRLF ||
02855          md->nllen == 2))
02856     current_subject++;
02857 
02858   }   /* "Bumpalong" loop */
02859 
      /* Reached only via one of the "break" statements in the loop above */
02860 return PCRE_ERROR_NOMATCH;
02861 }

PCRE_EXP_DECL int pcre_exec(const pcre *argument_re, const pcre_extra *extra_data, PCRE_SPTR subject, int length, int start_offset, int options, int *offsets, int offsetcount)
 

Definition at line 4510 of file pcre_exec.cpp.

References _pcre_default_tables, _pcre_try_flipped(), match_data::callout_data, pcre_extra::callout_data, match_data::ctypes, ctypes_offset, match_data::end_subject, match_data::endonly, FALSE, flags, pcre_extra::flags, match_data::hitend, ims, match_data::lcc, lcc_offset, real_pcre::magic_number, MAGIC_NUMBER, match_data::match_limit, MATCH_LIMIT, pcre_extra::match_limit, match_data::match_limit_recursion, MATCH_LIMIT_RECURSION, pcre_extra::match_limit_recursion, real_pcre::name_count, real_pcre::name_entry_size, real_pcre::name_table_offset, newline, NEWLINE, match_data::notbol, match_data::notempty, match_data::noteol, NULL, real_pcre::options, match_data::partial, PCRE_ANCHORED, PCRE_DOLLAR_ENDONLY, PCRE_ERROR_BADCOUNT, PCRE_ERROR_BADMAGIC, PCRE_ERROR_BADNEWLINE, PCRE_ERROR_BADOPTION, PCRE_ERROR_NULL, PCRE_EXTRA_CALLOUT_DATA, PCRE_EXTRA_MATCH_LIMIT, PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_TABLES, PCRE_FIRSTLINE, PCRE_NEWLINE_ANY, PCRE_NEWLINE_ANYCRLF, PCRE_NEWLINE_BITS, PCRE_NEWLINE_CR, PCRE_NEWLINE_LF, PCRE_NOTBOL, PCRE_NOTEMPTY, PCRE_NOTEOL, PCRE_PARTIAL, PCRE_STARTLINE, PCRE_UTF8, PUBLIC_EXEC_OPTIONS, match_data::recursive, match_data::start_code, match_data::start_offset, match_data::start_subject, pcre_extra::study_data, real_pcre::tables, pcre_extra::tables, USPTR, and match_data::utf8.

Referenced by check_match_limit(), avmplus::RegExpObject::exec(), grep_or_recurse(), pcregrep(), regexec(), and avmplus::RegExpObject::replace().

04513 {
04514 int rc, resetcount, ocount;
04515 int first_byte = -1;
04516 int req_byte = -1;
04517 int req_byte2 = -1;
04518 int newline;
04519 unsigned long int ims;
04520 BOOL using_temporary_offsets = FALSE;
04521 BOOL anchored;
04522 BOOL startline;
04523 BOOL firstline;
04524 BOOL first_byte_caseless = FALSE;
04525 BOOL req_byte_caseless = FALSE;
04526 BOOL utf8;
04527 match_data match_block;
04528 match_data *md = &match_block;
04529 const uschar *tables;
04530 const uschar *start_bits = NULL;
04531 USPTR start_match = (USPTR)subject + start_offset;
04532 USPTR end_subject;
04533 USPTR req_byte_ptr = start_match - 1;
04534 
04535 pcre_study_data internal_study;
04536 const pcre_study_data *study;
04537 
04538 real_pcre internal_re;
04539 const real_pcre *external_re = (const real_pcre *)argument_re;
04540 const real_pcre *re = external_re;
04541 
04542 /* Plausibility checks */
04543 
04544 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
04545 if (re == NULL || subject == NULL ||
04546    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
04547 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
04548 
04549 /* Fish out the optional data from the extra_data structure, first setting
04550 the default values. */
04551 
04552 study = NULL;
04553 md->match_limit = MATCH_LIMIT;
04554 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
04555 md->callout_data = NULL;
04556 
04557 /* The table pointer is always in native byte order. */
04558 
04559 tables = external_re->tables;
04560 
04561 if (extra_data != NULL)
04562   {
04563   register unsigned int flags = extra_data->flags;
04564   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
04565     study = (const pcre_study_data *)extra_data->study_data;
04566   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
04567     md->match_limit = extra_data->match_limit;
04568   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
04569     md->match_limit_recursion = extra_data->match_limit_recursion;
04570   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
04571     md->callout_data = extra_data->callout_data;
04572   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
04573   }
04574 
04575 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
04576 is a feature that makes it possible to save compiled regex and re-use them
04577 in other programs later. */
04578 
04579 if (tables == NULL) tables = _pcre_default_tables;
04580 
04581 /* Check that the first field in the block is the magic number. If it is not,
04582 test for a regex that was compiled on a host of opposite endianness. If this is
04583 the case, flipped values are put in internal_re and internal_study if there was
04584 study data too. */
04585 
04586 if (re->magic_number != MAGIC_NUMBER)
04587   {
04588   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
04589   if (re == NULL) return PCRE_ERROR_BADMAGIC;
04590   if (study != NULL) study = &internal_study;
04591   }
04592 
04593 /* Set up other data */
04594 
04595 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
04596 startline = (re->options & PCRE_STARTLINE) != 0;
04597 firstline = (re->options & PCRE_FIRSTLINE) != 0;
04598 
04599 /* The code starts after the real_pcre block and the capture name table. */
04600 
04601 md->start_code = (const uschar *)external_re + re->name_table_offset +
04602   re->name_count * re->name_entry_size;
04603 
04604 md->start_subject = (USPTR)subject;
04605 md->start_offset = start_offset;
04606 md->end_subject = md->start_subject + length;
04607 end_subject = md->end_subject;
04608 
04609 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
04610 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
04611 
04612 md->notbol = (options & PCRE_NOTBOL) != 0;
04613 md->noteol = (options & PCRE_NOTEOL) != 0;
04614 md->notempty = (options & PCRE_NOTEMPTY) != 0;
04615 md->partial = (options & PCRE_PARTIAL) != 0;
04616 md->hitend = FALSE;
04617 
04618 md->recursive = NULL;                   /* No recursion at top level */
04619 
04620 md->lcc = tables + lcc_offset;
04621 md->ctypes = tables + ctypes_offset;
04622 
04623 /* Handle different types of newline. The three bits give eight cases. If
04624 nothing is set at run time, whatever was used at compile time applies. */
04625 
04626 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
04627        PCRE_NEWLINE_BITS)
04628   {
04629   case 0: newline = NEWLINE; break;   /* Compile-time default */
04630   case PCRE_NEWLINE_CR: newline = '\r'; break;
04631   case PCRE_NEWLINE_LF: newline = '\n'; break;
04632   case PCRE_NEWLINE_CR+
04633        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
04634   case PCRE_NEWLINE_ANY: newline = -1; break;
04635   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
04636   default: return PCRE_ERROR_BADNEWLINE;
04637   }
04638 
04639 if (newline == -2)
04640   {
04641   md->nltype = NLTYPE_ANYCRLF;
04642   }
04643 else if (newline < 0)
04644   {
04645   md->nltype = NLTYPE_ANY;
04646   }
04647 else
04648   {
04649   md->nltype = NLTYPE_FIXED;
04650   if (newline > 255)
04651     {
04652     md->nllen = 2;
04653     md->nl[0] = (newline >> 8) & 255;
04654     md->nl[1] = newline & 255;
04655     }
04656   else
04657     {
04658     md->nllen = 1;
04659     md->nl[0] = newline;
04660     }
04661   }
04662 
04663 /* Partial matching is supported only for a restricted set of regexes at the
04664 moment. */
04665 
04666 if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
04667   return PCRE_ERROR_BADPARTIAL;
04668 
04669 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
04670 back the character offset. */
04671 
04672 #ifdef SUPPORT_UTF8
04673 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
04674   {
04675   if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
04676     return PCRE_ERROR_BADUTF8;
04677   if (start_offset > 0 && start_offset < length)
04678     {
04679     int tb = ((uschar *)subject)[start_offset];
04680     if (tb > 127)
04681       {
04682       tb &= 0xc0;
04683       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
04684       }
04685     }
04686   }
04687 #endif
04688 
04689 /* The ims options can vary during the matching as a result of the presence
04690 of (?ims) items in the pattern. They are kept in a local variable so that
04691 restoring at the exit of a group is easy. */
04692 
04693 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
04694 
04695 /* If the expression has got more back references than the offsets supplied can
04696 hold, we get a temporary chunk of working store to use during the matching.
04697 Otherwise, we can use the vector supplied, rounding down its size to a multiple
04698 of 3. */
04699 
04700 ocount = offsetcount - (offsetcount % 3);
04701 
04702 if (re->top_backref > 0 && re->top_backref >= ocount/3)
04703   {
04704   ocount = re->top_backref * 3 + 3;
04705   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
04706   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
04707   using_temporary_offsets = TRUE;
04708   DPRINTF(("Got memory to hold back references\n"));
04709   }
04710 else md->offset_vector = offsets;
04711 
04712 md->offset_end = ocount;
04713 md->offset_max = (2*ocount)/3;
04714 md->offset_overflow = FALSE;
04715 md->capture_last = -1;
04716 
04717 if (ES3_Compatible_Behavior)
04718     md->end_offset_top = 2;
04719 
04720 
04721 /* Compute the minimum number of offsets that we need to reset each time. Doing
04722 this makes a huge difference to execution time when there aren't many brackets
04723 in the pattern. */
04724 
04725 resetcount = 2 + re->top_bracket * 2;
04726 if (resetcount > offsetcount) resetcount = ocount;
04727 
04728 /* Reset the working variable associated with each extraction. These should
04729 never be used unless previously set, but they get saved and restored, and so we
04730 initialize them to avoid reading uninitialized locations. */
04731 
04732 if (md->offset_vector != NULL)
04733   {
04734   register int *iptr = md->offset_vector + ocount;
04735   register int *iend = iptr - resetcount/2 + 1;
04736   while (--iptr >= iend) *iptr = -1;
04737   }
04738 
04739 /* Set up the first character to match, if available. The first_byte value is
04740 never set for an anchored regular expression, but the anchoring may be forced
04741 at run time, so we have to test for anchoring. The first char may be unset for
04742 an unanchored pattern, of course. If there's no first char and the pattern was
04743 studied, there may be a bitmap of possible first characters. */
04744 
04745 if (!anchored)
04746   {
04747   if ((re->options & PCRE_FIRSTSET) != 0)
04748     {
04749     first_byte = re->first_byte & 255;
04750     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
04751       first_byte = md->lcc[first_byte];
04752     }
04753   else
04754     if (!startline && study != NULL &&
04755       (study->options & PCRE_STUDY_MAPPED) != 0)
04756         start_bits = study->start_bits;
04757   }
04758 
04759 /* For anchored or unanchored matches, there may be a "last known required
04760 character" set. */
04761 
04762 if ((re->options & PCRE_REQCHSET) != 0)
04763   {
04764   req_byte = re->req_byte & 255;
04765   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
04766   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
04767   }
04768 
04769 
04770 /* ==========================================================================*/
04771 
04772 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
04773 the loop runs just once. */
04774 
04775 for(;;)
04776   {
04777   USPTR save_end_subject = end_subject;
04778   USPTR new_start_match;
04779 
04780   /* Reset the maximum number of extractions we might see. */
04781 
04782   if (md->offset_vector != NULL)
04783     {
04784     register int *iptr = md->offset_vector;
04785     register int *iend = iptr + resetcount;
04786     while (iptr < iend) *iptr++ = -1;
04787     }
04788 
04789   /* Advance to a unique first char if possible. If firstline is TRUE, the
04790   start of the match is constrained to the first line of a multiline string.
04791   That is, the match must be before or at the first newline. Implement this by
04792   temporarily adjusting end_subject so that we stop scanning at a newline. If
04793   the match fails at the newline, later code breaks this loop. */
04794 
04795   if (firstline)
04796     {
04797     USPTR t = start_match;
04798     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
04799     end_subject = t;
04800     }
04801 
04802   /* Now test for a unique first byte */
04803 
04804   if (first_byte >= 0)
04805     {
04806     if (first_byte_caseless)
04807       while (start_match < end_subject &&
04808              md->lcc[*start_match] != first_byte)
04809         start_match++;
04810     else
04811       while (start_match < end_subject && *start_match != first_byte)
04812         start_match++;
04813     }
04814 
04815   /* Or to just after a linebreak for a multiline match if possible */
04816 
04817   else if (startline)
04818     {
04819     if (start_match > md->start_subject + start_offset)
04820       {
04821       while (start_match <= end_subject && !WAS_NEWLINE(start_match))
04822         start_match++;
04823 
04824       /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
04825       and we are now at a LF, advance the match position by one more character.
04826       */
04827 
04828       if (start_match[-1] == '\r' &&
04829            (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
04830            start_match < end_subject &&
04831            *start_match == '\n')
04832         start_match++;
04833       }
04834     }
04835 
04836   /* Or to a non-unique first char after study */
04837 
04838   else if (start_bits != NULL)
04839     {
04840     while (start_match < end_subject)
04841       {
04842       register unsigned int c = *start_match;
04843       if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
04844       }
04845     }
04846 
04847   /* Restore fudged end_subject */
04848 
04849   end_subject = save_end_subject;
04850 
04851 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
04852   printf(">>>> Match against: ");
04853   pchars(start_match, end_subject - start_match, TRUE, md);
04854   printf("\n");
04855 #endif
04856 
04857   /* If req_byte is set, we know that that character must appear in the subject
04858   for the match to succeed. If the first character is set, req_byte must be
04859   later in the subject; otherwise the test starts at the match point. This
04860   optimization can save a huge amount of backtracking in patterns with nested
04861   unlimited repeats that aren't going to match. Writing separate code for
04862   cased/caseless versions makes it go faster, as does using an autoincrement
04863   and backing off on a match.
04864 
04865   HOWEVER: when the subject string is very, very long, searching to its end can
04866   take a long time, and give bad performance on quite ordinary patterns. This
04867   showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
04868   string... so we don't do this when the string is sufficiently long.
04869 
04870   ALSO: this processing is disabled when partial matching is requested.
04871   */
04872 
04873   if (req_byte >= 0 &&
04874       end_subject - start_match < REQ_BYTE_MAX &&
04875       !md->partial)
04876     {
04877     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
04878 
04879     /* We don't need to repeat the search if we haven't yet reached the
04880     place we found it at last time. */
04881 
04882     if (p > req_byte_ptr)
04883       {
04884       if (req_byte_caseless)
04885         {
04886         while (p < end_subject)
04887           {
04888           register int pp = *p++;
04889           if (pp == req_byte || pp == req_byte2) { p--; break; }
04890           }
04891         }
04892       else
04893         {
04894         while (p < end_subject)
04895           {
04896           if (*p++ == req_byte) { p--; break; }
04897           }
04898         }
04899 
04900       /* If we can't find the required character, break the matching loop,
04901       forcing a match failure. */
04902 
04903       if (p >= end_subject)
04904         {
04905         rc = MATCH_NOMATCH;
04906         break;
04907         }
04908 
04909       /* If we have found the required character, save the point where we
04910       found it, so that we don't search again next time round the loop if
04911       the start hasn't passed this character yet. */
04912 
04913       req_byte_ptr = p;
04914       }
04915     }
04916 
04917   /* OK, we can now run the match. */
04918 
04919   md->start_match_ptr = start_match;
04920   md->match_call_count = 0;
04921   rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
04922 
04923   switch(rc)
04924     {
04925     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
04926     exactly like PRUNE. */
04927 
04928     case MATCH_NOMATCH:
04929     case MATCH_PRUNE:
04930     case MATCH_THEN:
04931     new_start_match = start_match + 1;
04932 #ifdef SUPPORT_UTF8
04933     if (utf8)
04934       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
04935         new_start_match++;
04936 #endif
04937     break;
04938 
04939     /* SKIP passes back the next starting point explicitly. */
04940 
04941     case MATCH_SKIP:
04942     new_start_match = md->start_match_ptr;
04943     break;
04944 
04945     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
04946 
04947     case MATCH_COMMIT:
04948     rc = MATCH_NOMATCH;
04949     goto ENDLOOP;
04950 
04951     /* Any other return is some kind of error. */
04952 
04953     default:
04954     goto ENDLOOP;
04955     }
04956 
04957   /* Control reaches here for the various types of "no match at this point"
04958   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
04959 
04960   rc = MATCH_NOMATCH;
04961 
04962   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
04963   newline in the subject (though it may continue over the newline). Therefore,
04964   if we have just failed to match, starting at a newline, do not continue. */
04965 
04966   if (firstline && IS_NEWLINE(start_match)) break;
04967 
04968   /* Advance to new matching position */
04969 
04970   start_match = new_start_match;
04971 
04972   /* Break the loop if the pattern is anchored or if we have passed the end of
04973   the subject. */
04974 
04975   if (anchored || start_match > end_subject) break;
04976 
04977   /* If we have just passed a CR and we are now at a LF, and the pattern does
04978   not contain any explicit matches for \r or \n, and the newline option is CRLF
04979   or ANY or ANYCRLF, advance the match position by one more character. */
04980 
04981   if (start_match[-1] == '\r' &&
04982       start_match < end_subject &&
04983       *start_match == '\n' &&
04984       (re->options & PCRE_HASCRORLF) == 0 &&
04985         (md->nltype == NLTYPE_ANY ||
04986          md->nltype == NLTYPE_ANYCRLF ||
04987          md->nllen == 2))
04988     start_match++;
04989 
04990   }   /* End of for(;;) "bumpalong" loop */
04991 
04992 /* ==========================================================================*/
04993 
04994 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
04995 conditions is true:
04996 
04997 (1) The pattern is anchored or the match was failed by (*COMMIT);
04998 
04999 (2) We are past the end of the subject;
05000 
05001 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
05002     this option requests that a match occur at or before the first newline in
05003     the subject.
05004 
05005 When we have a match and the offset vector is big enough to deal with any
05006 backreferences, captured substring offsets will already be set up. In the case
05007 where we had to get some local store to hold offsets for backreference
05008 processing, copy those that we can. In this case there need not be overflow if
05009 certain parts of the pattern were not used, even though there are more
05010 capturing parentheses than vector slots. */
05011 
05012 ENDLOOP:
05013 
05014 if (rc == MATCH_MATCH)
05015   {
05016   if (using_temporary_offsets)
05017     {
05018     if (offsetcount >= 4)
05019       {
05020       memcpy(offsets + 2, md->offset_vector + 2,
05021         (offsetcount - 2) * sizeof(int));
05022       DPRINTF(("Copied offsets from temporary memory\n"));
05023       }
05024     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
05025     DPRINTF(("Freeing temporary memory\n"));
05026     (pcre_free)(md->offset_vector);
05027     }
05028 
05029   /* Set the return code to the number of captured strings, or 0 if there are
05030   too many to fit into the vector. */
05031 
05032     // ECMAScript compatible behavior means always returning as many elements as there were groups (+1).  PERL behavior is to return only as many results as the number of groups actually visited (+1)
05033     rc = md->offset_overflow? 0 : (ES3_Compatible_Behavior ? resetcount/2 : md->end_offset_top/2);
05034 
05035 //  rc = md->offset_overflow? 0 : md->end_offset_top/2;
05036 
05037   /* If there is space, set up the whole thing as substring 0. The value of
05038   md->start_match_ptr might be modified if \K was encountered on the success
05039   matching path. */
05040 
05041   if (offsetcount < 2) rc = 0; else
05042     {
05043     offsets[0] = md->start_match_ptr - md->start_subject;
05044     offsets[1] = md->end_match_ptr - md->start_subject;
05045     }
05046 
05047   DPRINTF((">>>> returning %d\n", rc));
05048   return rc;
05049   }
05050 
05051 /* Control gets here if there has been an error, or if the overall match
05052 attempt has failed at all permitted starting positions. */
05053 
05054 if (using_temporary_offsets)
05055   {
05056   DPRINTF(("Freeing temporary memory\n"));
05057   (pcre_free)(md->offset_vector);
05058   }
05059 
05060 if (rc != MATCH_NOMATCH)
05061   {
05062   DPRINTF((">>>> error: returning %d\n", rc));
05063   return rc;
05064   }
05065 else if (md->partial && md->hitend)
05066   {
05067   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
05068   return PCRE_ERROR_PARTIAL;
05069   }
05070 else
05071   {
05072   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
05073   return PCRE_ERROR_NOMATCH;
05074   }
05075 }

PCRE_EXP_DECL void pcre_free_substring(const char *)
 

Definition at line 460 of file pcre_get.cpp.

References pcre_free.

00461 {
00462 (pcre_free)((void *)pointer);
00463 }

PCRE_EXP_DECL void pcre_free_substring_list(const char **)
 

Definition at line 357 of file pcre_get.cpp.

References pcre_free.

00358 {
00359 (pcre_free)((void *)pointer);
00360 }

PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int, void *)
 

Definition at line 69 of file pcre_fullinfo.cpp.

References _pcre_default_tables, _pcre_try_flipped(), real_pcre::first_byte, pcre_extra::flags, real_pcre::magic_number, MAGIC_NUMBER, real_pcre::name_count, real_pcre::name_entry_size, real_pcre::name_table_offset, NULL, real_pcre::options, pcre_study_data::options, PCRE_ERROR_BADMAGIC, PCRE_ERROR_BADOPTION, PCRE_ERROR_NULL, PCRE_EXTRA_STUDY_DATA, PCRE_FIRSTSET, PCRE_HASCRORLF, PCRE_INFO_BACKREFMAX, PCRE_INFO_CAPTURECOUNT, PCRE_INFO_DEFAULT_TABLES, PCRE_INFO_FIRSTBYTE, PCRE_INFO_FIRSTTABLE, PCRE_INFO_HASCRORLF, PCRE_INFO_JCHANGED, PCRE_INFO_LASTLITERAL, PCRE_INFO_NAMECOUNT, PCRE_INFO_NAMEENTRYSIZE, PCRE_INFO_NAMETABLE, PCRE_INFO_OKPARTIAL, PCRE_INFO_OPTIONS, PCRE_INFO_SIZE, PCRE_INFO_STUDYSIZE, PCRE_JCHANGED, PCRE_NOPARTIAL, PCRE_REQCHSET, PCRE_STARTLINE, PCRE_STUDY_MAPPED, PUBLIC_OPTIONS, real_pcre::req_byte, real_pcre::size, pcre_study_data::size, pcre_extra::study_data, real_pcre::top_backref, and real_pcre::top_bracket.

Referenced by new_info(), pcre_get_stringnumber(), pcre_get_stringtable_entries(), and pcre_study().

00071 {
00072 real_pcre internal_re;
00073 pcre_study_data internal_study;
00074 const real_pcre *re = (const real_pcre *)argument_re;
00075 const pcre_study_data *study = NULL;
00076 
00077 if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
00078 
00079 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
00080   study = (const pcre_study_data *)extra_data->study_data;
00081 
00082 if (re->magic_number != MAGIC_NUMBER)
00083   {
00084   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
00085   if (re == NULL) return PCRE_ERROR_BADMAGIC;
00086   if (study != NULL) study = &internal_study;
00087   }
00088 
00089 switch (what)
00090   {
00091   case PCRE_INFO_OPTIONS:
00092   *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
00093   break;
00094 
00095   case PCRE_INFO_SIZE:
00096   *((size_t *)where) = re->size;
00097   break;
00098 
00099   case PCRE_INFO_STUDYSIZE:
00100   *((size_t *)where) = (study == NULL)? 0 : study->size;
00101   break;
00102 
00103   case PCRE_INFO_CAPTURECOUNT:
00104   *((int *)where) = re->top_bracket;
00105   break;
00106 
00107   case PCRE_INFO_BACKREFMAX:
00108   *((int *)where) = re->top_backref;
00109   break;
00110 
00111   case PCRE_INFO_FIRSTBYTE:
00112   *((int *)where) =
00113     ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
00114     ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
00115   break;
00116 
00117   /* Make sure we pass back the pointer to the bit vector in the external
00118   block, not the internal copy (with flipped integer fields). */
00119 
00120   case PCRE_INFO_FIRSTTABLE:
00121   *((const uschar **)where) =
00122     (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
00123       ((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
00124   break;
00125 
00126   case PCRE_INFO_LASTLITERAL:
00127   *((int *)where) =
00128     ((re->options & PCRE_REQCHSET) != 0)? re->req_byte : -1;
00129   break;
00130 
00131   case PCRE_INFO_NAMEENTRYSIZE:
00132   *((int *)where) = re->name_entry_size;
00133   break;
00134 
00135   case PCRE_INFO_NAMECOUNT:
00136   *((int *)where) = re->name_count;
00137   break;
00138 
00139   case PCRE_INFO_NAMETABLE:
00140   *((const uschar **)where) = (const uschar *)re + re->name_table_offset;
00141   break;
00142 
00143   case PCRE_INFO_DEFAULT_TABLES:
00144   *((const uschar **)where) = (const uschar *)(_pcre_default_tables);
00145   break;
00146 
00147   case PCRE_INFO_OKPARTIAL:
00148   *((int *)where) = (re->options & PCRE_NOPARTIAL) == 0;
00149   break;
00150 
00151   case PCRE_INFO_JCHANGED:
00152   *((int *)where) = (re->options & PCRE_JCHANGED) != 0;
00153   break;
00154 
00155   case PCRE_INFO_HASCRORLF:
00156   *((int *)where) = (re->options & PCRE_HASCRORLF) != 0;
00157   break;
00158 
00159   default: return PCRE_ERROR_BADOPTION;
00160   }
00161 
00162 return 0;
00163 }

PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *, int *, int, const char *, const char **)
 

Definition at line 437 of file pcre_get.cpp.

References get_first_set(), and pcre_get_substring().

00439 {
00440 int n = get_first_set(code, stringname, ovector);
00441 if (n <= 0) return n;
00442 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
00443 }

PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *)
 

Definition at line 69 of file pcre_get.cpp.

References NULL, PCRE_ERROR_NOSUBSTRING, pcre_fullinfo(), PCRE_INFO_NAMECOUNT, PCRE_INFO_NAMEENTRYSIZE, and PCRE_INFO_NAMETABLE.

Referenced by get_first_set().

00070 {
00071 int rc;
00072 int entrysize;
00073 int top, bot;
00074 uschar *nametable;
00075 
00076 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
00077   return rc;
00078 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
00079 
00080 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
00081   return rc;
00082 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
00083   return rc;
00084 
00085 bot = 0;
00086 while (top > bot)
00087   {
00088   int mid = (top + bot) / 2;
00089   uschar *entry = nametable + entrysize*mid;
00090   int c = strcmp(stringname, (char *)(entry + 2));
00091   if (c == 0) return (entry[0] << 8) + entry[1];
00092   if (c > 0) bot = mid + 1; else top = mid;
00093   }
00094 
00095 return PCRE_ERROR_NOSUBSTRING;
00096 }

PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *, char **, char **)
 

Definition at line 118 of file pcre_get.cpp.

References NULL, PCRE_ERROR_NOSUBSTRING, pcre_fullinfo(), PCRE_INFO_NAMECOUNT, PCRE_INFO_NAMEENTRYSIZE, and PCRE_INFO_NAMETABLE.

Referenced by get_first_set().

00120 {
00121 int rc;
00122 int entrysize;
00123 int top, bot;
00124 uschar *nametable, *lastentry;
00125 
00126 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
00127   return rc;
00128 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
00129 
00130 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
00131   return rc;
00132 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
00133   return rc;
00134 
00135 lastentry = nametable + entrysize * (top - 1);
00136 bot = 0;
00137 while (top > bot)
00138   {
00139   int mid = (top + bot) / 2;
00140   uschar *entry = nametable + entrysize*mid;
00141   int c = strcmp(stringname, (char *)(entry + 2));
00142   if (c == 0)
00143     {
00144     uschar *first = entry;
00145     uschar *last = entry;
00146     while (first > nametable)
00147       {
00148       if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
00149       first -= entrysize;
00150       }
00151     while (last < lastentry)
00152       {
00153       if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
00154       last += entrysize;
00155       }
00156     *firstptr = (char *)first;
00157     *lastptr = (char *)last;
00158     return entrysize;
00159     }
00160   if (c > 0) bot = mid + 1; else top = mid;
00161   }
00162 
00163 return PCRE_ERROR_NOSUBSTRING;
00164 }

PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int, const char **)
 

Definition at line 390 of file pcre_get.cpp.

References NULL, PCRE_ERROR_NOMEMORY, PCRE_ERROR_NOSUBSTRING, and pcre_malloc.

Referenced by pcre_get_named_substring().

00392 {
00393 int yield;
00394 char *substring;
00395 if (stringnumber < 0 || stringnumber >= stringcount)
00396   return PCRE_ERROR_NOSUBSTRING;
00397 stringnumber *= 2;
00398 yield = ovector[stringnumber+1] - ovector[stringnumber];
00399 substring = (char *)(pcre_malloc)(yield + 1);
00400 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
00401 memcpy(substring, subject + ovector[stringnumber], yield);
00402 substring[yield] = 0;
00403 *stringptr = substring;
00404 return yield;
00405 }

PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int, const char ***)
 

Definition at line 312 of file pcre_get.cpp.

References util::threadpool::i, NULL, PCRE_ERROR_NOMEMORY, and pcre_malloc.

00314 {
00315 int i;
00316 int size = sizeof(char *);
00317 int double_count = stringcount * 2;
00318 char **stringlist;
00319 char *p;
00320 
00321 for (i = 0; i < double_count; i += 2)
00322   size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
00323 
00324 stringlist = (char **)(pcre_malloc)(size);
00325 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
00326 
00327 *listptr = (const char **)stringlist;
00328 p = (char *)(stringlist + stringcount + 1);
00329 
00330 for (i = 0; i < double_count; i += 2)
00331   {
00332   int len = ovector[i+1] - ovector[i];
00333   memcpy(p, subject + ovector[i], len);
00334   *stringlist++ = p;
00335   p += len;
00336   *p++ = 0;
00337   }
00338 
00339 *stringlist = NULL;
00340 return 0;
00341 }

PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *)
 

Definition at line 76 of file pcre_info.cpp.

References _pcre_try_flipped(), real_pcre::first_byte, real_pcre::magic_number, MAGIC_NUMBER, NULL, real_pcre::options, PCRE_ERROR_BADMAGIC, PCRE_ERROR_NULL, PCRE_FIRSTSET, PCRE_STARTLINE, PUBLIC_OPTIONS, and real_pcre::top_bracket.

Referenced by regcomp().

00077 {
00078 real_pcre internal_re;
00079 const real_pcre *re = (const real_pcre *)argument_re;
00080 if (re == NULL) return PCRE_ERROR_NULL;
00081 if (re->magic_number != MAGIC_NUMBER)
00082   {
00083   re = _pcre_try_flipped(re, &internal_re, NULL, NULL);
00084   if (re == NULL) return PCRE_ERROR_BADMAGIC;
00085   }
00086 if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
00087 if (first_byte != NULL)
00088   *first_byte = ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
00089      ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
00090 return re->top_bracket;
00091 }

PCRE_EXP_DECL const unsigned char* pcre_maketables(void)
 

Definition at line 70 of file pcre_maketables.cpp.

References cbit_cntrl, cbit_digit, cbit_graph, cbit_length, cbit_lower, cbit_print, cbit_punct, cbit_space, cbit_upper, cbit_word, cbit_xdigit, util::threadpool::i, NULL, pcre_malloc, and tables_length.

Referenced by main().

00071 {
00072 unsigned char *yield, *p;
00073 int i;
00074 
00075 #ifndef DFTABLES
00076 yield = (unsigned char*)(pcre_malloc)(tables_length);
00077 #else
00078 yield = (unsigned char*)malloc(tables_length);
00079 #endif
00080 
00081 if (yield == NULL) return NULL;
00082 p = yield;
00083 
00084 /* First comes the lower casing table */
00085 
00086 for (i = 0; i < 256; i++) *p++ = tolower(i);
00087 
00088 /* Next the case-flipping table */
00089 
00090 for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
00091 
00092 /* Then the character class tables. Don't try to be clever and save effort on
00093 exclusive ones - in some locales things may be different. Note that the table
00094 for "space" includes everything "isspace" gives, including VT in the default
00095 locale. This makes it work for the POSIX class [:space:]. Note also that it is
00096 possible for a character to be alnum or alpha without being lower or upper,
00097 such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at
00098 least under Debian Linux's locales as of 12/2005). So we must test for alnum
00099 specially. */
00100 
00101 memset(p, 0, cbit_length);
00102 for (i = 0; i < 256; i++)
00103   {
00104   if (isdigit(i)) p[cbit_digit  + i/8] |= 1 << (i&7);
00105   if (isupper(i)) p[cbit_upper  + i/8] |= 1 << (i&7);
00106   if (islower(i)) p[cbit_lower  + i/8] |= 1 << (i&7);
00107   if (isalnum(i)) p[cbit_word   + i/8] |= 1 << (i&7);
00108   if (i == '_')   p[cbit_word   + i/8] |= 1 << (i&7);
00109   if (isspace(i)) p[cbit_space  + i/8] |= 1 << (i&7);
00110   if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
00111   if (isgraph(i)) p[cbit_graph  + i/8] |= 1 << (i&7);
00112   if (isprint(i)) p[cbit_print  + i/8] |= 1 << (i&7);
00113   if (ispunct(i)) p[cbit_punct  + i/8] |= 1 << (i&7);
00114   if (iscntrl(i)) p[cbit_cntrl  + i/8] |= 1 << (i&7);
00115   }
00116 p += cbit_length;
00117 
00118 /* Finally, the character type table. In this, we exclude VT from the white
00119 space chars, because Perl doesn't recognize it as such for \s and for comments
00120 within regexes. */
00121 
00122 for (i = 0; i < 256; i++)
00123   {
00124   int x = 0;
00125   if (i != 0x0b && isspace(i)) x += ctype_space;
00126   if (isalpha(i)) x += ctype_letter;
00127   if (isdigit(i)) x += ctype_digit;
00128   if (isxdigit(i)) x += ctype_xdigit;
00129   if (isalnum(i) || i == '_') x += ctype_word;
00130 
00131   /* Note: strchr includes the terminating zero in the characters it considers.
00132   In this instance, that is ok because we want binary zero to be flagged as a
00133   meta-character, which in this sense is any character that terminates a run
00134   of data characters. */
00135 
00136   if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
00137   *p++ = x;
00138   }
00139 
00140 return yield;
00141 }

PCRE_EXP_DECL int pcre_refcount(pcre *, int)
 

Definition at line 72 of file pcre_refcount.cpp.

References NULL, PCRE_ERROR_NULL, and real_pcre::ref_count.

00073 {
00074 real_pcre *re = (real_pcre *)argument_re;
00075 if (re == NULL) return PCRE_ERROR_NULL;
00076 re->ref_count = (-adjust > re->ref_count)? 0 :
00077                 (adjust + re->ref_count > 65535)? 65535 :
00078                 re->ref_count + adjust;
00079 return re->ref_count;
00080 }

PCRE_EXP_DECL pcre_extra* pcre_study(const pcre *, int, const char **)
 

Definition at line 499 of file pcre_study.cpp.

References compile_data::cbits, cbits_offset, compile_data::ctypes, ctypes_offset, compile_data::fcc, fcc_offset, pcre_extra::flags, compile_data::lcc, lcc_offset, real_pcre::magic_number, MAGIC_NUMBER, real_pcre::name_count, real_pcre::name_entry_size, real_pcre::name_table_offset, NULL, real_pcre::options, pcre_study_data::options, PCRE_ANCHORED, PCRE_CASELESS, PCRE_EXTRA_STUDY_DATA, PCRE_FIRSTSET, pcre_fullinfo(), PCRE_INFO_DEFAULT_TABLES, pcre_malloc, PCRE_STARTLINE, PCRE_STUDY_MAPPED, PCRE_UTF8, PUBLIC_STUDY_OPTIONS, set_start_bits(), pcre_study_data::size, SSB_DONE, pcre_study_data::start_bits, pcre_extra::study_data, and real_pcre::tables.

00500 {
00501 uschar start_bits[32];
00502 pcre_extra *extra;
00503 pcre_study_data *study;
00504 const uschar *tables;
00505 uschar *code;
00506 compile_data compile_block;
00507 const real_pcre *re = (const real_pcre *)external_re;
00508 
00509 *errorptr = NULL;
00510 
00511 if (re == NULL || re->magic_number != MAGIC_NUMBER)
00512   {
00513   *errorptr = "argument is not a compiled regular expression";
00514   return NULL;
00515   }
00516 
00517 if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
00518   {
00519   *errorptr = "unknown or incorrect option bit(s) set";
00520   return NULL;
00521   }
00522 
00523 code = (uschar *)re + re->name_table_offset +
00524   (re->name_count * re->name_entry_size);
00525 
00526 /* For an anchored pattern, or an unanchored pattern that has a first char, or
00527 a multiline pattern that matches only at "line starts", no further processing
00528 at present. */
00529 
00530 if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
00531   return NULL;
00532 
00533 /* Set the character tables in the block that is passed around */
00534 
00535 tables = re->tables;
00536 if (tables == NULL)
00537   (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
00538   (void *)(&tables));
00539 
00540 compile_block.lcc = tables + lcc_offset;
00541 compile_block.fcc = tables + fcc_offset;
00542 compile_block.cbits = tables + cbits_offset;
00543 compile_block.ctypes = tables + ctypes_offset;
00544 
00545 /* See if we can find a fixed set of initial characters for the pattern. */
00546 
00547 memset(start_bits, 0, 32 * sizeof(uschar));
00548 if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
00549   (re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE) return NULL;
00550 
00551 /* Get a pcre_extra block and a pcre_study_data block. The study data is put in
00552 the latter, which is pointed to by the former, which may also get additional
00553 data set later by the calling program. At the moment, the size of
00554 pcre_study_data is fixed. We nevertheless save it in a field for returning via
00555 the pcre_fullinfo() function so that if it becomes variable in the future, we
00556 don't have to change that code. */
00557 
00558 extra = (pcre_extra *)(pcre_malloc)
00559   (sizeof(pcre_extra) + sizeof(pcre_study_data));
00560 
00561 if (extra == NULL)
00562   {
00563   *errorptr = "failed to get memory";
00564   return NULL;
00565   }
00566 
00567 study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
00568 extra->flags = PCRE_EXTRA_STUDY_DATA;
00569 extra->study_data = study;
00570 
00571 study->size = sizeof(pcre_study_data);
00572 study->options = PCRE_STUDY_MAPPED;
00573 memcpy(study->start_bits, start_bits, sizeof(start_bits));
00574 
00575 return extra;
00576 }

PCRE_EXP_DECL const char* pcre_version(void)
 

Definition at line 83 of file pcre_version.cpp.

References PCRE_DATE, PCRE_MAJOR, PCRE_PRERELEASE, and XSTRING.

Referenced by handle_option(), and main().

00084 {
00085 return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
00086   XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
00087   XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
00088 }


Variable Documentation

PCRE_EXP_DECL int(* pcre_callout)(pcre_callout_block *)
 

Definition at line 256 of file pcre.h.

PCRE_EXP_DECL void(* pcre_free)(void *)
 

Definition at line 253 of file pcre.h.

Referenced by main(), pcre_compile2(), pcre_free_substring(), pcre_free_substring_list(), avmplus::RegExpClass::RegExpClass(), regfree(), and avmplus::RegExpObject::~RegExpObject().

PCRE_EXP_DECL void*(* pcre_malloc)(size_t)
 

Definition at line 252 of file pcre.h.

Referenced by main(), pcre_compile2(), pcre_get_substring(), pcre_get_substring_list(), pcre_maketables(), pcre_study(), and avmplus::RegExpClass::RegExpClass().

PCRE_EXP_DECL void(* pcre_stack_free)(void *)
 

Definition at line 255 of file pcre.h.

Referenced by main().

PCRE_EXP_DECL void*(* pcre_stack_malloc)(size_t)
 

Definition at line 254 of file pcre.h.

Referenced by main(), and match().


Generated on Sun Oct 12 18:50:40 2008 for Tamarin by  doxygen 1.4.6