1
0
mirror of https://github.com/nmap/nmap.git synced 2025-12-06 04:31:29 +00:00

Upgrading from 6.4 to 6.7

This commit is contained in:
fyodor
2006-11-06 02:11:37 +00:00
parent 8e553db834
commit b39aa96073
30 changed files with 2160 additions and 1392 deletions

View File

@@ -8,7 +8,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England. Phone: +44 1223 334714.
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
All rights reserved
@@ -17,7 +17,7 @@ THE C++ WRAPPER LIBRARY
Written by: Google Inc.
Copyright (c) 2005 Google Inc
Copyright (c) 2006 Google Inc
All rights reserved
####

View File

@@ -22,7 +22,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England. Phone: +44 1223 334714.
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
All rights reserved.
@@ -31,7 +31,7 @@ THE C++ WRAPPER FUNCTIONS
Contributed by: Google Inc.
Copyright (c) 2005, Google Inc.
Copyright (c) 2006, Google Inc.
All rights reserved.

View File

@@ -83,6 +83,7 @@ CXX = @CXX@
CFLAGS = @CFLAGS@
CXXFLAGS = @CXXFLAGS@
LDFLAGS = @LDFLAGS@
CXXLDFLAGS = @CXXLDFLAGS@
CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
@@ -94,7 +95,7 @@ UCP = @UCP@
NEWLINE = @NEWLINE@
POSIX_MALLOC_THRESHOLD = @POSIX_MALLOC_THRESHOLD@
LINK_SIZE = @LINK_SIZE@
MATCH_LIMIT = @MATCH_LIMIT@
MATCH_LIMIT = @MATCH_LIMIT@ @MATCH_LIMIT_RECURSION@
NO_RECURSE = @NO_RECURSE@
EBCDIC = @EBCDIC@
@@ -139,83 +140,83 @@ pcre_chartables.@OBJEXT@: pcre_chartables.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) \
$(POSIX_MALLOC_THRESHOLD) pcre_chartables.c
pcre_compile.@OBJEXT@: Makefile config.h pcre.h \
pcre_compile.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_compile.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_compile.c
pcre_config.@OBJEXT@: Makefile config.h pcre.h \
pcre_config.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_config.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_config.c
pcre_dfa_exec.@OBJEXT@: Makefile config.h pcre.h \
pcre_dfa_exec.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_dfa_exec.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_dfa_exec.c
pcre_exec.@OBJEXT@: Makefile config.h pcre.h \
pcre_exec.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_exec.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_exec.c
pcre_fullinfo.@OBJEXT@: Makefile config.h pcre.h \
pcre_fullinfo.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_fullinfo.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_fullinfo.c
pcre_get.@OBJEXT@: Makefile config.h pcre.h \
pcre_get.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_get.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_get.c
pcre_globals.@OBJEXT@: Makefile config.h pcre.h \
pcre_globals.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_globals.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_globals.c
pcre_info.@OBJEXT@: Makefile config.h pcre.h \
pcre_info.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_info.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_info.c
pcre_maketables.@OBJEXT@: Makefile config.h pcre.h \
pcre_maketables.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_maketables.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_maketables.c
pcre_refcount.@OBJEXT@: Makefile config.h pcre.h \
pcre_refcount.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_refcount.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_refcount.c
pcre_study.@OBJEXT@: Makefile config.h pcre.h \
pcre_study.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_study.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_study.c
pcre_tables.@OBJEXT@: Makefile config.h pcre.h \
pcre_tables.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_tables.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_tables.c
pcre_try_flipped.@OBJEXT@: Makefile config.h pcre.h \
pcre_try_flipped.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_try_flipped.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_try_flipped.c
pcre_version.@OBJEXT@: Makefile config.h pcre.h \
pcre_version.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_version.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_version.c
pcre_xclass.@OBJEXT@: Makefile config.h pcre.h \
pcre_xclass.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_xclass.c
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_xclass.c
pcreposix.@OBJEXT@: $(top_srcdir)/pcreposix.c $(top_srcdir)/pcreposix.h \
$(top_srcdir)/pcre_internal.h pcre.h config.h Makefile
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre.h config.h Makefile
@$(LTCOMPILE) $(POSIX_MALLOC_THRESHOLD) $(top_srcdir)/pcreposix.c
$(TARGET): $(OBJ)
@@ -231,7 +232,7 @@ pcre_chartables.c: dftables@BUILD_EXEEXT@
dftables.@BUILD_OBJEXT@: $(top_srcdir)/dftables.c \
$(top_srcdir)/pcre_maketables.c $(top_srcdir)/pcre_internal.h \
pcre.h config.h Makefile
$(top_srcdir)/pcre.h config.h Makefile
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) -I. $(top_srcdir)/dftables.c
dftables@BUILD_EXEEXT@: dftables.@BUILD_OBJEXT@
@@ -247,7 +248,7 @@ clean:; -rm -rf *.@OBJEXT@ *.lo *.a *.la .libs pcretest@EXEEXT@ pcre_str
distclean: clean
-rm -f pcre_chartables.c libtool pcre-config libpcre.pc \
pcre.h pcre_stringpiece.h pcrecpp.h \
pcre_stringpiece.h pcrecpparg.h \
dftables@EXEEXT@ RunGrepTest RunTest \
Makefile config.h config.status config.log config.cache

View File

@@ -11,8 +11,9 @@ the Contrib directory on the ftp site that you may find useful. See
If you want to compile PCRE for a non-Unix system (or perhaps, more strictly,
for a system that does not support "configure" and "make" files), note that
PCRE consists entirely of code written in Standard C, and so should compile
successfully on any system that has a Standard C compiler and library.
the basic PCRE library consists entirely of code written in Standard C, and so
should compile successfully on any system that has a Standard C compiler and
library. The C++ wrapper functions are a separate issue (see below).
GENERIC INSTRUCTIONS FOR THE C LIBRARY
@@ -34,27 +35,16 @@ your compiler gives to '\n'.
rem Use write, because notepad cannot handle UNIX files. Change values.
write config.h
(2) Copy or rename the file pcre.in as pcre.h, and change the macro definitions
for PCRE_MAJOR, PCRE_MINOR, and PCRE_DATE near its start to the values set in
configure.in.
rem Mark Tetrode's commands
copy pcre.in pcre.h
rem Read values from configure.in
write configure.in
rem Change values
write pcre.h
(3) Compile dftables.c as a stand-alone program, and then run it with
(2) Compile dftables.c as a stand-alone program, and then run it with
the single argument "pcre_chartables.c". This generates a set of standard
character tables and writes them to that file.
rem Mark Tetrode's commands
rem Compile & run
cl -DSUPPORT_UTF8 -DSUPPORT_UCP dftables.c
dftables.exe chartables.c
dftables.exe pcre_chartables.c
(4) Compile the following source files:
(3) Compile the following source files:
pcre_chartables.c
pcre_compile.c
@@ -67,12 +57,11 @@ character tables and writes them to that file.
pcre_info.c
pcre_maketables.c
pcre_ord2utf8.c
pcre_printint.c
pcre_refcount.c
pcre_study.c
pcre_tables.c
pcre_try_flipped.c
pcre_ucp_findchar.c
pcre_ucp_searchfuncs.c
pcre_valid_utf8.c
pcre_version.c
pcre_xclass.c
@@ -88,7 +77,7 @@ shared libraries, you may have to do this once for each type.
cl -DSUPPORT_UTF8 -DSUPPORT_UCP -DPOSIX_MALLOC_THRESHOLD=10 /c maketables.c get.c study.c pcre.c
lib /OUT:pcre.lib maketables.obj get.obj study.obj pcre.obj
(5) Similarly, compile pcreposix.c and link it (on its own) as the pcreposix
(4) Similarly, compile pcreposix.c and link it (on its own) as the pcreposix
library.
rem Mark Tetrode's commands, for a static library
@@ -96,14 +85,14 @@ library.
cl -DSUPPORT_UTF8 -DSUPPORT_UCP -DPOSIX_MALLOC_THRESHOLD=10 /c pcreposix.c
lib /OUT:pcreposix.lib pcreposix.obj
(6) Compile the test program pcretest.c. This needs the functions in the
(5) Compile the test program pcretest.c. This needs the functions in the
pcre and pcreposix libraries when linking.
rem Mark Tetrode's commands
rem compile & link
cl /F0x400000 pcretest.c pcre.lib pcreposix.lib
(7) Run pcretest on the testinput files in the testdata directory, and check
(6) Run pcretest on the testinput files in the testdata directory, and check
that the output matches the corresponding testoutput files. You must use the
-i option when checking testinput2. Note that the supplied files are in Unix
format, with just LF characters as line terminators. You may need to edit them
@@ -126,6 +115,9 @@ to change this if your system uses a different convention.
Note that there are now three more tests (7, 8, 9) that did not exist when Mark
wrote those comments. They test the new pcre_dfa_exec() function.
(7) If you want to use the pcregrep command, compile and link pcregrep.c; it
uses only the basic PCRE library.
THE C++ WRAPPER FUNCTIONS

View File

@@ -34,7 +34,7 @@ Documentation for PCRE
----------------------
If you install PCRE in the normal way, you will end up with an installed set of
man pages whose names all start with "pcre". The one that is called "pcre"
man pages whose names all start with "pcre". The one that is just called "pcre"
lists all the others. In addition to these man pages, the PCRE documentation is
supplied in two other forms; however, as there is no standard place to install
them, they are left in the doc directory of the unpacked source distribution.
@@ -68,6 +68,9 @@ others are pointers to URLs containing relevant files.
Building PCRE on a Unix-like system
-----------------------------------
If you are using HP's ANSI C++ compiler (aCC), please see the special note
in the section entitled "Using HP's ANSI C++ compiler (aCC)" below.
To build PCRE on a Unix-like system, first run the "configure" command from the
PCRE distribution directory, with your current directory set to the directory
where you want the files to be created. This command is a standard GNU
@@ -91,6 +94,10 @@ into /source/pcre/pcre-xxx, but you want to build it in /build/pcre/pcre-xxx:
cd /build/pcre/pcre-xxx
/source/pcre/pcre-xxx/configure
PCRE is written in C and is normally compiled as a C library. However, it is
possible to build it as a C++ library, though the provided building apparatus
does not have any features to support this.
There are some optional features that can be included or omitted from the PCRE
library. You can read more about them in the pcrebuild man page.
@@ -107,15 +114,17 @@ library. You can read more about them in the pcrebuild man page.
. If, in addition to support for UTF-8 character strings, you want to include
support for the \P, \p, and \X sequences that recognize Unicode character
properties, you must add --enable-unicode-properties to the "configure"
command. This adds about 90K to the size of the library (in the form of a
command. This adds about 30K to the size of the library (in the form of a
property table); only the basic two-letter properties such as Lu are
supported.
. You can build PCRE to recognize CR or NL as the newline character, instead
of whatever your compiler uses for "\n", by adding --newline-is-cr or
--newline-is-nl to the "configure" command, respectively. Only do this if you
really understand what you are doing. On traditional Unix-like systems, the
newline character is NL.
. You can build PCRE to recognize either CR or LF or the sequence CRLF as
indicating the end of a line. Whatever you specify at build time is the
default; the caller of PCRE can change the selection at run time. The default
newline indicator is a single LF character (the Unix standard). You can
specify the default newline indicator by adding --newline-is-cr or
--newline-is-lf or --newline-is-crlf to the "configure" command,
respectively.
. When called via the POSIX interface, PCRE uses malloc() to get additional
storage for processing capturing parentheses if there are more than 10 of
@@ -135,6 +144,16 @@ library. You can read more about them in the pcrebuild man page.
pcre_exec() can supply their own value. There is discussion on the pcreapi
man page.
. There is a separate counter that limits the depth of recursive function calls
during a matching process. This also has a default of ten million, which is
essentially "unlimited". You can change the default by setting, for example,
--with-match-limit-recursion=500000
Recursive function calls use up the runtime stack; running out of stack can
cause programs to crash in strange ways. There is a discussion about stack
sizes in the pcrestack man page.
. The default maximum compiled pattern size is around 64K. You can increase
this by adding --with-link-size=3 to the "configure" command. You can
increase it even more by setting --with-link-size=4, but this is unlikely
@@ -158,7 +177,6 @@ library. You can read more about them in the pcrebuild man page.
The "configure" script builds eight files for the basic C library:
. pcre.h is the header file for C programs that call PCRE
. Makefile is the makefile that builds the library
. config.h contains build-time configuration options for the library
. pcre-config is a script that shows the settings of "configure" options
@@ -262,6 +280,22 @@ when calling the "configure" command. If they are not specified, they default
to the values of CC and CFLAGS.
Using HP's ANSI C++ compiler (aCC)
----------------------------------
Unless C++ support is disabled by specifying the "--disable-cpp" option of the
"configure" script, you *must* include the "-AA" option in the CXXFLAGS
environment variable in order for the C++ components to compile correctly.
Also, note that the aCC compiler on PA-RISC platforms may have a defect whereby
needed libraries fail to get included when specifying the "-AA" compiler
option. If you experience unresolved symbols when linking the C++ programs,
use the workaround of specifying the following environment variable prior to
running the "configure" script:
CXXLDFLAGS="-lstd_v2 -lCsup_v2"
Building on non-Unix systems
----------------------------
@@ -409,28 +443,28 @@ The distribution should contain the following files:
pcre_info.c )
pcre_maketables.c )
pcre_ord2utf8.c )
pcre_printint.c )
pcre_refcount.c )
pcre_study.c )
pcre_tables.c )
pcre_try_flipped.c )
pcre_ucp_findchar.c )
pcre_ucp_searchfuncs.c)
pcre_valid_utf8.c )
pcre_version.c )
pcre_xclass.c )
ucp_findchar.c )
ucp.h ) source for the code that is used for
ucpinternal.h ) Unicode property handling
ucptable.c )
ucptypetable.c )
pcre.in "source" for the header for the external API; pcre.h
is built from this by "configure"
pcre_printint.src ) debugging function that is #included in pcretest, and
) can also be #included in pcre_compile()
pcre.h the public PCRE header file
pcreposix.h header for the external POSIX wrapper API
pcre_internal.h header for internal use
ucp.h ) headers concerned with
ucpinternal.h ) Unicode property handling
config.in template for config.h, which is built by configure
pcrecpp.h.in "source" for the header file for the C++ wrapper
pcrecpp.h the header file for the C++ wrapper
pcrecpparg.h.in "source" for another C++ header file
pcrecpp.cc )
pcre_scanner.cc ) source for the C++ wrapper library
@@ -453,8 +487,9 @@ The distribution should contain the following files:
RunGrepTest.in template for a Unix shell script for pcregrep tests
config.guess ) files used by libtool,
config.sub ) used only when building a shared library
config.h.in "source" for the config.h header file
configure a configuring shell script (built by autoconf)
configure.in the autoconf input used to build configure
configure.ac the autoconf input used to build configure
doc/Tech.Notes notes on the encoding
doc/*.3 man page sources for the PCRE functions
doc/*.1 man page sources for pcregrep and pcretest
@@ -482,7 +517,6 @@ The distribution should contain the following files:
libpcre.def
libpcreposix.def
pcre.def
(D) Auxiliary file for VPASCAL
@@ -491,4 +525,4 @@ The distribution should contain the following files:
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
August 2005
June 2006

View File

@@ -1,15 +1,16 @@
/* On Unix systems config.in is converted by configure into config.h. PCRE is
written in Standard C, but there are a few non-standard things it can cope
with, allowing it to run on SunOS4 and other "close to standard" systems.
/* On Unix-like systems config.in is converted by "configure" into config.h.
Some other environments also support the use of "configure". PCRE is written in
Standard C, but there are a few non-standard things it can cope with, allowing
it to run on SunOS4 and other "close to standard" systems.
On a non-Unix system you should just copy this file into config.h, and set up
the macros the way you need them. You should normally change the definitions of
HAVE_STRERROR and HAVE_MEMMOVE to 1. Unfortunately, because of the way autoconf
works, these cannot be made the defaults. If your system has bcopy() and not
memmove(), change the definition of HAVE_BCOPY instead of HAVE_MEMMOVE. If your
system has neither bcopy() nor memmove(), leave them both as 0; an emulation
function will be used. */
On a non-Unix-like system you should just copy this file into config.h, and set
up the macros the way you need them. You should normally change the definitions
of HAVE_STRERROR and HAVE_MEMMOVE to 1. Unfortunately, because of the way
autoconf works, these cannot be made the defaults. If your system has bcopy()
and not memmove(), change the definition of HAVE_BCOPY instead of HAVE_MEMMOVE.
If your system has neither bcopy() nor memmove(), leave them both as 0; an
emulation function will be used. */
/* If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro as 1. On systems that can use "configure",
@@ -19,76 +20,70 @@ this can be done via --enable-ebcdic. */
#define EBCDIC 0
#endif
/* If you are compiling for a system that needs some magic to be inserted
before the definition of an exported function, define this macro to contain the
relevant magic. It appears at the start of every exported function. */
/* If you are compiling for a system other than a Unix-like system or Win32,
and it needs some magic to be inserted before the definition of a function that
is exported by the library, define this macro to contain the relevant magic. If
you do not define this macro, it defaults to "extern" for a C compiler and
"extern C" for a C++ compiler on non-Win32 systems. This macro appears at the
start of every exported function that is part of the external API. It does not
appear on functions that are "external" in the C sense, but which are internal
to the library. */
#define PCRE_EXPORT
/* #define PCRE_DATA_SCOPE */
/* Define to empty if the "const" keyword does not work. */
/* Define the following macro to empty if the "const" keyword does not work. */
#undef const
/* Define to "unsigned" if <stddef.h> doesn't define size_t. */
/* Define the following macro to "unsigned" if <stddef.h> does not define
size_t. */
#undef size_t
/* The following two definitions are mainly for the benefit of SunOS4, which
doesn't have the strerror() or memmove() functions that should be present in
does not have the strerror() or memmove() functions that should be present in
all Standard C libraries. The macros HAVE_STRERROR and HAVE_MEMMOVE should
normally be defined with the value 1 for other systems, but unfortunately we
can't make this the default because "configure" files generated by autoconf
cannot make this the default because "configure" files generated by autoconf
will only change 0 to 1; they won't change 1 to 0 if the functions are not
found. */
#define HAVE_STRERROR 0
#define HAVE_MEMMOVE 0
/* There are some non-Unix systems that don't even have bcopy(). If this macro
is false, an emulation is used. If HAVE_MEMMOVE is set to 1, the value of
/* There are some non-Unix-like systems that don't even have bcopy(). If this
macro is false, an emulation is used. If HAVE_MEMMOVE is set to 1, the value of
HAVE_BCOPY is not relevant. */
#define HAVE_BCOPY 0
/* The value of NEWLINE determines the newline character. The default is to
leave it up to the compiler, but some sites want to force a particular value.
On Unix systems, "configure" can be used to override this default. */
On Unix-like systems, "configure" can be used to override this default. */
#ifndef NEWLINE
#define NEWLINE '\n'
#endif
/* The value of LINK_SIZE determines the number of bytes used to store
links as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases.
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows for
longer patterns in extreme cases. On Unix systems, "configure" can be used to
override this default. */
/* The value of LINK_SIZE determines the number of bytes used to store links as
offsets within the compiled regex. The default is 2, which allows for compiled
patterns up to 64K long. This covers the vast majority of cases. However, PCRE
can also be compiled to use 3 or 4 bytes instead. This allows for longer
patterns in extreme cases. On systems that support it, "configure" can be used
to override this default. */
#ifndef LINK_SIZE
#define LINK_SIZE 2
#endif
/* The value of MATCH_LIMIT determines the default number of times the match()
function can be called during a single execution of pcre_exec(). (There is a
runtime method of setting a different limit.) The limit exists in order to
catch runaway regular expressions that take for ever to determine that they do
not match. The default is set very large so that it does not accidentally catch
legitimate cases. On Unix systems, "configure" can be used to override this
default. */
#ifndef MATCH_LIMIT
#define MATCH_LIMIT 10000000
#endif
/* When calling PCRE via the POSIX interface, additional working storage is
required for holding the pointers to capturing substrings because PCRE requires
three integers per substring, whereas the POSIX interface provides only two. If
the number of expected substrings is small, the wrapper function uses space on
the stack, because this is faster than using malloc() for each call. The
threshold above which the stack is no longer use is defined by POSIX_MALLOC_
THRESHOLD. On Unix systems, "configure" can be used to override this default.
*/
threshold above which the stack is no longer used is defined by POSIX_MALLOC_
THRESHOLD. On systems that support it, "configure" can be used to override this
default. */
#ifndef POSIX_MALLOC_THRESHOLD
#define POSIX_MALLOC_THRESHOLD 10
@@ -97,11 +92,52 @@ THRESHOLD. On Unix systems, "configure" can be used to override this default.
/* PCRE uses recursive function calls to handle backtracking while matching.
This can sometimes be a problem on systems that have stacks of limited size.
Define NO_RECURSE to get a version that doesn't use recursion in the match()
function; instead it creates its own stack by steam using pcre_recurse_malloc
to get memory. For more detail, see comments and other stuff just above the
match() function. On Unix systems, "configure" can be used to set this in the
Makefile (use --disable-stack-for-recursion). */
function; instead it creates its own stack by steam using pcre_recurse_malloc()
to obtain memory from the heap. For more detail, see the comments and other
stuff just above the match() function. On systems that support it, "configure"
can be used to set this in the Makefile (use --disable-stack-for-recursion). */
/* #define NO_RECURSE */
/* The value of MATCH_LIMIT determines the default number of times the internal
match() function can be called during a single execution of pcre_exec(). There
is a runtime interface for setting a different limit. The limit exists in order
to catch runaway regular expressions that take for ever to determine that they
do not match. The default is set very large so that it does not accidentally
catch legitimate cases. On systems that support it, "configure" can be used to
override this default. */
#ifndef MATCH_LIMIT
#define MATCH_LIMIT 10000000
#endif
/* The above limit applies to all calls of match(), whether or not they
increase the recursion depth. In some environments it is desirable to limit the
depth of recursive calls of match() more strictly, in order to restrict the
maximum amount of stack (or heap, if NO_RECURSE is defined) that is used. The
value of MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To
have any useful effect, it must be less than the value of MATCH_LIMIT. There is
a runtime method for setting a different limit. On systems that support it,
"configure" can be used to override this default. */
#ifndef MATCH_LIMIT_RECURSION
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
#endif
/* These three limits are parameterized just in case anybody ever wants to
change them. Care must be taken if they are increased, because they guard
against integer overflow caused by enormously large patterns. */
#ifndef MAX_NAME_SIZE
#define MAX_NAME_SIZE 32
#endif
#ifndef MAX_NAME_COUNT
#define MAX_NAME_COUNT 10000
#endif
#ifndef MAX_DUPLENGTH
#define MAX_DUPLENGTH 30000
#endif
/* End */

52
libpcre/configure vendored
View File

@@ -272,6 +272,7 @@ PACKAGE_STRING=
PACKAGE_BUGREPORT=
ac_unique_file="dftables.c"
ac_unique_file="pcre.h"
# Factoring default headers for most tests.
ac_includes_default="\
#include <stdio.h>
@@ -309,7 +310,7 @@ ac_includes_default="\
# include <unistd.h>
#endif"
ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT RANLIB ac_ct_RANLIB CPP EGREP pcre_have_long_long pcre_have_ulong_long build build_cpu build_vendor build_os host host_cpu host_vendor host_os BUILD_EXEEXT BUILD_OBJEXT CC_FOR_BUILD CFLAGS_FOR_BUILD EBCDIC HAVE_MEMMOVE HAVE_STRERROR LINK_SIZE MATCH_LIMIT NEWLINE NO_RECURSE PCRE_MAJOR PCRE_MINOR PCRE_DATE PCRE_VERSION PCRE_LIB_VERSION PCRE_POSIXLIB_VERSION POSIX_MALLOC_THRESHOLD UCP UTF8 POSIX_OBJ POSIX_LOBJ POSIX_LIB LIBOBJS LTLIBOBJS'
ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT RANLIB ac_ct_RANLIB CPP EGREP pcre_have_long_long pcre_have_ulong_long build build_cpu build_vendor build_os host host_cpu host_vendor host_os BUILD_EXEEXT BUILD_OBJEXT CC_FOR_BUILD CFLAGS_FOR_BUILD CXXLDFLAGS EBCDIC HAVE_MEMMOVE HAVE_STRERROR LINK_SIZE MATCH_LIMIT MATCH_LIMIT_RECURSION NEWLINE NO_RECURSE PCRE_LIB_VERSION PCRE_POSIXLIB_VERSION PCRE_VERSION POSIX_MALLOC_THRESHOLD UCP UTF8 POSIX_OBJ POSIX_LOBJ POSIX_LIB LIBOBJS LTLIBOBJS'
ac_subst_files=''
# Initialize some variables set by options.
@@ -848,15 +849,17 @@ Optional Features:
--enable-unicode-properties enable Unicode properties support
--enable-newline-is-cr use CR as the newline character
--enable-newline-is-lf use LF as the newline character
--enable-newline-is-crlf use CRLF as the newline sequence
--enable-ebcdic assume EBCDIC coding rather than ASCII
--disable-stack-for-recursion disable use of stack recursion when matching
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
--without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
--with-posix-malloc-threshold=5 threshold for POSIX malloc usage
--with-posix-malloc-threshold=10 threshold for POSIX malloc usage
--with-link-size=2 internal link size (2, 3, or 4 allowed)
--with-match-limit=10000000 default limit on internal looping)
--with-match-limit=10000000 default limit on internal looping
--with-match-limit-recursion=10000000 default limit on internal recursion
Some influential environment variables:
CC C compiler command
@@ -1307,18 +1310,13 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
ac_config_headers="$ac_config_headers config.h"
PCRE_MAJOR=6
PCRE_MINOR=4
PCRE_DATE=05-Sep-2005
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}
POSIX_MALLOC_THRESHOLD=-DPOSIX_MALLOC_THRESHOLD=10
@@ -1326,6 +1324,12 @@ PCRE_LIB_VERSION=0:1:0
PCRE_POSIXLIB_VERSION=0:0:0
PCRE_MAJOR=`grep '#define PCRE_MAJOR' ${srcdir}/pcre.h | cut -c 29-`
PCRE_MINOR=`grep '#define PCRE_MINOR' ${srcdir}/pcre.h | cut -c 29-`
PCRE_PRERELEASE=`grep '#define PCRE_PRERELEASE' ${srcdir}/pcre.h | cut -c 29-`
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}${PCRE_PRERELEASE}
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -3447,6 +3451,16 @@ fi
fi;
# Check whether --enable-newline-is-crlf or --disable-newline-is-crlf was given.
if test "${enable_newline_is_crlf+set}" = set; then
enableval="$enable_newline_is_crlf"
if test "$enableval" = "yes"; then
NEWLINE=-DNEWLINE=3338
fi
fi;
# Check whether --enable-ebcdic or --disable-ebcdic was given.
if test "${enable_ebcdic+set}" = set; then
enableval="$enable_ebcdic"
@@ -3495,6 +3509,15 @@ if test "${with_match_limit+set}" = set; then
fi;
# Check whether --with-match-limit-recursion or --without-match-limit-recursion was given.
if test "${with_match_limit_recursion+set}" = set; then
withval="$with_match_limit_recursion"
MATCH_LIMIT_RECURSION=-DMATCH_LIMIT_RECURSION=$withval
fi;
if test "$UCP" != "" ; then
UTF8=-DSUPPORT_UTF8
fi
@@ -3615,7 +3638,6 @@ esac
if test "x$enable_shared" = "xno" ; then
@@ -3626,7 +3648,7 @@ _ACEOF
fi
ac_config_files="$ac_config_files Makefile pcre.h:pcre.h.in"
ac_config_files="$ac_config_files Makefile"
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
# tests run on this system so they can be shared between configure
@@ -4152,7 +4174,6 @@ do
case "$ac_config_target" in
# Handling of arguments.
"Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
"pcre.h" ) CONFIG_FILES="$CONFIG_FILES pcre.h:pcre.h.in" ;;
"config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
*) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
@@ -4263,19 +4284,18 @@ s,@BUILD_EXEEXT@,$BUILD_EXEEXT,;t t
s,@BUILD_OBJEXT@,$BUILD_OBJEXT,;t t
s,@CC_FOR_BUILD@,$CC_FOR_BUILD,;t t
s,@CFLAGS_FOR_BUILD@,$CFLAGS_FOR_BUILD,;t t
s,@CXXLDFLAGS@,$CXXLDFLAGS,;t t
s,@EBCDIC@,$EBCDIC,;t t
s,@HAVE_MEMMOVE@,$HAVE_MEMMOVE,;t t
s,@HAVE_STRERROR@,$HAVE_STRERROR,;t t
s,@LINK_SIZE@,$LINK_SIZE,;t t
s,@MATCH_LIMIT@,$MATCH_LIMIT,;t t
s,@MATCH_LIMIT_RECURSION@,$MATCH_LIMIT_RECURSION,;t t
s,@NEWLINE@,$NEWLINE,;t t
s,@NO_RECURSE@,$NO_RECURSE,;t t
s,@PCRE_MAJOR@,$PCRE_MAJOR,;t t
s,@PCRE_MINOR@,$PCRE_MINOR,;t t
s,@PCRE_DATE@,$PCRE_DATE,;t t
s,@PCRE_VERSION@,$PCRE_VERSION,;t t
s,@PCRE_LIB_VERSION@,$PCRE_LIB_VERSION,;t t
s,@PCRE_POSIXLIB_VERSION@,$PCRE_POSIXLIB_VERSION,;t t
s,@PCRE_VERSION@,$PCRE_VERSION,;t t
s,@POSIX_MALLOC_THRESHOLD@,$POSIX_MALLOC_THRESHOLD,;t t
s,@UCP@,$UCP,;t t
s,@UTF8@,$UTF8,;t t

View File

@@ -13,27 +13,16 @@ dnl This is required at the start; the name is the name of a file
dnl it should be seeing, to verify it is in the same directory.
AC_INIT(dftables.c)
AC_CONFIG_SRCDIR([pcre.h])
dnl A safety precaution
AC_PREREQ(2.57)
dnl Arrange to build config.h from config.h.in. Note that pcre.h is
dnl built differently, as it is just a "substitution" file.
dnl Arrange to build config.h from config.h.in.
dnl Manual says this macro should come right after AC_INIT.
AC_CONFIG_HEADER(config.h)
dnl Provide the current PCRE version information. Do not use numbers
dnl with leading zeros for the minor version, as they end up in a C
dnl macro, and may be treated as octal constants. Stick to single
dnl digits for minor numbers less than 10. There are unlikely to be
dnl that many releases anyway.
PCRE_MAJOR=6
PCRE_MINOR=4
PCRE_DATE=05-Sep-2005
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}
dnl Default values for miscellaneous macros
POSIX_MALLOC_THRESHOLD=-DPOSIX_MALLOC_THRESHOLD=10
@@ -44,6 +33,14 @@ dnl are built by default on Unix systems.
PCRE_LIB_VERSION=0:1:0
PCRE_POSIXLIB_VERSION=0:0:0
dnl Find the PCRE version from the pcre.h file. The PCRE_VERSION variable is
dnl substituted in pcre-config.in.
PCRE_MAJOR=`grep '#define PCRE_MAJOR' ${srcdir}/pcre.h | cut -c 29-`
PCRE_MINOR=`grep '#define PCRE_MINOR' ${srcdir}/pcre.h | cut -c 29-`
PCRE_PRERELEASE=`grep '#define PCRE_PRERELEASE' ${srcdir}/pcre.h | cut -c 29-`
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}${PCRE_PRERELEASE}
dnl Checks for programs.
AC_PROG_CC
@@ -120,6 +117,15 @@ if test "$enableval" = "yes"; then
fi
)
dnl Handle --enable-newline-is-crlf
AC_ARG_ENABLE(newline-is-crlf,
[ --enable-newline-is-crlf use CRLF as the newline sequence],
if test "$enableval" = "yes"; then
NEWLINE=-DNEWLINE=3338
fi
)
dnl Handle --enable-ebcdic
AC_ARG_ENABLE(ebcdic,
@@ -145,7 +151,7 @@ dnl I've done.
dnl Handle --with-posix-malloc-threshold=n
AC_ARG_WITH(posix-malloc-threshold,
[ --with-posix-malloc-threshold=5 threshold for POSIX malloc usage],
[ --with-posix-malloc-threshold=10 threshold for POSIX malloc usage],
POSIX_MALLOC_THRESHOLD=-DPOSIX_MALLOC_THRESHOLD=$withval
)
@@ -156,13 +162,20 @@ AC_ARG_WITH(link-size,
LINK_SIZE=-DLINK_SIZE=$withval
)
dnl Handle --with-match_limit=n
dnl Handle --with-match-limit=n
AC_ARG_WITH(match-limit,
[ --with-match-limit=10000000 default limit on internal looping)],
[ --with-match-limit=10000000 default limit on internal looping],
MATCH_LIMIT=-DMATCH_LIMIT=$withval
)
dnl Handle --with-match-limit-recursion=n
AC_ARG_WITH(match-limit-recursion,
[ --with-match-limit-recursion=10000000 default limit on internal recursion],
MATCH_LIMIT_RECURSION=-DMATCH_LIMIT_RECURSION=$withval
)
dnl Unicode character property support implies UTF-8 support
if test "$UCP" != "" ; then
@@ -187,19 +200,18 @@ AC_SUBST(BUILD_EXEEXT)
AC_SUBST(BUILD_OBJEXT)
AC_SUBST(CC_FOR_BUILD)
AC_SUBST(CFLAGS_FOR_BUILD)
AC_SUBST(CXXLDFLAGS)
AC_SUBST(EBCDIC)
AC_SUBST(HAVE_MEMMOVE)
AC_SUBST(HAVE_STRERROR)
AC_SUBST(LINK_SIZE)
AC_SUBST(MATCH_LIMIT)
AC_SUBST(MATCH_LIMIT_RECURSION)
AC_SUBST(NEWLINE)
AC_SUBST(NO_RECURSE)
AC_SUBST(PCRE_MAJOR)
AC_SUBST(PCRE_MINOR)
AC_SUBST(PCRE_DATE)
AC_SUBST(PCRE_VERSION)
AC_SUBST(PCRE_LIB_VERSION)
AC_SUBST(PCRE_POSIXLIB_VERSION)
AC_SUBST(PCRE_VERSION)
AC_SUBST(POSIX_MALLOC_THRESHOLD)
AC_SUBST(UCP)
AC_SUBST(UTF8)
@@ -214,4 +226,4 @@ if test "x$enable_shared" = "xno" ; then
fi
dnl This must be last; it determines what files are written as well as config.h
AC_OUTPUT(Makefile pcre.h:pcre.h.in)
AC_OUTPUT(Makefile )

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without

View File

@@ -2,8 +2,8 @@
* Perl-Compatible Regular Expressions *
*************************************************/
/* In its original form, this is the .in file that is transformed by
"configure" into pcre.h.
/* This is the public header file for the PCRE library, to be #included by
applications that call the PCRE functions.
Copyright (c) 1997-2005 University of Cambridge
@@ -39,17 +39,40 @@ POSSIBILITY OF SUCH DAMAGE.
#ifndef _PCRE_H
#define _PCRE_H
/* The file pcre.h is build by "configure". Do not edit it; instead
make changes to pcre.in. */
/* The current PCRE version information. */
/* NOTES FOR FUTURE MAINTAINERS: Do not use numbers with leading zeros, because
they may be treated as octal constants. The PCRE_PRERELEASE feature is for
identifying release candidates. It might be defined as -RC2, for example. In
real releases, it should be defined empty. Do not change the alignment of these
statements. The code in ./configure greps out the version numbers by using "cut"
to get values from column 29 onwards. These are substituted into pcre-config
and libpcre.pc. The values are not put into configure.ac and substituted here
(which would simplify this issue) because that makes life harder for those who
cannot run ./configure. As it now stands, this file need not be edited in that
circumstance. */
#define PCRE_MAJOR 6
#define PCRE_MINOR 3
#define PCRE_DATE 15-Aug-2005
#define PCRE_MINOR 7
#define PCRE_PRERELEASE
#define PCRE_DATE 04-Jul-2006
/* Win32 uses DLL by default; it needs special stuff for exported functions. */
/* Removed some defines here as I always compile statically */
/* Win32 uses DLL by default; it needs special stuff for exported functions
when building PCRE. */
/* For other operating systems, we use the standard "extern". */
#ifdef _WIN32
# ifdef PCRE_DEFINITION
# ifdef DLL_EXPORT
# define PCRE_DATA_SCOPE __declspec(dllexport)
# endif
# else
# ifndef PCRE_STATIC
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
# endif
# endif
#endif
/* Otherwise, we use the standard "extern". */
#ifndef PCRE_DATA_SCOPE
# ifdef __cplusplus
@@ -91,6 +114,10 @@ extern "C" {
#define PCRE_DFA_SHORTEST 0x00010000
#define PCRE_DFA_RESTART 0x00020000
#define PCRE_FIRSTLINE 0x00040000
#define PCRE_DUPNAMES 0x00080000
#define PCRE_NEWLINE_CR 0x00100000
#define PCRE_NEWLINE_LF 0x00200000
#define PCRE_NEWLINE_CRLF 0x00300000
/* Exec-time and get/set-time error codes */
@@ -114,6 +141,7 @@ extern "C" {
#define PCRE_ERROR_DFA_UMLIMIT (-18)
#define PCRE_ERROR_DFA_WSSIZE (-19)
#define PCRE_ERROR_DFA_RECURSE (-20)
#define PCRE_ERROR_RECURSIONLIMIT (-21)
/* Request types for pcre_fullinfo() */
@@ -131,7 +159,8 @@ extern "C" {
#define PCRE_INFO_STUDYSIZE 10
#define PCRE_INFO_DEFAULT_TABLES 11
/* Request types for pcre_config() */
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
#define PCRE_CONFIG_UTF8 0
#define PCRE_CONFIG_NEWLINE 1
@@ -140,19 +169,30 @@ extern "C" {
#define PCRE_CONFIG_MATCH_LIMIT 4
#define PCRE_CONFIG_STACKRECURSE 5
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
/* Bit flags for the pcre_extra structure */
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
these bits, just add new ones on the end, in order to remain compatible. */
#define PCRE_EXTRA_STUDY_DATA 0x0001
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
#define PCRE_EXTRA_TABLES 0x0008
#define PCRE_EXTRA_STUDY_DATA 0x0001
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
#define PCRE_EXTRA_TABLES 0x0008
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
/* Types */
struct real_pcre; /* declaration; the definition is private */
typedef struct real_pcre pcre;
/* When PCRE is compiled as a C++ library, the subject pointer type can be
replaced with a custom type. For conventional use, the public interface is a
const char *. */
#ifndef PCRE_SPTR
#define PCRE_SPTR const char *
#endif
/* The structure for passing additional data to pcre_exec(). This is defined in
such a way as to be extensible. Always add new fields at the end, in order to
remain compatible. */
@@ -163,6 +203,7 @@ typedef struct pcre_extra {
unsigned long int match_limit; /* Maximum number of calls to match() */
void *callout_data; /* Data passed back in callouts */
const unsigned char *tables; /* Pointer to character tables */
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
} pcre_extra;
/* The structure for passing out data via the pcre_callout_function. We use a
@@ -175,7 +216,7 @@ typedef struct pcre_callout_block {
/* ------------------------ Version 0 ------------------------------- */
int callout_number; /* Number compiled into pattern */
int *offset_vector; /* The offset vector */
const char *subject; /* The subject being matched */
PCRE_SPTR subject; /* The subject being matched */
int subject_length; /* The length of the subject */
int start_match; /* Offset to start of this match attempt */
int current_position; /* Where we currently are in the subject */
@@ -221,7 +262,7 @@ PCRE_DATA_SCOPE int pcre_copy_substring(const char *, int *, int, int, char *,
int);
PCRE_DATA_SCOPE int pcre_dfa_exec(const pcre *, const pcre_extra *,
const char *, int, int, int, int *, int , int *, int);
PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, const char *,
PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
int, int, int, int *, int);
PCRE_DATA_SCOPE void pcre_free_substring(const char *);
PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
@@ -230,6 +271,8 @@ PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int,
PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *,
int *, int, const char *, const char **);
PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *);
PCRE_DATA_SCOPE int pcre_get_stringtable_entries(const pcre *, const char *,
char **, char **);
PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int,
const char **);
PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int,

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -58,7 +58,7 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
PCRE_EXPORT int
PCRE_DATA_SCOPE int
pcre_config(int what, void *where)
{
switch (what)
@@ -95,6 +95,10 @@ switch (what)
*((unsigned int *)where) = MATCH_LIMIT;
break;
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
*((unsigned int *)where) = MATCH_LIMIT_RECURSION;
break;
case PCRE_CONFIG_STACKRECURSE:
#ifdef NO_RECURSE
*((int *)where) = 0;

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -43,6 +43,7 @@ alternative matching function that uses a DFA algorithm. This is NOT Perl-
compatible, but it has advantages in certain applications. */
#define NLBLOCK md /* The block containing newline information */
#include "pcre_internal.h"
@@ -288,7 +289,9 @@ const uschar *start_subject = md->start_subject;
const uschar *end_subject = md->end_subject;
const uschar *start_code = md->start_code;
#ifdef SUPPORT_UTF8
BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
#endif
rlevel++;
offsetcount &= (-2);
@@ -421,7 +424,8 @@ ptr = current_subject;
for (;;)
{
int i, j;
int c, d, clen, dlen;
int clen, dlen;
unsigned int c, d;
/* Make the new state list into the active state list and empty the
new state list. */
@@ -480,7 +484,7 @@ for (;;)
const uschar *code;
int state_offset = current_state->offset;
int count, codevalue;
int chartype, othercase;
int chartype, script;
#ifdef DEBUG
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
@@ -645,7 +649,10 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_CIRC:
if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
((ims & PCRE_MULTILINE) != 0 && ptr[-1] == NEWLINE))
((ims & PCRE_MULTILINE) != 0 &&
ptr >= start_subject + md->nllen &&
ptr != end_subject &&
IS_NEWLINE(ptr - md->nllen)))
{ ADD_ACTIVE(state_offset + 1, 0); }
break;
@@ -679,13 +686,16 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_ANY:
if (clen > 0 && (c != NEWLINE || (ims & PCRE_DOTALL) != 0))
if (clen > 0 && ((ims & PCRE_DOTALL) != 0 ||
ptr > end_subject - md->nllen ||
!IS_NEWLINE(ptr)))
{ ADD_NEW(state_offset + 1, 0); }
break;
/*-----------------------------------------------------------------*/
case OP_EODN:
if (clen == 0 || (c == NEWLINE && ptr + 1 == end_subject))
if (clen == 0 ||
(ptr == end_subject - md->nllen && IS_NEWLINE(ptr)))
{ ADD_ACTIVE(state_offset + 1, 0); }
break;
@@ -693,11 +703,14 @@ for (;;)
case OP_DOLL:
if ((md->moptions & PCRE_NOTEOL) == 0)
{
if (clen == 0 || (c == NEWLINE && (ptr + 1 == end_subject ||
(ims & PCRE_MULTILINE) != 0)))
if (clen == 0 ||
(ptr <= end_subject - md->nllen && IS_NEWLINE(ptr) &&
((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
))
{ ADD_ACTIVE(state_offset + 1, 0); }
}
else if (c == NEWLINE && (ims & PCRE_MULTILINE) != 0)
else if ((ims & PCRE_MULTILINE) != 0 &&
ptr <= end_subject - md->nllen && IS_NEWLINE(ptr))
{ ADD_ACTIVE(state_offset + 1, 0); }
break;
@@ -757,19 +770,38 @@ for (;;)
case OP_NOTPROP:
if (clen > 0)
{
int rqdtype, category;
category = _pcre_ucp_findchar(c, &chartype, &othercase);
rqdtype = code[1];
if (rqdtype >= 128)
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
switch(code[1])
{
if ((rqdtype - 128 == category) == (codevalue == OP_PROP))
{ ADD_NEW(state_offset + 2, 0); }
}
else
{
if ((rqdtype == chartype) == (codevalue == OP_PROP))
{ ADD_NEW(state_offset + 2, 0); }
case PT_ANY:
OK = TRUE;
break;
case PT_LAMP:
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
break;
case PT_GC:
OK = category == code[2];
break;
case PT_PC:
OK = chartype == code[2];
break;
case PT_SC:
OK = script == code[2];
break;
/* Should never occur, but keep compilers from grumbling. */
default:
OK = codevalue != OP_PROP;
break;
}
if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
}
break;
#endif
@@ -790,7 +822,11 @@ for (;;)
{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&
(d != OP_ANY ||
(ims & PCRE_DOTALL) != 0 ||
ptr > end_subject - md->nllen ||
!IS_NEWLINE(ptr)
) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
{
count++;
@@ -807,7 +843,11 @@ for (;;)
{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&
(d != OP_ANY ||
(ims & PCRE_DOTALL) != 0 ||
ptr > end_subject - md->nllen ||
!IS_NEWLINE(ptr)
) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
{
ADD_NEW(state_offset + 2, 0);
@@ -823,7 +863,11 @@ for (;;)
{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&
(d != OP_ANY ||
(ims & PCRE_DOTALL) != 0 ||
ptr > end_subject - md->nllen ||
!IS_NEWLINE(ptr)
) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
{
ADD_NEW(state_offset, 0);
@@ -842,7 +886,11 @@ for (;;)
{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&
(d != OP_ANY ||
(ims & PCRE_DOTALL) != 0 ||
ptr > end_subject - md->nllen ||
!IS_NEWLINE(ptr)
) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
{
if (++count >= GET2(code, 1))
@@ -862,14 +910,41 @@ for (;;)
case OP_PROP_EXTRA + OP_TYPEPLUS:
case OP_PROP_EXTRA + OP_TYPEMINPLUS:
count = current_state->count; /* Already matched */
if (count > 0) { ADD_ACTIVE(state_offset + 3, 0); }
if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
if (clen > 0)
{
int category = _pcre_ucp_findchar(c, &chartype, &othercase);
int rqdtype = code[2];
if ((d == OP_PROP) ==
(rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))
{ count++; ADD_NEW(state_offset, count); }
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
switch(code[2])
{
case PT_ANY:
OK = TRUE;
break;
case PT_LAMP:
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
break;
case PT_GC:
OK = category == code[3];
break;
case PT_PC:
OK = chartype == code[3];
break;
case PT_SC:
OK = script == code[3];
break;
/* Should never occur, but keep compilers from grumbling. */
default:
OK = codevalue != OP_PROP;
break;
}
if (OK == (d == OP_PROP)) { count++; ADD_NEW(state_offset, count); }
}
break;
@@ -878,7 +953,7 @@ for (;;)
case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
count = current_state->count; /* Already matched */
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -887,7 +962,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -899,7 +974,7 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_PROP_EXTRA + OP_TYPEQUERY:
case OP_PROP_EXTRA + OP_TYPEMINQUERY:
count = 3;
count = 4;
goto QS1;
case OP_PROP_EXTRA + OP_TYPESTAR:
@@ -908,14 +983,41 @@ for (;;)
QS1:
ADD_ACTIVE(state_offset + 3, 0);
ADD_ACTIVE(state_offset + 4, 0);
if (clen > 0)
{
int category = _pcre_ucp_findchar(c, &chartype, &othercase);
int rqdtype = code[2];
if ((d == OP_PROP) ==
(rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))
{ ADD_NEW(state_offset + count, 0); }
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
switch(code[2])
{
case PT_ANY:
OK = TRUE;
break;
case PT_LAMP:
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
break;
case PT_GC:
OK = category == code[3];
break;
case PT_PC:
OK = chartype == code[3];
break;
case PT_SC:
OK = script == code[3];
break;
/* Should never occur, but keep compilers from grumbling. */
default:
OK = codevalue != OP_PROP;
break;
}
if (OK == (d == OP_PROP)) { ADD_NEW(state_offset + count, 0); }
}
break;
@@ -932,7 +1034,7 @@ for (;;)
QS2:
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -941,7 +1043,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -954,17 +1056,45 @@ for (;;)
case OP_PROP_EXTRA + OP_TYPEUPTO:
case OP_PROP_EXTRA + OP_TYPEMINUPTO:
if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 5, 0); }
{ ADD_ACTIVE(state_offset + 6, 0); }
count = current_state->count; /* Number already matched */
if (clen > 0)
{
int category = _pcre_ucp_findchar(c, &chartype, &othercase);
int rqdtype = code[4];
if ((d == OP_PROP) ==
(rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
switch(code[4])
{
case PT_ANY:
OK = TRUE;
break;
case PT_LAMP:
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
break;
case PT_GC:
OK = category == code[5];
break;
case PT_PC:
OK = chartype == code[5];
break;
case PT_SC:
OK = script == code[5];
break;
/* Should never occur, but keep compilers from grumbling. */
default:
OK = codevalue != OP_PROP;
break;
}
if (OK == (d == OP_PROP))
{
if (++count >= GET2(code, 1))
{ ADD_NEW(state_offset + 5, 0); }
{ ADD_NEW(state_offset + 6, 0); }
else
{ ADD_NEW(state_offset, count); }
}
@@ -978,7 +1108,7 @@ for (;;)
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 4, 0); }
count = current_state->count; /* Number already matched */
if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -987,7 +1117,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -1018,17 +1148,17 @@ for (;;)
{
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
{
int othercase;
if (c < 128) othercase = fcc[c]; else
/* If we have Unicode property support, we can use it to test the
other case of the character, if there is one. The result of
_pcre_ucp_findchar() is < 0 if the char isn't found, and othercase is
returned as zero if there isn't another case. */
other case of the character. */
#ifdef SUPPORT_UCP
if (_pcre_ucp_findchar(c, &chartype, &othercase) < 0)
#endif
othercase = _pcre_ucp_othercase(c);
#else
othercase = -1;
#endif
if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
}
@@ -1050,7 +1180,7 @@ for (;;)
to wait for them to pass before continuing. */
case OP_EXTUNI:
if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -1058,7 +1188,7 @@ for (;;)
{
int nclen = 1;
GETCHARLEN(c, nptr, nclen);
if (_pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M) break;
if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
ncount++;
nptr += nclen;
}
@@ -1093,10 +1223,10 @@ for (;;)
if ((ims & PCRE_CASELESS) != 0)
{
#ifdef SUPPORT_UTF8
if (utf8 && c >= 128)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
if (_pcre_ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;
otherd = _pcre_ucp_othercase(d);
#endif /* SUPPORT_UCP */
}
else
@@ -1117,13 +1247,13 @@ for (;;)
if (clen > 0)
{
int otherd = -1;
if ((ims && PCRE_CASELESS) != 0)
if ((ims & PCRE_CASELESS) != 0)
{
#ifdef SUPPORT_UTF8
if (utf8 && c >= 128)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
if (_pcre_ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;
otherd = _pcre_ucp_othercase(d);
#endif /* SUPPORT_UCP */
}
else
@@ -1144,13 +1274,13 @@ for (;;)
if (clen > 0)
{
int otherd = -1;
if ((ims && PCRE_CASELESS) != 0)
if ((ims & PCRE_CASELESS) != 0)
{
#ifdef SUPPORT_UTF8
if (utf8 && c >= 128)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
if (_pcre_ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;
otherd = _pcre_ucp_othercase(d);
#endif /* SUPPORT_UCP */
}
else
@@ -1178,10 +1308,10 @@ for (;;)
if ((ims & PCRE_CASELESS) != 0)
{
#ifdef SUPPORT_UTF8
if (utf8 && c >= 128)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
if (_pcre_ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;
otherd = _pcre_ucp_othercase(d);
#endif /* SUPPORT_UCP */
}
else
@@ -1267,7 +1397,8 @@ for (;;)
{ ADD_ACTIVE(next_state_offset + 5, 0); }
if (isinclass)
{
if (++count >= GET2(ecode, 3))
int max = GET2(ecode, 3);
if (++count >= max && max != 0) /* Max 0 => no limit */
{ ADD_NEW(next_state_offset + 5, 0); }
else
{ ADD_NEW(state_offset, count); }
@@ -1519,7 +1650,7 @@ for (;;)
cb.version = 1; /* Version 1 of the callout block */
cb.callout_number = code[1];
cb.offset_vector = offsets;
cb.subject = (char *)start_subject;
cb.subject = (PCRE_SPTR)start_subject;
cb.subject_length = end_subject - start_subject;
cb.start_match = current_subject - start_subject;
cb.current_position = ptr - start_subject;
@@ -1567,7 +1698,7 @@ for (;;)
DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
rlevel*2-2, SP));
return match_count;
break; /* In effect, "return", but see the comment below */
}
/* One or more states are active for the next character. */
@@ -1575,11 +1706,13 @@ for (;;)
ptr += clen; /* Advance to next subject character */
} /* Loop to move along the subject string */
/* Control never gets here, but we must keep the compiler happy. */
/* Control gets here from "break" a few lines above. We do it this way because
if we use "return" above, we have compiler trouble. Some compilers warn if
there's nothing here because they think the function doesn't return a value. On
the other hand, if we put a dummy statement here, some more clever compilers
complain that it can't be reached. Sigh. */
DPRINTF(("%.*s+++ Unexpected end of internal_dfa_exec %d +++\n"
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, rlevel*2-2, SP));
return PCRE_ERROR_NOMATCH;
return match_count;
}
@@ -1611,13 +1744,14 @@ Returns: > 0 => number of match offset pairs placed in offsets
< -1 => some kind of unexpected problem
*/
PCRE_EXPORT int
PCRE_DATA_SCOPE int
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
const char *subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)
{
real_pcre *re = (real_pcre *)argument_re;
dfa_match_data match_block;
dfa_match_data *md = &match_block;
BOOL utf8, anchored, startline, firstline;
const uschar *current_subject, *end_subject, *lcc;
@@ -1632,6 +1766,7 @@ BOOL req_byte_caseless = FALSE;
int first_byte = -1;
int req_byte = -1;
int req_byte2 = -1;
int newline;
/* Plausibility checks */
@@ -1646,8 +1781,8 @@ flipping, so we scan the extra_data block first. This may set two fields in the
match block, so we must initialize them beforehand. However, the other fields
in the match block must not be set until after the byte flipping. */
match_block.tables = re->tables;
match_block.callout_data = NULL;
md->tables = re->tables;
md->callout_data = NULL;
if (extra_data != NULL)
{
@@ -1655,10 +1790,12 @@ if (extra_data != NULL)
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
study = (const pcre_study_data *)extra_data->study_data;
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
return PCRE_ERROR_DFA_UMLIMIT;
if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
match_block.callout_data = extra_data->callout_data;
md->callout_data = extra_data->callout_data;
if ((flags & PCRE_EXTRA_TABLES) != 0)
match_block.tables = extra_data->tables;
md->tables = extra_data->tables;
}
/* Check that the first field in the block is the magic number. If it is not,
@@ -1679,17 +1816,48 @@ current_subject = (const unsigned char *)subject + start_offset;
end_subject = (const unsigned char *)subject + length;
req_byte_ptr = current_subject - 1;
#ifdef SUPPORT_UTF8
utf8 = (re->options & PCRE_UTF8) != 0;
anchored = (options & PCRE_ANCHORED) != 0 || (re->options & PCRE_ANCHORED) != 0;
#else
utf8 = FALSE;
#endif
anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
(re->options & PCRE_ANCHORED) != 0;
/* The remaining fixed data for passing around. */
match_block.start_code = (const uschar *)argument_re +
md->start_code = (const uschar *)argument_re +
re->name_table_offset + re->name_count * re->name_entry_size;
match_block.start_subject = (const unsigned char *)subject;
match_block.end_subject = end_subject;
match_block.moptions = options;
match_block.poptions = re->options;
md->start_subject = (const unsigned char *)subject;
md->end_subject = end_subject;
md->moptions = options;
md->poptions = re->options;
/* Handle different types of newline. The two bits give four cases. If nothing
is set at run time, whatever was used at compile time applies. */
switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &
PCRE_NEWLINE_CRLF)
{
default: newline = NEWLINE; break; /* Compile-time default */
case PCRE_NEWLINE_CR: newline = '\r'; break;
case PCRE_NEWLINE_LF: newline = '\n'; break;
case PCRE_NEWLINE_CR+
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
}
if (newline > 255)
{
md->nllen = 2;
md->nl[0] = (newline >> 8) & 255;
md->nl[1] = newline & 255;
}
else
{
md->nllen = 1;
md->nl[0] = newline;
}
/* Check a UTF-8 string if required. Unfortunately there's no way of passing
back the character offset. */
@@ -1715,12 +1883,12 @@ if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
is a feature that makes it possible to save compiled regex and re-use them
in other programs later. */
if (match_block.tables == NULL) match_block.tables = _pcre_default_tables;
if (md->tables == NULL) md->tables = _pcre_default_tables;
/* The lower casing table and the "must be at the start of a line" flag are
used in a loop when finding where to start. */
lcc = match_block.tables + lcc_offset;
lcc = md->tables + lcc_offset;
startline = (re->options & PCRE_STARTLINE) != 0;
firstline = (re->options & PCRE_FIRSTLINE) != 0;
@@ -1753,7 +1921,7 @@ if ((re->options & PCRE_REQCHSET) != 0)
{
req_byte = re->req_byte & 255;
req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
req_byte2 = (match_block.tables + fcc_offset)[req_byte]; /* case flipped */
req_byte2 = (md->tables + fcc_offset)[req_byte]; /* case flipped */
}
/* Call the main matching function, looping for a non-anchored regex after a
@@ -1771,14 +1939,14 @@ for (;;)
/* Advance to a unique first char if possible. If firstline is TRUE, the
start of the match is constrained to the first line of a multiline string.
Implement this by temporarily adjusting end_subject so that we stop scanning
at a newline. If the match fails at the newline, later code breaks this loop.
*/
Implement this by temporarily adjusting end_subject so that we stop
scanning at a newline. If the match fails at the newline, later code breaks
this loop. */
if (firstline)
{
const uschar *t = current_subject;
while (t < save_end_subject && *t != '\n') t++;
while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;
end_subject = t;
}
@@ -1793,13 +1961,15 @@ for (;;)
current_subject++;
}
/* Or to just after \n for a multiline match if possible */
/* Or to just after a linebreak for a multiline match if possible */
else if (startline)
{
if (current_subject > match_block.start_subject + start_offset)
if (current_subject > md->start_subject + md->nllen +
start_offset)
{
while (current_subject < end_subject && current_subject[-1] != NEWLINE)
while (current_subject <= end_subject &&
!IS_NEWLINE(current_subject - md->nllen))
current_subject++;
}
}
@@ -1880,8 +2050,8 @@ for (;;)
/* OK, now we can do the business */
rc = internal_dfa_exec(
&match_block, /* fixed match data */
match_block.start_code, /* this subexpression's code */
md, /* fixed match data */
md->start_code, /* this subexpression's code */
current_subject, /* where we currently are */
start_offset, /* start offset in subject */
offsets, /* offset vector */
@@ -1900,17 +2070,15 @@ for (;;)
/* Advance to the next subject character unless we are at the end of a line
and firstline is set. */
if (firstline && *current_subject == NEWLINE) break;
if (firstline &&
current_subject <= end_subject - md->nllen &&
IS_NEWLINE(current_subject)) break;
current_subject++;
#ifdef SUPPORT_UTF8
if (utf8)
{
while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
current_subject++;
}
#endif
if (current_subject > end_subject) break;
}

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -61,7 +61,7 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
PCRE_EXPORT int
PCRE_DATA_SCOPE int
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
void *where)
{

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -50,8 +50,8 @@ for these functions came from Scott Wimer. */
* Find number for named string *
*************************************************/
/* This function is used by the two extraction functions below, as well
as being generally available.
/* This function is used by the get_first_set() function below, as well
as being generally available. It assumes that names are unique.
Arguments:
code the compiled regex
@@ -93,6 +93,113 @@ return PCRE_ERROR_NOSUBSTRING;
/*************************************************
* Find (multiple) entries for named string *
*************************************************/
/* This is used by the get_first_set() function below, as well as being
generally available. It is used when duplicated names are permitted.
Arguments:
code the compiled regex
stringname the name whose entries required
firstptr where to put the pointer to the first entry
lastptr where to put the pointer to the last entry
Returns: the length of each entry, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
int
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
  char **firstptr, char **lastptr)
{
int rc;
int entrysize;
int top, bot;
uschar *nametable, *lastentry;

/* Fetch the number of names, the size of each table entry, and the table
itself. Any failure from pcre_fullinfo() is passed straight back. */

rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top);
if (rc != 0) return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;

rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize);
if (rc != 0) return rc;

rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable);
if (rc != 0) return rc;

lastentry = nametable + entrysize * (top - 1);

/* Binary chop over the half-open range [bot, top). The name in each entry
starts two bytes in, after the group number. */

for (bot = 0; bot < top; )
  {
  int mid = (top + bot) / 2;
  uschar *hit = nametable + entrysize * mid;
  int cmp = strcmp(stringname, (char *)(hit + 2));
  uschar *lo, *hi;

  if (cmp > 0) { bot = mid + 1; continue; }
  if (cmp < 0) { top = mid; continue; }

  /* Found one entry with this name. Widen in both directions to take in any
  adjacent entries that carry the same name. */

  for (lo = hit; lo > nametable; lo -= entrysize)
    if (strcmp(stringname, (char *)(lo - entrysize + 2)) != 0) break;

  for (hi = hit; hi < lastentry; hi += entrysize)
    if (strcmp(stringname, (char *)(hi + entrysize + 2)) != 0) break;

  *firstptr = (char *)lo;
  *lastptr = (char *)hi;
  return entrysize;
  }

return PCRE_ERROR_NOSUBSTRING;
}
/*************************************************
* Find first set of multiple named strings *
*************************************************/
/* This function allows for duplicate names in the table of named substrings.
It returns the number of the first one that was set in a pattern match.
Arguments:
code the compiled regex
stringname the name of the capturing substring
ovector the vector of matched substrings
Returns: the number of the first that is set,
or the number of the first table entry for the name if none are set,
or a negative number on error
NOTE(review): this comment used to say "the last one" for the none-set case,
but the code returns the number held in the first entry; confirm which is
intended.
*/
static int
get_first_set(const pcre *code, const char *stringname, int *ovector)
{
const real_pcre *re = (const real_pcre *)code;
int entrysize;
char *first, *last;
uschar *entry;

/* Without duplicate names there is at most one entry, so the simple lookup
is sufficient. */

if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0)
  return pcre_get_stringnumber(code, stringname);

entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
if (entrysize <= 0) return entrysize;

/* Each entry starts with the group number as two bytes, most significant
first. Return the first group whose start offset in ovector is set. */

for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
  {
  int n = (entry[0] << 8) + entry[1];
  if (ovector[n*2] >= 0) return n;
  }

/* None are set. Read the number through an unsigned pointer: "first" is a
plain char pointer, and where char is signed a byte >= 0x80 would otherwise
be sign-extended and corrupt (or negate) the result. */

entry = (uschar *)first;
return (entry[0] << 8) + entry[1];
}
/*************************************************
* Copy captured string to given buffer *
*************************************************/
@@ -142,7 +249,8 @@ return yield;
*************************************************/
/* This function copies a single captured substring into a given buffer,
identifying it by name.
identifying it by name. If the regex permits duplicate names, the first
substring that is set is chosen.
Arguments:
code the compiled regex
@@ -168,7 +276,7 @@ int
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, char *buffer, int size)
{
int n = pcre_get_stringnumber(code, stringname);
int n = get_first_set(code, stringname, ovector);
if (n <= 0) return n;
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
}
@@ -299,7 +407,8 @@ return yield;
*************************************************/
/* This function copies a single captured substring, identified by name, into
new store.
new store. If the regex permits duplicate names, the first substring that is
set is chosen.
Arguments:
code the compiled regex
@@ -324,9 +433,10 @@ int
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, const char **stringptr)
{
int n = pcre_get_stringnumber(code, stringname);
int n = get_first_set(code, stringname, ovector);
if (n <= 0) return n;
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
}

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -68,7 +68,7 @@ Returns: number of capturing subpatterns
or negative values on error
*/
PCRE_EXPORT int
PCRE_DATA_SCOPE int
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
{
real_pcre internal_re;

View File

@@ -7,7 +7,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -121,6 +121,32 @@ Unix, where it is defined in sys/types, so use "uschar" instead. */
typedef unsigned char uschar;
/* PCRE is able to support 3 different kinds of newline (CR, LF, CRLF). The
following macro is used to package up testing for newlines. NLBLOCK is defined
in the various modules to indicate in which datablock the parameters exist.
The macro expands its argument more than once, so p must not have side
effects. When NLBLOCK->nllen is not 1 and the first byte matches, (p)[1] is
also read, so the caller must make sure that byte is within the subject. */
#define IS_NEWLINE(p) \
((p)[0] == NLBLOCK->nl[0] && \
(NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]))
/* When PCRE is compiled as a C++ library, the subject pointer can be
replaced with a custom type. This makes it possible, for example, to
allow pcre_exec() to process subject strings that are discontinuous by
using a smart pointer class. It must always be possible to inspect all
of the subject string in pcre_exec() because of the way it
backtracks. Two macros are required in the normal case, for
sign-unspecified and unsigned char pointers. The former is used for
the external interface and appears in pcre.h, which is why its name
must begin with PCRE_. */
#ifdef CUSTOM_SUBJECT_PTR
#define PCRE_SPTR CUSTOM_SUBJECT_PTR
#define USPTR CUSTOM_SUBJECT_PTR
#else
#define PCRE_SPTR const char *
#define USPTR const unsigned char *
#endif
/* Include the public PCRE header and the definitions of UCP character
property values. */
@@ -156,13 +182,14 @@ case in PCRE. */
#if HAVE_BCOPY
#define memmove(a, b, c) bcopy(b, a, c)
#else /* HAVE_BCOPY */
void *
static void *
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
{
int i;
size_t i;
dest += n;
src += n;
for (i = 0; i < n; ++i) *(--dest) = *(--src);
return dest;
}
#define memmove(a, b, c) pcre_memmove(a, b, c)
#endif /* not HAVE_BCOPY */
@@ -368,16 +395,17 @@ Standard C system should have one. */
#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)
/* Private options flags start at the most significant end of the four bytes,
but skip the top bit so we can use ints for convenience without getting tangled
with negative values. The public options defined in pcre.h start at the least
significant end. Make sure they don't overlap! */
/* Private options flags start at the most significant end of the four bytes.
The public options defined in pcre.h start at the least significant end. Make
sure they don't overlap! The bits are getting a bit scarce now -- when we run
out, there is a dummy word in the structure that could be used for the private
bits. */
#define PCRE_NOPARTIAL 0x80000000 /* can't use partial with this regex */
#define PCRE_FIRSTSET 0x40000000 /* first_byte is set */
#define PCRE_REQCHSET 0x20000000 /* req_byte is set */
#define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */
#define PCRE_ICHANGED 0x08000000 /* i option changes within regex */
#define PCRE_NOPARTIAL 0x04000000 /* can't use partial with this regex */
#define PCRE_JCHANGED 0x08000000 /* j option changes within regex */
/* Options for the "extra" block produced by pcre_study(). */
@@ -389,15 +417,17 @@ time, run time, or study time, respectively. */
#define PUBLIC_OPTIONS \
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE)
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
PCRE_DUPNAMES|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)
#define PUBLIC_EXEC_OPTIONS \
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
PCRE_PARTIAL)
PCRE_PARTIAL|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)
#define PUBLIC_DFA_EXEC_OPTIONS \
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART)
PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_CR| \
PCRE_NEWLINE_LF)
#define PUBLIC_STUDY_OPTIONS 0 /* None defined */
@@ -456,6 +486,26 @@ ESC_n is defined as yet another macro, which is set in config.h to either \n
#define ESC_tee '\t'
#endif
/* Codes for different types of Unicode property */
#define PT_ANY 0 /* Any property - matches all chars */
#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */
#define PT_GC 2 /* General characteristic (e.g. L) */
#define PT_PC 3 /* Particular characteristic (e.g. Lu) */
#define PT_SC 4 /* Script (e.g. Han) */
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
contain UTF-8 characters with values greater than 255. */
#define XCL_NOT 0x01 /* Flag: this is a negative class */
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
#define XCL_END 0 /* Marks end of individual items */
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
/* These are escaped items that aren't just an encoding of a particular data
value such as \n. They must have non-zero values, as check_escape() returns
their negation. Also, they must appear in the same order as in the opcode
@@ -471,19 +521,6 @@ enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_X, ESC_Z, ESC_z, ESC_E,
ESC_Q, ESC_REF };
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
contain UTF-8 characters with values greater than 255. */
#define XCL_NOT 0x01 /* Flag: this is a negative class */
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
#define XCL_END 0 /* Marks end of individual items */
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
#define XCL_PROP 3 /* Unicode property (one property code) follows */
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
OP_EOD must correspond in order to the list of escapes immediately above.
@@ -518,7 +555,7 @@ enum {
OP_DOLL, /* 20 End of line - varies with multiline switch */
OP_CHAR, /* 21 Match one character, casefully */
OP_CHARNC, /* 22 Match one character, caselessly */
OP_NOT, /* 23 Match anything but the following char */
OP_NOT, /* 23 Match one character, not the following one */
OP_STAR, /* 24 The maximizing and minimizing versions of */
OP_MINSTAR, /* 25 all these opcodes must come in pairs, with */
@@ -647,7 +684,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1, /* End */ \
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \
1, 1, /* Any, Anybyte */ \
2, 2, 1, /* NOTPROP, PROP, EXTUNI */ \
3, 3, 1, /* NOTPROP, PROP, EXTUNI */ \
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \
2, /* Char - the minimum length */ \
2, /* Charnc - the minimum length */ \
@@ -698,7 +735,8 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47 };
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51 };
/* The real format of the start of the pcre block; the index of names and the
code vector run on as long as necessary after the end. We store an explicit
@@ -762,6 +800,8 @@ typedef struct compile_data {
unsigned int backref_map; /* Bitmap of low back refs */
int req_varyopt; /* "After variable item" flag for reqbyte */
BOOL nopartial; /* Set TRUE if partial won't work */
int nllen; /* 1 or 2 for newline string length */
uschar nl[4]; /* Newline string */
} compile_data;
/* Structure for maintaining a chain of pointers to the currently incomplete
@@ -779,18 +819,18 @@ typedef struct recursion_info {
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
int group_num; /* Number of group that was called */
const uschar *after_call; /* "Return value": points after the call in the expr */
const uschar *save_start; /* Old value of md->start_match */
USPTR save_start; /* Old value of md->start_match */
int *offset_save; /* Pointer to start of saved offsets */
int saved_max; /* Number of saved offsets */
} recursion_info;
/* When compiling in a mode that doesn't use recursive calls to match(),
a structure is used to remember local variables on the heap. It is defined in
pcre.c, close to the match() function, so that it is easy to keep it in step
with any changes of local variable. However, the pointer to the current frame
must be saved in some "static" place over a longjmp(). We declare the
structure here so that we can put a pointer in the match_data structure.
NOTE: This isn't used for a "normal" compilation of pcre. */
pcre_exec.c, close to the match() function, so that it is easy to keep it in
step with any changes of local variable. However, the pointer to the current
frame must be saved in some "static" place over a longjmp(). We declare the
structure here so that we can put a pointer in the match_data structure. NOTE:
This isn't used for a "normal" compilation of pcre. */
struct heapframe;
@@ -799,10 +839,13 @@ doing traditional NFA matching, so that they are thread-safe. */
typedef struct match_data {
unsigned long int match_call_count; /* As it says */
unsigned long int match_limit;/* As it says */
unsigned long int match_limit; /* As it says */
unsigned long int match_limit_recursion; /* As it says */
int *offset_vector; /* Offset vector */
int offset_end; /* One past the end */
int offset_max; /* The maximum usable for return data */
int nllen; /* 1 or 2 for newline string length */
uschar nl[4]; /* Newline string */
const uschar *lcc; /* Points to lower casing table */
const uschar *ctypes; /* Points to table of type maps */
BOOL offset_overflow; /* Set if too many extractions */
@@ -814,10 +857,10 @@ typedef struct match_data {
BOOL partial; /* PARTIAL flag */
BOOL hitend; /* Hit the end of the subject at some point */
const uschar *start_code; /* For use when recursing */
const uschar *start_subject; /* Start of the subject string */
const uschar *end_subject; /* End of the subject string */
const uschar *start_match; /* Start of this match attempt */
const uschar *end_match_ptr; /* Subject position at end match */
USPTR start_subject; /* Start of the subject string */
USPTR end_subject; /* End of the subject string */
USPTR start_match; /* Start of this match attempt */
USPTR end_match_ptr; /* Subject position at end match */
int end_offset_top; /* Highwater mark at end of match */
int capture_last; /* Most recent capture number */
int start_offset; /* The start offset value */
@@ -836,6 +879,8 @@ typedef struct dfa_match_data {
const uschar *tables; /* Character tables */
int moptions; /* Match options */
int poptions; /* Pattern options */
int nllen; /* 1 or 2 for newline string length */
uschar nl[4]; /* Newline string */
void *callout_data; /* To pass back to callouts */
} dfa_match_data;
@@ -872,12 +917,13 @@ total length. */
#define ctypes_offset (cbits_offset + cbit_length)
#define tables_length (ctypes_offset + 256)
/* Layout of the UCP type table that translates property names into codes for
pcre_ucp_findchar(). */
/* Layout of the UCP type table that translates property names into types and
codes. */
typedef struct {
const char *name;
int value;
pcre_uint16 type;
pcre_uint16 value;
} ucp_type_table;
@@ -908,7 +954,8 @@ sense, but are not part of the PCRE public API. */
extern int _pcre_ord2utf8(int, uschar *);
extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *,
const pcre_study_data *, pcre_study_data *);
extern int _pcre_ucp_findchar(const int, int *, int *);
extern int _pcre_ucp_findprop(const unsigned int, int *, int *);
extern int _pcre_ucp_othercase(const int);
extern int _pcre_valid_utf8(const uschar *, int);
extern BOOL _pcre_xclass(int, const uschar *);

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -86,29 +86,22 @@ for (i = 0; i < 256; i++) *p++ = tolower(i);
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
/* Then the character class tables. Don't try to be clever and save effort
on exclusive ones - in some locales things may be different. Note that the
table for "space" includes everything "isspace" gives, including VT in the
default locale. This makes it work for the POSIX class [:space:]. */
/* Then the character class tables. Don't try to be clever and save effort on
exclusive ones - in some locales things may be different. Note that the table
for "space" includes everything "isspace" gives, including VT in the default
locale. This makes it work for the POSIX class [:space:]. Note also that it is
possible for a character to be alnum or alpha without being lower or upper,
such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at
least under Debian Linux's locales as of 12/2005). So we must test for alnum
specially. */
memset(p, 0, cbit_length);
for (i = 0; i < 256; i++)
{
if (isdigit(i))
{
p[cbit_digit + i/8] |= 1 << (i&7);
p[cbit_word + i/8] |= 1 << (i&7);
}
if (isupper(i))
{
p[cbit_upper + i/8] |= 1 << (i&7);
p[cbit_word + i/8] |= 1 << (i&7);
}
if (islower(i))
{
p[cbit_lower + i/8] |= 1 << (i&7);
p[cbit_word + i/8] |= 1 << (i&7);
}
if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
@@ -137,7 +130,9 @@ for (i = 0; i < 256; i++)
meta-character, which in this sense is any character that terminates a run
of data characters. */
if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta; *p++ = x; }
if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta;
*p++ = x;
}
return yield;
}

View File

@@ -1,454 +0,0 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is used in two places:
(1) It is #included by pcre_compile.c when it is compiled in debugging mode
(DEBUG defined in pcre_internal.h). It is not included in production compiles.
(2) It is always #included by pcretest.c, which can be asked to print out a
compiled regex for debugging purposes. */
static const char *OP_names[] = { OP_NAME_LIST };
/*************************************************
* Print single- or multi-byte character *
*************************************************/
/* Print the character at ptr to f and return the number of additional bytes
(beyond the first) that were consumed. Printable single bytes are output as
themselves; everything else is output as a hex escape. */
static int
print_char(FILE *f, uschar *ptr, BOOL utf8)
{
int ch = *ptr;
int extra, shift, k;

/* In UTF-8 mode a byte with its top two bits set introduces a multibyte
character; assemble its value from the continuation bytes. */

if (utf8 && (ch & 0xc0) == 0xc0)
  {
  extra = _pcre_utf8_table4[ch & 0x3f];   /* Number of additional bytes */
  shift = 6 * extra;
  ch = (ch & _pcre_utf8_table3[extra]) << shift;

  for (k = 1; k <= extra; k++)
    {
    /* Malformed UTF-8 should only be seen if the sanity check has been
    turned off. Do not swallow arbitrary bytes: stop at the bad one and flag
    the partial value with \X rather than \x. */

    if ((ptr[k] & 0xc0) != 0x80)
      {
      fprintf(f, "\\X{%x}", ch);
      return k - 1;
      }

    /* A valid continuation byte supplies the next six bits */

    shift -= 6;
    ch |= (ptr[k] & 0x3f) << shift;
    }

  if (ch < 128) fprintf(f, "\\x%02x", ch); else fprintf(f, "\\x{%x}", ch);
  return extra;
  }

/* Single-byte case: not UTF-8 mode, or not a multibyte lead byte */

if (isprint(ch)) fprintf(f, "%c", ch); else fprintf(f, "\\x%02x", ch);
return 0;
}
/*************************************************
* Find Unicode property name *
*************************************************/
/* Return the name of the Unicode property whose table value equals the
argument, or "??" if no entry matches or UCP support is not compiled in. */
static const char *
get_ucpname(int property)
{
#ifdef SUPPORT_UCP
int i;

/* _pcre_utt_size is the number of entries in _pcre_utt, so the last valid
index is _pcre_utt_size - 1. Starting the scan at _pcre_utt_size would read
one element past the end of the table. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (property == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
return "??";
#endif
}
/*************************************************
* Print compiled regex *
*************************************************/
/* Print a listing of a compiled regex to the stream f, one item per line,
each prefixed by its offset from the start of the code. This function works
for a regex whose integers are in either byte order: when the magic number
does not match, the header fields used here are byte-swapped before use.
However, we assume that what we are passed is a compiled regex.

Arguments:
  external_re   the compiled regex
  f             the stream to which all output is written
*/
static void
pcre_printint(pcre *external_re, FILE *f)
{
real_pcre *re = (real_pcre *)external_re;
uschar *codestart, *code;
BOOL utf8;

unsigned int options = re->options;
int offset = re->name_table_offset;
int count = re->name_count;
int size = re->name_entry_size;

if (re->magic_number != MAGIC_NUMBER)
  {
  offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
  count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
  size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
  options = ((options << 24) & 0xff000000) |
  ((options << 8) & 0x00ff0000) |
  ((options >> 8) & 0x0000ff00) |
  ((options >> 24) & 0x000000ff);
  }

/* The code follows the name table, which has count entries of size bytes */

code = codestart = (uschar *)re + offset + count * size;
utf8 = (options & PCRE_UTF8) != 0;

for(;;)
  {
  uschar *ccode;
  int c;
  int extra = 0;   /* Bytes to skip in addition to the opcode's table length */

  fprintf(f, "%3d ", (int)(code - codestart));

  if (*code >= OP_BRA)
    {
    if (*code - OP_BRA > EXTRACT_BASIC_MAX)
      fprintf(f, "%3d Bra extra\n", GET(code, 1));
    else
      fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
    code += _pcre_OP_lengths[OP_BRA];
    continue;
    }

  switch(*code)
    {
    case OP_END:
    fprintf(f, " %s\n", OP_names[*code]);
    fprintf(f, "------------------------------------------------------------------\n");
    return;

    case OP_OPT:
    fprintf(f, " %.2x %s", code[1], OP_names[*code]);
    break;

    case OP_CHAR:
      {
      fprintf(f, " ");
      do
        {
        code++;
        code += 1 + print_char(f, code, utf8);
        }
      while (*code == OP_CHAR);
      fprintf(f, "\n");
      continue;
      }
    break;

    case OP_CHARNC:
      {
      fprintf(f, " NC ");
      do
        {
        code++;
        code += 1 + print_char(f, code, utf8);
        }
      while (*code == OP_CHARNC);
      fprintf(f, "\n");
      continue;
      }
    break;

    case OP_KETRMAX:
    case OP_KETRMIN:
    case OP_ALT:
    case OP_KET:
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_COND:
    case OP_REVERSE:
    fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
    break;

    /* This must go to f like every other item; it previously used printf()
    and so was written to stdout regardless of the requested stream. */

    case OP_BRANUMBER:
    fprintf(f, "%3d %s", GET2(code, 1), OP_names[*code]);
    break;

    case OP_CREF:
    if (GET2(code, 1) == CREF_RECURSE)
      fprintf(f, " Cond recurse");
    else
      fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
    break;

    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    fprintf(f, " ");
    if (*code >= OP_TYPESTAR)
      {
      fprintf(f, "%s", OP_names[code[1]]);
      if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
        {
        fprintf(f, " %s ", get_ucpname(code[2]));
        extra = 1;
        }
      }
    else extra = print_char(f, code+1, utf8);
    fprintf(f, "%s", OP_names[*code]);
    break;

    case OP_EXACT:
    case OP_UPTO:
    case OP_MINUPTO:
    fprintf(f, " ");
    extra = print_char(f, code+3, utf8);
    fprintf(f, "{");
    if (*code != OP_EXACT) fprintf(f, ",");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_MINUPTO) fprintf(f, "?");
    break;

    case OP_TYPEEXACT:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    fprintf(f, " %s", OP_names[code[3]]);
    if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
      {
      fprintf(f, " %s ", get_ucpname(code[4]));
      extra = 1;
      }
    fprintf(f, "{");
    if (*code != OP_TYPEEXACT) fprintf(f, "0,");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
    break;

    case OP_NOT:
    if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
    else fprintf(f, " [^\\x%02x]", c);
    break;

    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
    else fprintf(f, " [^\\x%02x]", c);
    fprintf(f, "%s", OP_names[*code]);
    break;

    case OP_NOTEXACT:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
    else fprintf(f, " [^\\x%02x]{", c);
    if (*code != OP_NOTEXACT) fprintf(f, "0,");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_NOTMINUPTO) fprintf(f, "?");
    break;

    case OP_RECURSE:
    fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
    break;

    /* A back reference may be followed by a repeat item, which is printed by
    the code shared with the character classes below. */

    case OP_REF:
    fprintf(f, " \\%d", GET2(code,1));
    ccode = code + _pcre_OP_lengths[*code];
    goto CLASS_REF_REPEAT;

    case OP_CALLOUT:
    fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
      GET(code, 2 + LINK_SIZE));
    break;

    case OP_PROP:
    case OP_NOTPROP:
    fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
    break;

    /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
    having this code always here, and it makes it less messy without all those
    #ifdefs. */

    case OP_CLASS:
    case OP_NCLASS:
    case OP_XCLASS:
      {
      int i, min, max;
      BOOL printmap;

      fprintf(f, " [");

      if (*code == OP_XCLASS)
        {
        extra = GET(code, 1);
        ccode = code + LINK_SIZE + 1;
        printmap = (*ccode & XCL_MAP) != 0;
        if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
        }
      else
        {
        printmap = TRUE;
        ccode = code + 1;
        }

      /* Print a bit map */

      if (printmap)
        {
        for (i = 0; i < 256; i++)
          {
          if ((ccode[i/8] & (1 << (i&7))) != 0)
            {
            int j;
            for (j = i+1; j < 256; j++)
              if ((ccode[j/8] & (1 << (j&7))) == 0) break;
            if (i == '-' || i == ']') fprintf(f, "\\");
            if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
            if (--j > i)
              {
              if (j != i + 1) fprintf(f, "-");
              if (j == '-' || j == ']') fprintf(f, "\\");
              if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
              }
            i = j;
            }
          }
        ccode += 32;
        }

      /* For an XCLASS there is always some additional data */

      if (*code == OP_XCLASS)
        {
        int ch;
        while ((ch = *ccode++) != XCL_END)
          {
          if (ch == XCL_PROP)
            {
            fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
            }
          else if (ch == XCL_NOTPROP)
            {
            fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
            }
          else
            {
            ccode += 1 + print_char(f, ccode, TRUE);
            if (ch == XCL_RANGE)
              {
              fprintf(f, "-");
              ccode += 1 + print_char(f, ccode, TRUE);
              }
            }
          }
        }

      /* Indicate a non-UTF8 class which was created by negation */

      fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");

      /* Handle repeats after a class or a back reference */

      CLASS_REF_REPEAT:
      switch(*ccode)
        {
        case OP_CRSTAR:
        case OP_CRMINSTAR:
        case OP_CRPLUS:
        case OP_CRMINPLUS:
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        fprintf(f, "%s", OP_names[*ccode]);
        extra += _pcre_OP_lengths[*ccode];
        break;

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        min = GET2(ccode,1);
        max = GET2(ccode,3);
        if (max == 0) fprintf(f, "{%d,}", min);
        else fprintf(f, "{%d,%d}", min, max);
        if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
        extra += _pcre_OP_lengths[*ccode];
        break;
        }
      }
    break;

    /* Anything else is just an item with no data */

    default:
    fprintf(f, " %s", OP_names[*code]);
    break;
    }

  code += _pcre_OP_lengths[*code] + extra;
  fprintf(f, "\n");
  }
}
/* End of pcre_printint.src */

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -63,7 +63,7 @@ Returns: the (possibly updated) count value (a non-negative number), or
a negative error number
*/
PCRE_EXPORT int
PCRE_DATA_SCOPE int
pcre_refcount(pcre *argument_re, int adjust)
{
real_pcre *re = (real_pcre *)argument_re;

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -95,6 +95,13 @@ set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
{
register int c;
#if 0
/* ========================================================================= */
/* The following comment and code were inserted in January 1999. In May 2006,
when it was observed to cause compiler warnings about unused values, I took it
out again. If anybody is still using OS/2, they will have to put it back
manually. */
/* This next statement and the later reference to dummy are here in order to
trick the optimizer of the IBM C compiler for OS/2 into generating correct
code. Apparently IBM isn't going to fix the problem, and we would rather not
@@ -102,6 +109,8 @@ disable optimization (in this module it actually makes a big difference, and
the pcre module can use all the optimization it can get). */
volatile int dummy;
/* ========================================================================= */
#endif
do
{
@@ -159,7 +168,11 @@ do
case OP_BRAMINZERO:
if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
return FALSE;
/* =========================================================================
See the comment at the head of this function concerning the next line,
which was an old fudge for the benefit of OS/2.
dummy = 1;
========================================================================= */
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
tcode += 1+LINK_SIZE;
break;
@@ -215,15 +228,29 @@ do
try_next = FALSE;
break;
/* The cbit_space table has vertical tab as whitespace; we have to
discard it. */
case OP_NOT_WHITESPACE:
for (c = 0; c < 32; c++)
start_bits[c] |= ~cd->cbits[c+cbit_space];
{
int d = cd->cbits[c+cbit_space];
if (c == 1) d &= ~0x08;
start_bits[c] |= ~d;
}
try_next = FALSE;
break;
/* The cbit_space table has vertical tab as whitespace; we have to
discard it. */
case OP_WHITESPACE:
for (c = 0; c < 32; c++)
start_bits[c] |= cd->cbits[c+cbit_space];
{
int d = cd->cbits[c+cbit_space];
if (c == 1) d &= ~0x08;
start_bits[c] |= d;
}
try_next = FALSE;
break;
@@ -277,14 +304,28 @@ do
start_bits[c] |= cd->cbits[c+cbit_digit];
break;
/* The cbit_space table has vertical tab as whitespace; we have to
discard it. */
case OP_NOT_WHITESPACE:
for (c = 0; c < 32; c++)
start_bits[c] |= ~cd->cbits[c+cbit_space];
{
int d = cd->cbits[c+cbit_space];
if (c == 1) d &= ~0x08;
start_bits[c] |= ~d;
}
break;
/* The cbit_space table has vertical tab as whitespace; we have to
discard it. */
case OP_WHITESPACE:
for (c = 0; c < 32; c++)
start_bits[c] |= cd->cbits[c+cbit_space];
{
int d = cd->cbits[c+cbit_space];
if (c == 1) d &= ~0x08;
start_bits[c] |= d;
}
break;
case OP_NOT_WORDCHAR:
@@ -401,17 +442,16 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
NULL on error or if no optimization possible
*/
PCRE_EXPORT pcre_extra *
PCRE_DATA_SCOPE pcre_extra *
pcre_study(const pcre *external_re, int options, const char **errorptr)
{
uschar start_bits[32];
pcre_extra *extra;
pcre_study_data *study;
const uschar *tables;
const real_pcre *re = (const real_pcre *)external_re;
uschar *code = (uschar *)re + re->name_table_offset +
(re->name_count * re->name_entry_size);
uschar *code;
compile_data compile_block;
const real_pcre *re = (const real_pcre *)external_re;
*errorptr = NULL;
@@ -427,6 +467,9 @@ if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
return NULL;
}
code = (uschar *)re + re->name_table_offset +
(re->name_count * re->name_entry_size);
/* For an anchored pattern, or an unanchored pattern that has a first char, or
a multiline pattern that matches only at "line starts", no further processing
at present. */

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -48,7 +48,7 @@ clashes with the library. */
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in internal.h. */
the definition is next to the definition of the opcodes in pcre_internal.h. */
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
@@ -82,47 +82,110 @@ const uschar _pcre_utf8_table4[] = {
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
/* This table translates Unicode property names into code values for the
ucp_findchar() function. */
/* This table translates Unicode property names into type and code values. It
is searched by binary chop, so must be in collating sequence of name. */
const ucp_type_table _pcre_utt[] = {
{ "C", 128 + ucp_C },
{ "Cc", ucp_Cc },
{ "Cf", ucp_Cf },
{ "Cn", ucp_Cn },
{ "Co", ucp_Co },
{ "Cs", ucp_Cs },
{ "L", 128 + ucp_L },
{ "Ll", ucp_Ll },
{ "Lm", ucp_Lm },
{ "Lo", ucp_Lo },
{ "Lt", ucp_Lt },
{ "Lu", ucp_Lu },
{ "M", 128 + ucp_M },
{ "Mc", ucp_Mc },
{ "Me", ucp_Me },
{ "Mn", ucp_Mn },
{ "N", 128 + ucp_N },
{ "Nd", ucp_Nd },
{ "Nl", ucp_Nl },
{ "No", ucp_No },
{ "P", 128 + ucp_P },
{ "Pc", ucp_Pc },
{ "Pd", ucp_Pd },
{ "Pe", ucp_Pe },
{ "Pf", ucp_Pf },
{ "Pi", ucp_Pi },
{ "Po", ucp_Po },
{ "Ps", ucp_Ps },
{ "S", 128 + ucp_S },
{ "Sc", ucp_Sc },
{ "Sk", ucp_Sk },
{ "Sm", ucp_Sm },
{ "So", ucp_So },
{ "Z", 128 + ucp_Z },
{ "Zl", ucp_Zl },
{ "Zp", ucp_Zp },
{ "Zs", ucp_Zs }
{ "Any", PT_ANY, 0 },
{ "Arabic", PT_SC, ucp_Arabic },
{ "Armenian", PT_SC, ucp_Armenian },
{ "Bengali", PT_SC, ucp_Bengali },
{ "Bopomofo", PT_SC, ucp_Bopomofo },
{ "Braille", PT_SC, ucp_Braille },
{ "Buginese", PT_SC, ucp_Buginese },
{ "Buhid", PT_SC, ucp_Buhid },
{ "C", PT_GC, ucp_C },
{ "Canadian_Aboriginal", PT_SC, ucp_Canadian_Aboriginal },
{ "Cc", PT_PC, ucp_Cc },
{ "Cf", PT_PC, ucp_Cf },
{ "Cherokee", PT_SC, ucp_Cherokee },
{ "Cn", PT_PC, ucp_Cn },
{ "Co", PT_PC, ucp_Co },
{ "Common", PT_SC, ucp_Common },
{ "Coptic", PT_SC, ucp_Coptic },
{ "Cs", PT_PC, ucp_Cs },
{ "Cypriot", PT_SC, ucp_Cypriot },
{ "Cyrillic", PT_SC, ucp_Cyrillic },
{ "Deseret", PT_SC, ucp_Deseret },
{ "Devanagari", PT_SC, ucp_Devanagari },
{ "Ethiopic", PT_SC, ucp_Ethiopic },
{ "Georgian", PT_SC, ucp_Georgian },
{ "Glagolitic", PT_SC, ucp_Glagolitic },
{ "Gothic", PT_SC, ucp_Gothic },
{ "Greek", PT_SC, ucp_Greek },
{ "Gujarati", PT_SC, ucp_Gujarati },
{ "Gurmukhi", PT_SC, ucp_Gurmukhi },
{ "Han", PT_SC, ucp_Han },
{ "Hangul", PT_SC, ucp_Hangul },
{ "Hanunoo", PT_SC, ucp_Hanunoo },
{ "Hebrew", PT_SC, ucp_Hebrew },
{ "Hiragana", PT_SC, ucp_Hiragana },
{ "Inherited", PT_SC, ucp_Inherited },
{ "Kannada", PT_SC, ucp_Kannada },
{ "Katakana", PT_SC, ucp_Katakana },
{ "Kharoshthi", PT_SC, ucp_Kharoshthi },
{ "Khmer", PT_SC, ucp_Khmer },
{ "L", PT_GC, ucp_L },
{ "L&", PT_LAMP, 0 },
{ "Lao", PT_SC, ucp_Lao },
{ "Latin", PT_SC, ucp_Latin },
{ "Limbu", PT_SC, ucp_Limbu },
{ "Linear_B", PT_SC, ucp_Linear_B },
{ "Ll", PT_PC, ucp_Ll },
{ "Lm", PT_PC, ucp_Lm },
{ "Lo", PT_PC, ucp_Lo },
{ "Lt", PT_PC, ucp_Lt },
{ "Lu", PT_PC, ucp_Lu },
{ "M", PT_GC, ucp_M },
{ "Malayalam", PT_SC, ucp_Malayalam },
{ "Mc", PT_PC, ucp_Mc },
{ "Me", PT_PC, ucp_Me },
{ "Mn", PT_PC, ucp_Mn },
{ "Mongolian", PT_SC, ucp_Mongolian },
{ "Myanmar", PT_SC, ucp_Myanmar },
{ "N", PT_GC, ucp_N },
{ "Nd", PT_PC, ucp_Nd },
{ "New_Tai_Lue", PT_SC, ucp_New_Tai_Lue },
{ "Nl", PT_PC, ucp_Nl },
{ "No", PT_PC, ucp_No },
{ "Ogham", PT_SC, ucp_Ogham },
{ "Old_Italic", PT_SC, ucp_Old_Italic },
{ "Old_Persian", PT_SC, ucp_Old_Persian },
{ "Oriya", PT_SC, ucp_Oriya },
{ "Osmanya", PT_SC, ucp_Osmanya },
{ "P", PT_GC, ucp_P },
{ "Pc", PT_PC, ucp_Pc },
{ "Pd", PT_PC, ucp_Pd },
{ "Pe", PT_PC, ucp_Pe },
{ "Pf", PT_PC, ucp_Pf },
{ "Pi", PT_PC, ucp_Pi },
{ "Po", PT_PC, ucp_Po },
{ "Ps", PT_PC, ucp_Ps },
{ "Runic", PT_SC, ucp_Runic },
{ "S", PT_GC, ucp_S },
{ "Sc", PT_PC, ucp_Sc },
{ "Shavian", PT_SC, ucp_Shavian },
{ "Sinhala", PT_SC, ucp_Sinhala },
{ "Sk", PT_PC, ucp_Sk },
{ "Sm", PT_PC, ucp_Sm },
{ "So", PT_PC, ucp_So },
{ "Syloti_Nagri", PT_SC, ucp_Syloti_Nagri },
{ "Syriac", PT_SC, ucp_Syriac },
{ "Tagalog", PT_SC, ucp_Tagalog },
{ "Tagbanwa", PT_SC, ucp_Tagbanwa },
{ "Tai_Le", PT_SC, ucp_Tai_Le },
{ "Tamil", PT_SC, ucp_Tamil },
{ "Telugu", PT_SC, ucp_Telugu },
{ "Thaana", PT_SC, ucp_Thaana },
{ "Thai", PT_SC, ucp_Thai },
{ "Tibetan", PT_SC, ucp_Tibetan },
{ "Tifinagh", PT_SC, ucp_Tifinagh },
{ "Ugaritic", PT_SC, ucp_Ugaritic },
{ "Yi", PT_SC, ucp_Yi },
{ "Z", PT_GC, ucp_Z },
{ "Zl", PT_PC, ucp_Zl },
{ "Zp", PT_PC, ucp_Zp },
{ "Zs", PT_PC, ucp_Zs }
};
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -62,8 +62,8 @@ Arguments:
Returns: the flipped value
*/
static long int
byteflip(long int value, int n)
static unsigned long int
byteflip(unsigned long int value, int n)
{
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
return ((value & 0x000000ff) << 24) |
@@ -94,7 +94,7 @@ Returns: the new block if it is indeed a byte-flipped regex
NULL if it is not
*/
PCRE_EXPORT real_pcre *
real_pcre *
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
const pcre_study_data *study, pcre_study_data *internal_study)
{

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -52,10 +52,13 @@ string that identifies the PCRE version that is in use. */
#define STRING(a) # a
#define XSTRING(s) STRING(s)
PCRE_EXPORT const char *
PCRE_DATA_SCOPE const char *
pcre_version(void)
{
return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
return XSTRING(PCRE_MAJOR)
"." XSTRING(PCRE_MINOR)
XSTRING(PCRE_PRERELEASE)
" " XSTRING(PCRE_DATE);
}
/* End of pcre_version.c */

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -60,7 +60,7 @@ Arguments:
Returns: TRUE if character matches, else FALSE
*/
PCRE_EXPORT BOOL
BOOL
_pcre_xclass(int c, const uschar *data)
{
int t;
@@ -100,17 +100,40 @@ while ((t = *data++) != XCL_END)
#ifdef SUPPORT_UCP
else /* XCL_PROP & XCL_NOTPROP */
{
int chartype, othercase;
int rqdtype = *data++;
int category = _pcre_ucp_findchar(c, &chartype, &othercase);
if (rqdtype >= 128)
int chartype, script;
int category = _pcre_ucp_findprop(c, &chartype, &script);
switch(*data)
{
if ((rqdtype - 128 == category) == (t == XCL_PROP)) return !negated;
}
else
{
if ((rqdtype == chartype) == (t == XCL_PROP)) return !negated;
case PT_ANY:
if (t == XCL_PROP) return !negated;
break;
case PT_LAMP:
if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==
(t == XCL_PROP)) return !negated;
break;
case PT_GC:
if ((data[1] == category) == (t == XCL_PROP)) return !negated;
break;
case PT_PC:
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
break;
case PT_SC:
if ((data[1] == script) == (t == XCL_PROP)) return !negated;
break;
/* This should never occur, but compilers may mutter if there is no
default. */
default:
return FALSE;
}
data += 2;
}
#endif /* SUPPORT_UCP */
}

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -77,7 +77,7 @@ static const int eint[] = {
REG_ASSERT, /* internal error: code overflow */
REG_BADPAT, /* unrecognized character after (?< */
REG_BADPAT, /* lookbehind assertion is not fixed length */
REG_BADPAT, /* malformed number after (?( */
REG_BADPAT, /* malformed number or name after (?( */
REG_BADPAT, /* conditional group contains more than two branches */
REG_BADPAT, /* assertion expected after (?( */
REG_BADPAT, /* (?R or (?digits must be followed by ) */
@@ -94,11 +94,15 @@ static const int eint[] = {
REG_BADPAT, /* recursive call could loop indefinitely */
REG_BADPAT, /* unrecognized character after (?P */
REG_BADPAT, /* syntax error after (?P */
REG_BADPAT, /* two named groups have the same name */
REG_BADPAT, /* two named subpatterns have the same name */
REG_BADPAT, /* invalid UTF-8 string */
REG_BADPAT, /* support for \P, \p, and \X has not been compiled */
REG_BADPAT, /* malformed \P or \p sequence */
REG_BADPAT /* unknown property name after \P or \p */
REG_BADPAT, /* unknown property name after \P or \p */
REG_BADPAT, /* subpattern name is too long (maximum 32 characters) */
REG_BADPAT, /* too many named subpatterns (maximum 10,000) */
REG_BADPAT, /* repeated subpattern is too long */
REG_BADPAT /* octal value is greater than \377 (not in UTF-8 mode) */
};
/* Table of texts corresponding to POSIX error codes */
@@ -131,7 +135,7 @@ static const char *const pstring[] = {
* Translate error code to string *
*************************************************/
PCRE_EXPORT size_t
PCRE_DATA_SCOPE size_t
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
{
const char *message, *addmessage;
@@ -166,7 +170,7 @@ return length + addlength;
* Free store held by a regex *
*************************************************/
PCRE_EXPORT void
PCRE_DATA_SCOPE void
regfree(regex_t *preg)
{
(pcre_free)(preg->re_pcre);
@@ -189,7 +193,7 @@ Returns: 0 on success
various non-zero codes on failure
*/
PCRE_EXPORT int
PCRE_DATA_SCOPE int
regcomp(regex_t *preg, const char *pattern, int cflags)
{
const char *errorptr;
@@ -200,6 +204,8 @@ int options = 0;
if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
if ((cflags & REG_DOTALL) != 0) options |= PCRE_DOTALL;
if ((cflags & REG_NOSUB) != 0) options |= PCRE_NO_AUTO_CAPTURE;
if ((cflags & REG_UTF8) != 0) options |= PCRE_UTF8;
preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,
&erroffset, NULL);
@@ -223,9 +229,13 @@ substring, so we have to get and release working store instead of just using
the POSIX structures as was done in earlier releases when PCRE needed only 2
ints. However, if the number of possible capturing brackets is small, use a
block of store on the stack, to reduce the use of malloc/free. The threshold is
in a macro that can be changed at configure time. */
in a macro that can be changed at configure time.
PCRE_EXPORT int
If REG_NOSUB was specified at compile time, the PCRE_NO_AUTO_CAPTURE flag will
be set. When this is the case, the nmatch and pmatch arguments are ignored, and
the only result is yes/no/error. */
PCRE_DATA_SCOPE int
regexec(const regex_t *preg, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags)
{
@@ -234,13 +244,20 @@ int options = 0;
int *ovector = NULL;
int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
BOOL allocated_ovector = FALSE;
BOOL nosub =
(((const pcre *)preg->re_pcre)->options & PCRE_NO_AUTO_CAPTURE) != 0;
if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
if (nmatch > 0)
/* When no string data is being returned, ensure that nmatch is zero.
Otherwise, ensure the vector for holding the return data is large enough. */
if (nosub) nmatch = 0;
else if (nmatch > 0)
{
if (nmatch <= POSIX_MALLOC_THRESHOLD)
{
@@ -248,6 +265,7 @@ if (nmatch > 0)
}
else
{
if (nmatch > INT_MAX/(sizeof(int) * 3)) return REG_ESPACE;
ovector = (int *)malloc(sizeof(int) * nmatch * 3);
if (ovector == NULL) return REG_ESPACE;
allocated_ovector = TRUE;
@@ -262,6 +280,8 @@ if (rc == 0) rc = nmatch; /* All captured slots were filled in */
if (rc >= 0)
{
size_t i;
if (!nosub)
{
for (i = 0; i < (size_t)rc; i++)
{
pmatch[i].rm_so = ovector[i*2];
@@ -269,6 +289,7 @@ if (rc >= 0)
}
if (allocated_ovector) free(ovector);
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
}
return 0;
}

View File

@@ -9,7 +9,7 @@
Compatible Regular Expression library. It defines the things POSIX says should
be there. I hope.
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -50,22 +50,20 @@ POSSIBILITY OF SUCH DAMAGE.
extern "C" {
#endif
/* Options defined by POSIX. */
/* Options, mostly defined by POSIX, but with a couple of extras. */
#define REG_ICASE 0x01
#define REG_NEWLINE 0x02
#define REG_NOTBOL 0x04
#define REG_NOTEOL 0x08
#define REG_ICASE 0x0001
#define REG_NEWLINE 0x0002
#define REG_NOTBOL 0x0004
#define REG_NOTEOL 0x0008
#define REG_DOTALL 0x0010 /* NOT defined by POSIX. */
#define REG_NOSUB 0x0020
#define REG_UTF8 0x0040 /* NOT defined by POSIX. */
/* Additional options, not defined by POSIX, but somebody wanted them. */
#define REG_DOTALL 0x10
/* These are not used by PCRE, but by defining them we make it easier
/* This is not used by PCRE, but by defining it we make it easier
to slot PCRE into existing programs that make POSIX calls. */
#define REG_EXTENDED 0
#define REG_NOSUB 0
/* Error values. Not all these are relevant or used by the wrapper. */
@@ -107,12 +105,40 @@ typedef struct {
regoff_t rm_eo;
} regmatch_t;
/* Win32 uses DLL by default; it needs special stuff for exported functions
when building PCRE. */
#ifndef PCRE_DATA_SCOPE
#ifdef _WIN32
# ifdef PCRE_DEFINITION
# ifdef DLL_EXPORT
# define PCRE_DATA_SCOPE __declspec(dllexport)
# endif
# else
# ifndef PCRE_STATIC
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
# endif
# endif
#endif
#endif
/* Otherwise, we use the standard "extern". */
#ifndef PCRE_DATA_SCOPE
# ifdef __cplusplus
# define PCRE_DATA_SCOPE extern "C"
# else
# define PCRE_DATA_SCOPE extern
# endif
#endif
/* The functions */
extern int regcomp(regex_t *, const char *, int);
extern int regexec(const regex_t *, const char *, size_t, regmatch_t *, int);
extern size_t regerror(int, const regex_t *, char *, size_t);
extern void regfree(regex_t *);
PCRE_DATA_SCOPE int regcomp(regex_t *, const char *, int);
PCRE_DATA_SCOPE int regexec(const regex_t *, const char *, size_t,
regmatch_t *, int);
PCRE_DATA_SCOPE size_t regerror(int, const regex_t *, char *, size_t);
PCRE_DATA_SCOPE void regfree(regex_t *);
#ifdef __cplusplus
} /* extern "C" */

View File

@@ -1,12 +1,14 @@
/*************************************************
* libucp - Unicode Property Table handler *
* Unicode Property Table handler *
*************************************************/
#ifndef _UCP_H
#define _UCP_H
/* These are the character categories that are returned by ucp_findchar */
/* This file contains definitions of the property values that are returned by
the function _pcre_ucp_findprop(). */
/* These are the general character categories. */
enum {
ucp_C, /* Other */
@@ -18,7 +20,7 @@ enum {
ucp_Z /* Separator */
};
/* These are the detailed character types that are returned by ucp_findchar */
/* These are the particular character types. */
enum {
ucp_Cc, /* Control */
@@ -53,6 +55,72 @@ enum {
ucp_Zs /* Space separator */
};
/* These are the script identifications. The entries are listed in alphabetical
order of script name and are given no explicit values, so they are numbered
consecutively starting with ucp_Arabic = 0. Inserting or reordering an entry
therefore changes the numeric value of every entry after it. */
enum {
ucp_Arabic,
ucp_Armenian,
ucp_Bengali,
ucp_Bopomofo,
ucp_Braille,
ucp_Buginese,
ucp_Buhid,
ucp_Canadian_Aboriginal,
ucp_Cherokee,
ucp_Common,
ucp_Coptic,
ucp_Cypriot,
ucp_Cyrillic,
ucp_Deseret,
ucp_Devanagari,
ucp_Ethiopic,
ucp_Georgian,
ucp_Glagolitic,
ucp_Gothic,
ucp_Greek,
ucp_Gujarati,
ucp_Gurmukhi,
ucp_Han,
ucp_Hangul,
ucp_Hanunoo,
ucp_Hebrew,
ucp_Hiragana,
ucp_Inherited,
ucp_Kannada,
ucp_Katakana,
ucp_Kharoshthi,
ucp_Khmer,
ucp_Lao,
ucp_Latin,
ucp_Limbu,
ucp_Linear_B,
ucp_Malayalam,
ucp_Mongolian,
ucp_Myanmar,
ucp_New_Tai_Lue,
ucp_Ogham,
ucp_Old_Italic,
ucp_Old_Persian,
ucp_Oriya,
ucp_Osmanya,
ucp_Runic,
ucp_Shavian,
ucp_Sinhala,
ucp_Syloti_Nagri,
ucp_Syriac,
ucp_Tagalog,
ucp_Tagbanwa,
ucp_Tai_Le,
ucp_Tamil,
ucp_Telugu,
ucp_Thaana,
ucp_Thai,
ucp_Tibetan,
ucp_Tifinagh,
ucp_Ugaritic,
ucp_Yi
};
#endif
/* End of ucp.h */