mirror of
https://github.com/nmap/nmap.git
synced 2025-12-06 04:31:29 +00:00
Upgrading from 6.4 to 6.7
This commit is contained in:
@@ -8,7 +8,7 @@ Email domain: cam.ac.uk
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England. Phone: +44 1223 334714.
|
||||
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ THE C++ WRAPPER LIBRARY
|
||||
|
||||
Written by: Google Inc.
|
||||
|
||||
Copyright (c) 2005 Google Inc
|
||||
Copyright (c) 2006 Google Inc
|
||||
All rights reserved
|
||||
|
||||
####
|
||||
|
||||
@@ -22,7 +22,7 @@ Email domain: cam.ac.uk
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England. Phone: +44 1223 334714.
|
||||
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ THE C++ WRAPPER FUNCTIONS
|
||||
|
||||
Contributed by: Google Inc.
|
||||
|
||||
Copyright (c) 2005, Google Inc.
|
||||
Copyright (c) 2006, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
||||
@@ -83,6 +83,7 @@ CXX = @CXX@
|
||||
CFLAGS = @CFLAGS@
|
||||
CXXFLAGS = @CXXFLAGS@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
CXXLDFLAGS = @CXXLDFLAGS@
|
||||
|
||||
CC_FOR_BUILD = @CC_FOR_BUILD@
|
||||
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
|
||||
@@ -94,7 +95,7 @@ UCP = @UCP@
|
||||
NEWLINE = @NEWLINE@
|
||||
POSIX_MALLOC_THRESHOLD = @POSIX_MALLOC_THRESHOLD@
|
||||
LINK_SIZE = @LINK_SIZE@
|
||||
MATCH_LIMIT = @MATCH_LIMIT@
|
||||
MATCH_LIMIT = @MATCH_LIMIT@ @MATCH_LIMIT_RECURSION@
|
||||
NO_RECURSE = @NO_RECURSE@
|
||||
EBCDIC = @EBCDIC@
|
||||
|
||||
@@ -139,83 +140,83 @@ pcre_chartables.@OBJEXT@: pcre_chartables.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) \
|
||||
$(POSIX_MALLOC_THRESHOLD) pcre_chartables.c
|
||||
|
||||
pcre_compile.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_compile.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_compile.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_compile.c
|
||||
|
||||
pcre_config.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_config.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_config.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_config.c
|
||||
|
||||
pcre_dfa_exec.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_dfa_exec.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_dfa_exec.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_dfa_exec.c
|
||||
|
||||
pcre_exec.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_exec.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_exec.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_exec.c
|
||||
|
||||
pcre_fullinfo.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_fullinfo.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_fullinfo.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_fullinfo.c
|
||||
|
||||
pcre_get.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_get.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_get.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_get.c
|
||||
|
||||
pcre_globals.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_globals.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_globals.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_globals.c
|
||||
|
||||
pcre_info.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_info.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_info.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_info.c
|
||||
|
||||
pcre_maketables.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_maketables.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_maketables.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_maketables.c
|
||||
|
||||
pcre_refcount.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_refcount.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_refcount.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_refcount.c
|
||||
|
||||
pcre_study.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_study.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_study.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_study.c
|
||||
|
||||
pcre_tables.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_tables.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_tables.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_tables.c
|
||||
|
||||
pcre_try_flipped.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_try_flipped.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_try_flipped.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_try_flipped.c
|
||||
|
||||
pcre_version.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_version.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_version.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_version.c
|
||||
|
||||
pcre_xclass.@OBJEXT@: Makefile config.h pcre.h \
|
||||
pcre_xclass.@OBJEXT@: Makefile config.h $(top_srcdir)/pcre.h \
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_xclass.c
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \
|
||||
$(top_srcdir)/pcre_xclass.c
|
||||
|
||||
pcreposix.@OBJEXT@: $(top_srcdir)/pcreposix.c $(top_srcdir)/pcreposix.h \
|
||||
$(top_srcdir)/pcre_internal.h pcre.h config.h Makefile
|
||||
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre.h config.h Makefile
|
||||
@$(LTCOMPILE) $(POSIX_MALLOC_THRESHOLD) $(top_srcdir)/pcreposix.c
|
||||
|
||||
$(TARGET): $(OBJ)
|
||||
@@ -231,7 +232,7 @@ pcre_chartables.c: dftables@BUILD_EXEEXT@
|
||||
|
||||
dftables.@BUILD_OBJEXT@: $(top_srcdir)/dftables.c \
|
||||
$(top_srcdir)/pcre_maketables.c $(top_srcdir)/pcre_internal.h \
|
||||
pcre.h config.h Makefile
|
||||
$(top_srcdir)/pcre.h config.h Makefile
|
||||
$(CC) -c $(CFLAGS) $(INCLS) $(DEFS) -I. $(top_srcdir)/dftables.c
|
||||
|
||||
dftables@BUILD_EXEEXT@: dftables.@BUILD_OBJEXT@
|
||||
@@ -247,7 +248,7 @@ clean:; -rm -rf *.@OBJEXT@ *.lo *.a *.la .libs pcretest@EXEEXT@ pcre_str
|
||||
|
||||
distclean: clean
|
||||
-rm -f pcre_chartables.c libtool pcre-config libpcre.pc \
|
||||
pcre.h pcre_stringpiece.h pcrecpp.h \
|
||||
pcre_stringpiece.h pcrecpparg.h \
|
||||
dftables@EXEEXT@ RunGrepTest RunTest \
|
||||
Makefile config.h config.status config.log config.cache
|
||||
|
||||
|
||||
@@ -11,8 +11,9 @@ the Contrib directory on the ftp site that you may find useful. See
|
||||
|
||||
If you want to compile PCRE for a non-Unix system (or perhaps, more strictly,
|
||||
for a system that does not support "configure" and "make" files), note that
|
||||
PCRE consists entirely of code written in Standard C, and so should compile
|
||||
successfully on any system that has a Standard C compiler and library.
|
||||
the basic PCRE library consists entirely of code written in Standard C, and so
|
||||
should compile successfully on any system that has a Standard C compiler and
|
||||
library. The C++ wrapper functions are a separate issue (see below).
|
||||
|
||||
|
||||
GENERIC INSTRUCTIONS FOR THE C LIBRARY
|
||||
@@ -34,27 +35,16 @@ your compiler gives to '\n'.
|
||||
rem Use write, because notepad cannot handle UNIX files. Change values.
|
||||
write config.h
|
||||
|
||||
(2) Copy or rename the file pcre.in as pcre.h, and change the macro definitions
|
||||
for PCRE_MAJOR, PCRE_MINOR, and PCRE_DATE near its start to the values set in
|
||||
configure.in.
|
||||
|
||||
rem Mark Tetrode's commands
|
||||
copy pcre.in pcre.h
|
||||
rem Read values from configure.in
|
||||
write configure.in
|
||||
rem Change values
|
||||
write pcre.h
|
||||
|
||||
(3) Compile dftables.c as a stand-alone program, and then run it with
|
||||
(2) Compile dftables.c as a stand-alone program, and then run it with
|
||||
the single argument "pcre_chartables.c". This generates a set of standard
|
||||
character tables and writes them to that file.
|
||||
|
||||
rem Mark Tetrode's commands
|
||||
rem Compile & run
|
||||
cl -DSUPPORT_UTF8 -DSUPPORT_UCP dftables.c
|
||||
dftables.exe chartables.c
|
||||
dftables.exe pcre_chartables.c
|
||||
|
||||
(4) Compile the following source files:
|
||||
(3) Compile the following source files:
|
||||
|
||||
pcre_chartables.c
|
||||
pcre_compile.c
|
||||
@@ -67,12 +57,11 @@ character tables and writes them to that file.
|
||||
pcre_info.c
|
||||
pcre_maketables.c
|
||||
pcre_ord2utf8.c
|
||||
pcre_printint.c
|
||||
pcre_refcount.c
|
||||
pcre_study.c
|
||||
pcre_tables.c
|
||||
pcre_try_flipped.c
|
||||
pcre_ucp_findchar.c
|
||||
pcre_ucp_searchfuncs.c
|
||||
pcre_valid_utf8.c
|
||||
pcre_version.c
|
||||
pcre_xclass.c
|
||||
@@ -88,7 +77,7 @@ shared libraries, you may have to do this once for each type.
|
||||
cl -DSUPPORT_UTF8 -DSUPPORT_UCP -DPOSIX_MALLOC_THRESHOLD=10 /c maketables.c get.c study.c pcre.c
|
||||
lib /OUT:pcre.lib maketables.obj get.obj study.obj pcre.obj
|
||||
|
||||
(5) Similarly, compile pcreposix.c and link it (on its own) as the pcreposix
|
||||
(4) Similarly, compile pcreposix.c and link it (on its own) as the pcreposix
|
||||
library.
|
||||
|
||||
rem Mark Tetrode's commands, for a static library
|
||||
@@ -96,14 +85,14 @@ library.
|
||||
cl -DSUPPORT_UTF8 -DSUPPORT_UCP -DPOSIX_MALLOC_THRESHOLD=10 /c pcreposix.c
|
||||
lib /OUT:pcreposix.lib pcreposix.obj
|
||||
|
||||
(6) Compile the test program pcretest.c. This needs the functions in the
|
||||
(5) Compile the test program pcretest.c. This needs the functions in the
|
||||
pcre and pcreposix libraries when linking.
|
||||
|
||||
rem Mark Tetrode's commands
|
||||
rem compile & link
|
||||
cl /F0x400000 pcretest.c pcre.lib pcreposix.lib
|
||||
|
||||
(7) Run pcretest on the testinput files in the testdata directory, and check
|
||||
(6) Run pcretest on the testinput files in the testdata directory, and check
|
||||
that the output matches the corresponding testoutput files. You must use the
|
||||
-i option when checking testinput2. Note that the supplied files are in Unix
|
||||
format, with just LF characters as line terminators. You may need to edit them
|
||||
@@ -126,6 +115,9 @@ to change this if your system uses a different convention.
|
||||
Note that there are now three more tests (7, 8, 9) that did not exist when Mark
|
||||
wrote those comments. They test the new pcre_dfa_exec() function.
|
||||
|
||||
(7) If you want to use the pcregrep command, compile and link pcregrep.c; it
|
||||
uses only the basic PCRE library.
|
||||
|
||||
|
||||
THE C++ WRAPPER FUNCTIONS
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ Documentation for PCRE
|
||||
----------------------
|
||||
|
||||
If you install PCRE in the normal way, you will end up with an installed set of
|
||||
man pages whose names all start with "pcre". The one that is called "pcre"
|
||||
man pages whose names all start with "pcre". The one that is just called "pcre"
|
||||
lists all the others. In addition to these man pages, the PCRE documentation is
|
||||
supplied in two other forms; however, as there is no standard place to install
|
||||
them, they are left in the doc directory of the unpacked source distribution.
|
||||
@@ -68,6 +68,9 @@ others are pointers to URLs containing relevant files.
|
||||
Building PCRE on a Unix-like system
|
||||
-----------------------------------
|
||||
|
||||
If you are using HP's ANSI C++ compiler (aCC), please see the special note
|
||||
in the section entitled "Using HP's ANSI C++ compiler (aCC)" below.
|
||||
|
||||
To build PCRE on a Unix-like system, first run the "configure" command from the
|
||||
PCRE distribution directory, with your current directory set to the directory
|
||||
where you want the files to be created. This command is a standard GNU
|
||||
@@ -91,6 +94,10 @@ into /source/pcre/pcre-xxx, but you want to build it in /build/pcre/pcre-xxx:
|
||||
cd /build/pcre/pcre-xxx
|
||||
/source/pcre/pcre-xxx/configure
|
||||
|
||||
PCRE is written in C and is normally compiled as a C library. However, it is
|
||||
possible to build it as a C++ library, though the provided building apparatus
|
||||
does not have any features to support this.
|
||||
|
||||
There are some optional features that can be included or omitted from the PCRE
|
||||
library. You can read more about them in the pcrebuild man page.
|
||||
|
||||
@@ -107,15 +114,17 @@ library. You can read more about them in the pcrebuild man page.
|
||||
. If, in addition to support for UTF-8 character strings, you want to include
|
||||
support for the \P, \p, and \X sequences that recognize Unicode character
|
||||
properties, you must add --enable-unicode-properties to the "configure"
|
||||
command. This adds about 90K to the size of the library (in the form of a
|
||||
command. This adds about 30K to the size of the library (in the form of a
|
||||
property table); only the basic two-letter properties such as Lu are
|
||||
supported.
|
||||
|
||||
. You can build PCRE to recognized CR or NL as the newline character, instead
|
||||
of whatever your compiler uses for "\n", by adding --newline-is-cr or
|
||||
--newline-is-nl to the "configure" command, respectively. Only do this if you
|
||||
really understand what you are doing. On traditional Unix-like systems, the
|
||||
newline character is NL.
|
||||
. You can build PCRE to recognize either CR or LF or the sequence CRLF as
|
||||
indicating the end of a line. Whatever you specify at build time is the
|
||||
default; the caller of PCRE can change the selection at run time. The default
|
||||
newline indicator is a single LF character (the Unix standard). You can
|
||||
specify the default newline indicator by adding --enable-newline-is-cr or
|
||||
--enable-newline-is-lf or --enable-newline-is-crlf to the "configure" command,
|
||||
respectively.
|
||||
|
||||
. When called via the POSIX interface, PCRE uses malloc() to get additional
|
||||
storage for processing capturing parentheses if there are more than 10 of
|
||||
@@ -135,6 +144,16 @@ library. You can read more about them in the pcrebuild man page.
|
||||
pcre_exec() can supply their own value. There is discussion on the pcreapi
|
||||
man page.
|
||||
|
||||
. There is a separate counter that limits the depth of recursive function calls
|
||||
during a matching process. This also has a default of ten million, which is
|
||||
essentially "unlimited". You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit-recursion=500000
|
||||
|
||||
Recursive function calls use up the runtime stack; running out of stack can
|
||||
cause programs to crash in strange ways. There is a discussion about stack
|
||||
sizes in the pcrestack man page.
|
||||
|
||||
. The default maximum compiled pattern size is around 64K. You can increase
|
||||
this by adding --with-link-size=3 to the "configure" command. You can
|
||||
increase it even more by setting --with-link-size=4, but this is unlikely
|
||||
@@ -158,7 +177,6 @@ library. You can read more about them in the pcrebuild man page.
|
||||
|
||||
The "configure" script builds eight files for the basic C library:
|
||||
|
||||
. pcre.h is the header file for C programs that call PCRE
|
||||
. Makefile is the makefile that builds the library
|
||||
. config.h contains build-time configuration options for the library
|
||||
. pcre-config is a script that shows the settings of "configure" options
|
||||
@@ -262,6 +280,22 @@ when calling the "configure" command. If they are not specified, they default
|
||||
to the values of CC and CFLAGS.
|
||||
|
||||
|
||||
Using HP's ANSI C++ compiler (aCC)
|
||||
----------------------------------
|
||||
|
||||
Unless C++ support is disabled by specifying the "--disable-cpp" option of the
|
||||
"configure" script, you *must* include the "-AA" option in the CXXFLAGS
|
||||
environment variable in order for the C++ components to compile correctly.
|
||||
|
||||
Also, note that the aCC compiler on PA-RISC platforms may have a defect whereby
|
||||
needed libraries fail to get included when specifying the "-AA" compiler
|
||||
option. If you experience unresolved symbols when linking the C++ programs,
|
||||
use the workaround of specifying the following environment variable prior to
|
||||
running the "configure" script:
|
||||
|
||||
CXXLDFLAGS="-lstd_v2 -lCsup_v2"
|
||||
|
||||
|
||||
Building on non-Unix systems
|
||||
----------------------------
|
||||
|
||||
@@ -409,28 +443,28 @@ The distribution should contain the following files:
|
||||
pcre_info.c )
|
||||
pcre_maketables.c )
|
||||
pcre_ord2utf8.c )
|
||||
pcre_printint.c )
|
||||
pcre_refcount.c )
|
||||
pcre_study.c )
|
||||
pcre_tables.c )
|
||||
pcre_try_flipped.c )
|
||||
pcre_ucp_findchar.c )
|
||||
pcre_ucp_searchfuncs.c)
|
||||
pcre_valid_utf8.c )
|
||||
pcre_version.c )
|
||||
pcre_xclass.c )
|
||||
|
||||
ucp_findchar.c )
|
||||
ucp.h ) source for the code that is used for
|
||||
ucpinternal.h ) Unicode property handling
|
||||
ucptable.c )
|
||||
ucptypetable.c )
|
||||
|
||||
pcre.in "source" for the header for the external API; pcre.h
|
||||
is built from this by "configure"
|
||||
pcre_printint.src ) debugging function that is #included in pcretest, and
|
||||
) can also be #included in pcre_compile()
|
||||
|
||||
pcre.h the public PCRE header file
|
||||
pcreposix.h header for the external POSIX wrapper API
|
||||
pcre_internal.h header for internal use
|
||||
ucp.h ) headers concerned with
|
||||
ucpinternal.h ) Unicode property handling
|
||||
config.in template for config.h, which is built by configure
|
||||
|
||||
pcrecpp.h.in "source" for the header file for the C++ wrapper
|
||||
pcrecpp.h the header file for the C++ wrapper
|
||||
pcrecpparg.h.in "source" for another C++ header file
|
||||
pcrecpp.cc )
|
||||
pcre_scanner.cc ) source for the C++ wrapper library
|
||||
|
||||
@@ -453,8 +487,9 @@ The distribution should contain the following files:
|
||||
RunGrepTest.in template for a Unix shell script for pcregrep tests
|
||||
config.guess ) files used by libtool,
|
||||
config.sub ) used only when building a shared library
|
||||
config.h.in "source" for the config.h header file
|
||||
configure a configuring shell script (built by autoconf)
|
||||
configure.in the autoconf input used to build configure
|
||||
configure.ac the autoconf input used to build configure
|
||||
doc/Tech.Notes notes on the encoding
|
||||
doc/*.3 man page sources for the PCRE functions
|
||||
doc/*.1 man page sources for pcregrep and pcretest
|
||||
@@ -482,7 +517,6 @@ The distribution should contain the following files:
|
||||
|
||||
libpcre.def
|
||||
libpcreposix.def
|
||||
pcre.def
|
||||
|
||||
(D) Auxiliary file for VPASCAL
|
||||
|
||||
@@ -491,4 +525,4 @@ The distribution should contain the following files:
|
||||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
August 2005
|
||||
June 2006
|
||||
|
||||
@@ -1,15 +1,16 @@
|
||||
|
||||
/* On Unix systems config.in is converted by configure into config.h. PCRE is
|
||||
written in Standard C, but there are a few non-standard things it can cope
|
||||
with, allowing it to run on SunOS4 and other "close to standard" systems.
|
||||
/* On Unix-like systems config.in is converted by "configure" into config.h.
|
||||
Some other environments also support the use of "configure". PCRE is written in
|
||||
Standard C, but there are a few non-standard things it can cope with, allowing
|
||||
it to run on SunOS4 and other "close to standard" systems.
|
||||
|
||||
On a non-Unix system you should just copy this file into config.h, and set up
|
||||
the macros the way you need them. You should normally change the definitions of
|
||||
HAVE_STRERROR and HAVE_MEMMOVE to 1. Unfortunately, because of the way autoconf
|
||||
works, these cannot be made the defaults. If your system has bcopy() and not
|
||||
memmove(), change the definition of HAVE_BCOPY instead of HAVE_MEMMOVE. If your
|
||||
system has neither bcopy() nor memmove(), leave them both as 0; an emulation
|
||||
function will be used. */
|
||||
On a non-Unix-like system you should just copy this file into config.h, and set
|
||||
up the macros the way you need them. You should normally change the definitions
|
||||
of HAVE_STRERROR and HAVE_MEMMOVE to 1. Unfortunately, because of the way
|
||||
autoconf works, these cannot be made the defaults. If your system has bcopy()
|
||||
and not memmove(), change the definition of HAVE_BCOPY instead of HAVE_MEMMOVE.
|
||||
If your system has neither bcopy() nor memmove(), leave them both as 0; an
|
||||
emulation function will be used. */
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro as 1. On systems that can use "configure",
|
||||
@@ -19,76 +20,70 @@ this can be done via --enable-ebcdic. */
|
||||
#define EBCDIC 0
|
||||
#endif
|
||||
|
||||
/* If you are compiling for a system that needs some magic to be inserted
|
||||
before the definition of an exported function, define this macro to contain the
|
||||
relevant magic. It apears at the start of every exported function. */
|
||||
/* If you are compiling for a system other than a Unix-like system or Win32,
|
||||
and it needs some magic to be inserted before the definition of a function that
|
||||
is exported by the library, define this macro to contain the relevant magic. If
|
||||
you do not define this macro, it defaults to "extern" for a C compiler and
|
||||
"extern C" for a C++ compiler on non-Win32 systems. This macro appears at the
|
||||
start of every exported function that is part of the external API. It does not
|
||||
appear on functions that are "external" in the C sense, but which are internal
|
||||
to the library. */
|
||||
|
||||
#define PCRE_EXPORT
|
||||
/* #define PCRE_DATA_SCOPE */
|
||||
|
||||
/* Define to empty if the "const" keyword does not work. */
|
||||
/* Define the following macro to empty if the "const" keyword does not work. */
|
||||
|
||||
#undef const
|
||||
|
||||
/* Define to "unsigned" if <stddef.h> doesn't define size_t. */
|
||||
/* Define the following macro to "unsigned" if <stddef.h> does not define
|
||||
size_t. */
|
||||
|
||||
#undef size_t
|
||||
|
||||
/* The following two definitions are mainly for the benefit of SunOS4, which
|
||||
doesn't have the strerror() or memmove() functions that should be present in
|
||||
does not have the strerror() or memmove() functions that should be present in
|
||||
all Standard C libraries. The macros HAVE_STRERROR and HAVE_MEMMOVE should
|
||||
normally be defined with the value 1 for other systems, but unfortunately we
|
||||
can't make this the default because "configure" files generated by autoconf
|
||||
cannot make this the default because "configure" files generated by autoconf
|
||||
will only change 0 to 1; they won't change 1 to 0 if the functions are not
|
||||
found. */
|
||||
|
||||
#define HAVE_STRERROR 0
|
||||
#define HAVE_MEMMOVE 0
|
||||
|
||||
/* There are some non-Unix systems that don't even have bcopy(). If this macro
|
||||
is false, an emulation is used. If HAVE_MEMMOVE is set to 1, the value of
|
||||
/* There are some non-Unix-like systems that don't even have bcopy(). If this
|
||||
macro is false, an emulation is used. If HAVE_MEMMOVE is set to 1, the value of
|
||||
HAVE_BCOPY is not relevant. */
|
||||
|
||||
#define HAVE_BCOPY 0
|
||||
|
||||
/* The value of NEWLINE determines the newline character. The default is to
|
||||
leave it up to the compiler, but some sites want to force a particular value.
|
||||
On Unix systems, "configure" can be used to override this default. */
|
||||
On Unix-like systems, "configure" can be used to override this default. */
|
||||
|
||||
#ifndef NEWLINE
|
||||
#define NEWLINE '\n'
|
||||
#endif
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store
|
||||
links as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows for
|
||||
longer patterns in extreme cases. On Unix systems, "configure" can be used to
|
||||
override this default. */
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links as
|
||||
offsets within the compiled regex. The default is 2, which allows for compiled
|
||||
patterns up to 64K long. This covers the vast majority of cases. However, PCRE
|
||||
can also be compiled to use 3 or 4 bytes instead. This allows for longer
|
||||
patterns in extreme cases. On systems that support it, "configure" can be used
|
||||
to override this default. */
|
||||
|
||||
#ifndef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the match()
|
||||
function can be called during a single execution of pcre_exec(). (There is a
|
||||
runtime method of setting a different limit.) The limit exists in order to
|
||||
catch runaway regular expressions that take for ever to determine that they do
|
||||
not match. The default is set very large so that it does not accidentally catch
|
||||
legitimate cases. On Unix systems, "configure" can be used to override this
|
||||
default default. */
|
||||
|
||||
#ifndef MATCH_LIMIT
|
||||
#define MATCH_LIMIT 10000000
|
||||
#endif
|
||||
|
||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
||||
required for holding the pointers to capturing substrings because PCRE requires
|
||||
three integers per substring, whereas the POSIX interface provides only two. If
|
||||
the number of expected substrings is small, the wrapper function uses space on
|
||||
the stack, because this is faster than using malloc() for each call. The
|
||||
threshold above which the stack is no longer use is defined by POSIX_MALLOC_
|
||||
THRESHOLD. On Unix systems, "configure" can be used to override this default.
|
||||
*/
|
||||
threshold above which the stack is no longer used is defined by POSIX_MALLOC_
|
||||
THRESHOLD. On systems that support it, "configure" can be used to override this
|
||||
default. */
|
||||
|
||||
#ifndef POSIX_MALLOC_THRESHOLD
|
||||
#define POSIX_MALLOC_THRESHOLD 10
|
||||
@@ -97,11 +92,52 @@ THRESHOLD. On Unix systems, "configure" can be used to override this default.
|
||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited size.
|
||||
Define NO_RECURSE to get a version that doesn't use recursion in the match()
|
||||
function; instead it creates its own stack by steam using pcre_recurse_malloc
|
||||
to get memory. For more detail, see comments and other stuff just above the
|
||||
match() function. On Unix systems, "configure" can be used to set this in the
|
||||
Makefile (use --disable-stack-for-recursion). */
|
||||
function; instead it creates its own stack by steam using pcre_recurse_malloc()
|
||||
to obtain memory from the heap. For more detail, see the comments and other
|
||||
stuff just above the match() function. On systems that support it, "configure"
|
||||
can be used to set this in the Makefile (use --disable-stack-for-recursion). */
|
||||
|
||||
/* #define NO_RECURSE */
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the internal
|
||||
match() function can be called during a single execution of pcre_exec(). There
|
||||
is a runtime interface for setting a different limit. The limit exists in order
|
||||
to catch runaway regular expressions that take for ever to determine that they
|
||||
do not match. The default is set very large so that it does not accidentally
|
||||
catch legitimate cases. On systems that support it, "configure" can be used to
|
||||
override this default. */
|
||||
|
||||
#ifndef MATCH_LIMIT
|
||||
#define MATCH_LIMIT 10000000
|
||||
#endif
|
||||
|
||||
/* The above limit applies to all calls of match(), whether or not they
|
||||
increase the recursion depth. In some environments it is desirable to limit the
|
||||
depth of recursive calls of match() more strictly, in order to restrict the
|
||||
maximum amount of stack (or heap, if NO_RECURSE is defined) that is used. The
|
||||
value of MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To
|
||||
have any useful effect, it must be less than the value of MATCH_LIMIT. There is
|
||||
a runtime method for setting a different limit. On systems that support it,
|
||||
"configure" can be used to override this default. */
|
||||
|
||||
#ifndef MATCH_LIMIT_RECURSION
|
||||
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
|
||||
#endif
|
||||
|
||||
/* These three limits are parameterized just in case anybody ever wants to
|
||||
change them. Care must be taken if they are increased, because they guard
|
||||
against integer overflow caused by enormously large patterns. */
|
||||
|
||||
#ifndef MAX_NAME_SIZE
|
||||
#define MAX_NAME_SIZE 32
|
||||
#endif
|
||||
|
||||
#ifndef MAX_NAME_COUNT
|
||||
#define MAX_NAME_COUNT 10000
|
||||
#endif
|
||||
|
||||
#ifndef MAX_DUPLENGTH
|
||||
#define MAX_DUPLENGTH 30000
|
||||
#endif
|
||||
|
||||
/* End */
|
||||
|
||||
52
libpcre/configure
vendored
52
libpcre/configure
vendored
@@ -272,6 +272,7 @@ PACKAGE_STRING=
|
||||
PACKAGE_BUGREPORT=
|
||||
|
||||
ac_unique_file="dftables.c"
|
||||
ac_unique_file="pcre.h"
|
||||
# Factoring default headers for most tests.
|
||||
ac_includes_default="\
|
||||
#include <stdio.h>
|
||||
@@ -309,7 +310,7 @@ ac_includes_default="\
|
||||
# include <unistd.h>
|
||||
#endif"
|
||||
|
||||
ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT RANLIB ac_ct_RANLIB CPP EGREP pcre_have_long_long pcre_have_ulong_long build build_cpu build_vendor build_os host host_cpu host_vendor host_os BUILD_EXEEXT BUILD_OBJEXT CC_FOR_BUILD CFLAGS_FOR_BUILD EBCDIC HAVE_MEMMOVE HAVE_STRERROR LINK_SIZE MATCH_LIMIT NEWLINE NO_RECURSE PCRE_MAJOR PCRE_MINOR PCRE_DATE PCRE_VERSION PCRE_LIB_VERSION PCRE_POSIXLIB_VERSION POSIX_MALLOC_THRESHOLD UCP UTF8 POSIX_OBJ POSIX_LOBJ POSIX_LIB LIBOBJS LTLIBOBJS'
|
||||
ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT RANLIB ac_ct_RANLIB CPP EGREP pcre_have_long_long pcre_have_ulong_long build build_cpu build_vendor build_os host host_cpu host_vendor host_os BUILD_EXEEXT BUILD_OBJEXT CC_FOR_BUILD CFLAGS_FOR_BUILD CXXLDFLAGS EBCDIC HAVE_MEMMOVE HAVE_STRERROR LINK_SIZE MATCH_LIMIT MATCH_LIMIT_RECURSION NEWLINE NO_RECURSE PCRE_LIB_VERSION PCRE_POSIXLIB_VERSION PCRE_VERSION POSIX_MALLOC_THRESHOLD UCP UTF8 POSIX_OBJ POSIX_LOBJ POSIX_LIB LIBOBJS LTLIBOBJS'
|
||||
ac_subst_files=''
|
||||
|
||||
# Initialize some variables set by options.
|
||||
@@ -848,15 +849,17 @@ Optional Features:
|
||||
--enable-unicode-properties enable Unicode properties support
|
||||
--enable-newline-is-cr use CR as the newline character
|
||||
--enable-newline-is-lf use LF as the newline character
|
||||
--enable-newline-is-crlf use CRLF as the newline sequence
|
||||
--enable-ebcdic assume EBCDIC coding rather than ASCII
|
||||
--disable-stack-for-recursion disable use of stack recursion when matching
|
||||
|
||||
Optional Packages:
|
||||
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
|
||||
--without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
|
||||
--with-posix-malloc-threshold=5 threshold for POSIX malloc usage
|
||||
--with-posix-malloc-threshold=10 threshold for POSIX malloc usage
|
||||
--with-link-size=2 internal link size (2, 3, or 4 allowed)
|
||||
--with-match-limit=10000000 default limit on internal looping)
|
||||
--with-match-limit=10000000 default limit on internal looping
|
||||
--with-match-limit-recursion=10000000 default limit on internal recursion
|
||||
|
||||
Some influential environment variables:
|
||||
CC C compiler command
|
||||
@@ -1307,18 +1310,13 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
ac_config_headers="$ac_config_headers config.h"
|
||||
|
||||
|
||||
|
||||
PCRE_MAJOR=6
|
||||
PCRE_MINOR=4
|
||||
PCRE_DATE=05-Sep-2005
|
||||
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}
|
||||
|
||||
|
||||
POSIX_MALLOC_THRESHOLD=-DPOSIX_MALLOC_THRESHOLD=10
|
||||
|
||||
|
||||
@@ -1326,6 +1324,12 @@ PCRE_LIB_VERSION=0:1:0
|
||||
PCRE_POSIXLIB_VERSION=0:0:0
|
||||
|
||||
|
||||
PCRE_MAJOR=`grep '#define PCRE_MAJOR' ${srcdir}/pcre.h | cut -c 29-`
|
||||
PCRE_MINOR=`grep '#define PCRE_MINOR' ${srcdir}/pcre.h | cut -c 29-`
|
||||
PCRE_PRERELEASE=`grep '#define PCRE_PRERELEASE' ${srcdir}/pcre.h | cut -c 29-`
|
||||
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}${PCRE_PRERELEASE}
|
||||
|
||||
|
||||
ac_ext=c
|
||||
ac_cpp='$CPP $CPPFLAGS'
|
||||
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
|
||||
@@ -3447,6 +3451,16 @@ fi
|
||||
fi;
|
||||
|
||||
|
||||
# Check whether --enable-newline-is-crlf or --disable-newline-is-crlf was given.
|
||||
if test "${enable_newline_is_crlf+set}" = set; then
|
||||
enableval="$enable_newline_is_crlf"
|
||||
if test "$enableval" = "yes"; then
|
||||
NEWLINE=-DNEWLINE=3338
|
||||
fi
|
||||
|
||||
fi;
|
||||
|
||||
|
||||
# Check whether --enable-ebcdic or --disable-ebcdic was given.
|
||||
if test "${enable_ebcdic+set}" = set; then
|
||||
enableval="$enable_ebcdic"
|
||||
@@ -3495,6 +3509,15 @@ if test "${with_match_limit+set}" = set; then
|
||||
fi;
|
||||
|
||||
|
||||
|
||||
# Check whether --with-match-limit-recursion or --without-match-limit-recursion was given.
|
||||
if test "${with_match_limit_recursion+set}" = set; then
|
||||
withval="$with_match_limit_recursion"
|
||||
MATCH_LIMIT_RECURSION=-DMATCH_LIMIT_RECURSION=$withval
|
||||
|
||||
fi;
|
||||
|
||||
|
||||
if test "$UCP" != "" ; then
|
||||
UTF8=-DSUPPORT_UTF8
|
||||
fi
|
||||
@@ -3615,7 +3638,6 @@ esac
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if test "x$enable_shared" = "xno" ; then
|
||||
@@ -3626,7 +3648,7 @@ _ACEOF
|
||||
|
||||
fi
|
||||
|
||||
ac_config_files="$ac_config_files Makefile pcre.h:pcre.h.in"
|
||||
ac_config_files="$ac_config_files Makefile"
|
||||
cat >confcache <<\_ACEOF
|
||||
# This file is a shell script that caches the results of configure
|
||||
# tests run on this system so they can be shared between configure
|
||||
@@ -4152,7 +4174,6 @@ do
|
||||
case "$ac_config_target" in
|
||||
# Handling of arguments.
|
||||
"Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
|
||||
"pcre.h" ) CONFIG_FILES="$CONFIG_FILES pcre.h:pcre.h.in" ;;
|
||||
"config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
|
||||
*) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
|
||||
echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
|
||||
@@ -4263,19 +4284,18 @@ s,@BUILD_EXEEXT@,$BUILD_EXEEXT,;t t
|
||||
s,@BUILD_OBJEXT@,$BUILD_OBJEXT,;t t
|
||||
s,@CC_FOR_BUILD@,$CC_FOR_BUILD,;t t
|
||||
s,@CFLAGS_FOR_BUILD@,$CFLAGS_FOR_BUILD,;t t
|
||||
s,@CXXLDFLAGS@,$CXXLDFLAGS,;t t
|
||||
s,@EBCDIC@,$EBCDIC,;t t
|
||||
s,@HAVE_MEMMOVE@,$HAVE_MEMMOVE,;t t
|
||||
s,@HAVE_STRERROR@,$HAVE_STRERROR,;t t
|
||||
s,@LINK_SIZE@,$LINK_SIZE,;t t
|
||||
s,@MATCH_LIMIT@,$MATCH_LIMIT,;t t
|
||||
s,@MATCH_LIMIT_RECURSION@,$MATCH_LIMIT_RECURSION,;t t
|
||||
s,@NEWLINE@,$NEWLINE,;t t
|
||||
s,@NO_RECURSE@,$NO_RECURSE,;t t
|
||||
s,@PCRE_MAJOR@,$PCRE_MAJOR,;t t
|
||||
s,@PCRE_MINOR@,$PCRE_MINOR,;t t
|
||||
s,@PCRE_DATE@,$PCRE_DATE,;t t
|
||||
s,@PCRE_VERSION@,$PCRE_VERSION,;t t
|
||||
s,@PCRE_LIB_VERSION@,$PCRE_LIB_VERSION,;t t
|
||||
s,@PCRE_POSIXLIB_VERSION@,$PCRE_POSIXLIB_VERSION,;t t
|
||||
s,@PCRE_VERSION@,$PCRE_VERSION,;t t
|
||||
s,@POSIX_MALLOC_THRESHOLD@,$POSIX_MALLOC_THRESHOLD,;t t
|
||||
s,@UCP@,$UCP,;t t
|
||||
s,@UTF8@,$UTF8,;t t
|
||||
|
||||
@@ -13,27 +13,16 @@ dnl This is required at the start; the name is the name of a file
|
||||
dnl it should be seeing, to verify it is in the same directory.
|
||||
|
||||
AC_INIT(dftables.c)
|
||||
AC_CONFIG_SRCDIR([pcre.h])
|
||||
|
||||
dnl A safety precaution
|
||||
|
||||
AC_PREREQ(2.57)
|
||||
|
||||
dnl Arrange to build config.h from config.h.in. Note that pcre.h is
|
||||
dnl built differently, as it is just a "substitution" file.
|
||||
dnl Arrange to build config.h from config.h.in.
|
||||
dnl Manual says this macro should come right after AC_INIT.
|
||||
AC_CONFIG_HEADER(config.h)
|
||||
|
||||
dnl Provide the current PCRE version information. Do not use numbers
|
||||
dnl with leading zeros for the minor version, as they end up in a C
|
||||
dnl macro, and may be treated as octal constants. Stick to single
|
||||
dnl digits for minor numbers less than 10. There are unlikely to be
|
||||
dnl that many releases anyway.
|
||||
|
||||
PCRE_MAJOR=6
|
||||
PCRE_MINOR=4
|
||||
PCRE_DATE=05-Sep-2005
|
||||
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}
|
||||
|
||||
dnl Default values for miscellaneous macros
|
||||
|
||||
POSIX_MALLOC_THRESHOLD=-DPOSIX_MALLOC_THRESHOLD=10
|
||||
@@ -44,6 +33,14 @@ dnl are built by default on Unix systems.
|
||||
PCRE_LIB_VERSION=0:1:0
|
||||
PCRE_POSIXLIB_VERSION=0:0:0
|
||||
|
||||
dnl Find the PCRE version from the pcre.h file. The PCRE_VERSION variable is
|
||||
dnl substituted in pcre-config.in.
|
||||
|
||||
PCRE_MAJOR=`grep '#define PCRE_MAJOR' ${srcdir}/pcre.h | cut -c 29-`
|
||||
PCRE_MINOR=`grep '#define PCRE_MINOR' ${srcdir}/pcre.h | cut -c 29-`
|
||||
PCRE_PRERELEASE=`grep '#define PCRE_PRERELEASE' ${srcdir}/pcre.h | cut -c 29-`
|
||||
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}${PCRE_PRERELEASE}
|
||||
|
||||
dnl Checks for programs.
|
||||
|
||||
AC_PROG_CC
|
||||
@@ -120,6 +117,15 @@ if test "$enableval" = "yes"; then
|
||||
fi
|
||||
)
|
||||
|
||||
dnl Handle --enable-newline-is-crlf
|
||||
|
||||
AC_ARG_ENABLE(newline-is-crlf,
|
||||
[ --enable-newline-is-crlf use CRLF as the newline sequence],
|
||||
if test "$enableval" = "yes"; then
|
||||
NEWLINE=-DNEWLINE=3338
|
||||
fi
|
||||
)
|
||||
|
||||
dnl Handle --enable-ebcdic
|
||||
|
||||
AC_ARG_ENABLE(ebcdic,
|
||||
@@ -145,7 +151,7 @@ dnl I've done.
|
||||
dnl Handle --with-posix-malloc-threshold=n
|
||||
|
||||
AC_ARG_WITH(posix-malloc-threshold,
|
||||
[ --with-posix-malloc-threshold=5 threshold for POSIX malloc usage],
|
||||
[ --with-posix-malloc-threshold=10 threshold for POSIX malloc usage],
|
||||
POSIX_MALLOC_THRESHOLD=-DPOSIX_MALLOC_THRESHOLD=$withval
|
||||
)
|
||||
|
||||
@@ -156,13 +162,20 @@ AC_ARG_WITH(link-size,
|
||||
LINK_SIZE=-DLINK_SIZE=$withval
|
||||
)
|
||||
|
||||
dnl Handle --with-match_limit=n
|
||||
dnl Handle --with-match-limit=n
|
||||
|
||||
AC_ARG_WITH(match-limit,
|
||||
[ --with-match-limit=10000000 default limit on internal looping)],
|
||||
[ --with-match-limit=10000000 default limit on internal looping],
|
||||
MATCH_LIMIT=-DMATCH_LIMIT=$withval
|
||||
)
|
||||
|
||||
dnl Handle --with-match-limit-recursion=n
|
||||
|
||||
AC_ARG_WITH(match-limit-recursion,
|
||||
[ --with-match-limit-recursion=10000000 default limit on internal recursion],
|
||||
MATCH_LIMIT_RECURSION=-DMATCH_LIMIT_RECURSION=$withval
|
||||
)
|
||||
|
||||
dnl Unicode character property support implies UTF-8 support
|
||||
|
||||
if test "$UCP" != "" ; then
|
||||
@@ -187,19 +200,18 @@ AC_SUBST(BUILD_EXEEXT)
|
||||
AC_SUBST(BUILD_OBJEXT)
|
||||
AC_SUBST(CC_FOR_BUILD)
|
||||
AC_SUBST(CFLAGS_FOR_BUILD)
|
||||
AC_SUBST(CXXLDFLAGS)
|
||||
AC_SUBST(EBCDIC)
|
||||
AC_SUBST(HAVE_MEMMOVE)
|
||||
AC_SUBST(HAVE_STRERROR)
|
||||
AC_SUBST(LINK_SIZE)
|
||||
AC_SUBST(MATCH_LIMIT)
|
||||
AC_SUBST(MATCH_LIMIT_RECURSION)
|
||||
AC_SUBST(NEWLINE)
|
||||
AC_SUBST(NO_RECURSE)
|
||||
AC_SUBST(PCRE_MAJOR)
|
||||
AC_SUBST(PCRE_MINOR)
|
||||
AC_SUBST(PCRE_DATE)
|
||||
AC_SUBST(PCRE_VERSION)
|
||||
AC_SUBST(PCRE_LIB_VERSION)
|
||||
AC_SUBST(PCRE_POSIXLIB_VERSION)
|
||||
AC_SUBST(PCRE_VERSION)
|
||||
AC_SUBST(POSIX_MALLOC_THRESHOLD)
|
||||
AC_SUBST(UCP)
|
||||
AC_SUBST(UTF8)
|
||||
@@ -214,4 +226,4 @@ if test "x$enable_shared" = "xno" ; then
|
||||
fi
|
||||
|
||||
dnl This must be last; it determines what files are written as well as config.h
|
||||
AC_OUTPUT(Makefile pcre.h:pcre.h.in)
|
||||
AC_OUTPUT(Makefile )
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* In its original form, this is the .in file that is transformed by
|
||||
"configure" into pcre.h.
|
||||
/* This is the public header file for the PCRE library, to be #included by
|
||||
applications that call the PCRE functions.
|
||||
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
@@ -39,17 +39,40 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#ifndef _PCRE_H
|
||||
#define _PCRE_H
|
||||
|
||||
/* The file pcre.h is build by "configure". Do not edit it; instead
|
||||
make changes to pcre.in. */
|
||||
/* The current PCRE version information. */
|
||||
|
||||
/* NOTES FOR FUTURE MAINTAINERS: Do not use numbers with leading zeros, because
|
||||
they may be treated as octal constants. The PCRE_PRERELEASE feature is for
|
||||
identifying release candidates. It might be defined as -RC2, for example. In
|
||||
real releases, it should be defined empty. Do not change the alignment of these
|
||||
statements. The code in ./configure greps out the version numbers by using "cut"
|
||||
to get values from column 29 onwards. These are substituted into pcre-config
|
||||
and libpcre.pc. The values are not put into configure.ac and substituted here
|
||||
(which would simplify this issue) because that makes life harder for those who
|
||||
cannot run ./configure. As it now stands, this file need not be edited in that
|
||||
circumstance. */
|
||||
|
||||
#define PCRE_MAJOR 6
|
||||
#define PCRE_MINOR 3
|
||||
#define PCRE_DATE 15-Aug-2005
|
||||
#define PCRE_MINOR 7
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 04-Jul-2006
|
||||
|
||||
/* Win32 uses DLL by default; it needs special stuff for exported functions. */
|
||||
/* Removed some defines here as I always compile staticly */
|
||||
/* Win32 uses DLL by default; it needs special stuff for exported functions
|
||||
when building PCRE. */
|
||||
|
||||
/* For other operating systems, we use the standard "extern". */
|
||||
#ifdef _WIN32
|
||||
# ifdef PCRE_DEFINITION
|
||||
# ifdef DLL_EXPORT
|
||||
# define PCRE_DATA_SCOPE __declspec(dllexport)
|
||||
# endif
|
||||
# else
|
||||
# ifndef PCRE_STATIC
|
||||
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Otherwise, we use the standard "extern". */
|
||||
|
||||
#ifndef PCRE_DATA_SCOPE
|
||||
# ifdef __cplusplus
|
||||
@@ -91,6 +114,10 @@ extern "C" {
|
||||
#define PCRE_DFA_SHORTEST 0x00010000
|
||||
#define PCRE_DFA_RESTART 0x00020000
|
||||
#define PCRE_FIRSTLINE 0x00040000
|
||||
#define PCRE_DUPNAMES 0x00080000
|
||||
#define PCRE_NEWLINE_CR 0x00100000
|
||||
#define PCRE_NEWLINE_LF 0x00200000
|
||||
#define PCRE_NEWLINE_CRLF 0x00300000
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
@@ -114,6 +141,7 @@ extern "C" {
|
||||
#define PCRE_ERROR_DFA_UMLIMIT (-18)
|
||||
#define PCRE_ERROR_DFA_WSSIZE (-19)
|
||||
#define PCRE_ERROR_DFA_RECURSE (-20)
|
||||
#define PCRE_ERROR_RECURSIONLIMIT (-21)
|
||||
|
||||
/* Request types for pcre_fullinfo() */
|
||||
|
||||
@@ -131,7 +159,8 @@ extern "C" {
|
||||
#define PCRE_INFO_STUDYSIZE 10
|
||||
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||
|
||||
/* Request types for pcre_config() */
|
||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
|
||||
#define PCRE_CONFIG_UTF8 0
|
||||
#define PCRE_CONFIG_NEWLINE 1
|
||||
@@ -140,19 +169,30 @@ extern "C" {
|
||||
#define PCRE_CONFIG_MATCH_LIMIT 4
|
||||
#define PCRE_CONFIG_STACKRECURSE 5
|
||||
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
|
||||
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
|
||||
|
||||
/* Bit flags for the pcre_extra structure */
|
||||
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
|
||||
these bits, just add new ones on the end, in order to remain compatible. */
|
||||
|
||||
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
||||
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
||||
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
||||
#define PCRE_EXTRA_TABLES 0x0008
|
||||
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
|
||||
|
||||
/* Types */
|
||||
|
||||
struct real_pcre; /* declaration; the definition is private */
|
||||
typedef struct real_pcre pcre;
|
||||
|
||||
/* When PCRE is compiled as a C++ library, the subject pointer type can be
|
||||
replaced with a custom type. For conventional use, the public interface is a
|
||||
const char *. */
|
||||
|
||||
#ifndef PCRE_SPTR
|
||||
#define PCRE_SPTR const char *
|
||||
#endif
|
||||
|
||||
/* The structure for passing additional data to pcre_exec(). This is defined in
|
||||
such a way as to be extensible. Always add new fields at the end, in order to
|
||||
remain compatible. */
|
||||
@@ -163,6 +203,7 @@ typedef struct pcre_extra {
|
||||
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||
void *callout_data; /* Data passed back in callouts */
|
||||
const unsigned char *tables; /* Pointer to character tables */
|
||||
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||
} pcre_extra;
|
||||
|
||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||
@@ -175,7 +216,7 @@ typedef struct pcre_callout_block {
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
const char *subject; /* The subject being matched */
|
||||
PCRE_SPTR subject; /* The subject being matched */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are in the subject */
|
||||
@@ -221,7 +262,7 @@ PCRE_DATA_SCOPE int pcre_copy_substring(const char *, int *, int, int, char *,
|
||||
int);
|
||||
PCRE_DATA_SCOPE int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||
const char *, int, int, int, int *, int , int *, int);
|
||||
PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, const char *,
|
||||
PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||
int, int, int, int *, int);
|
||||
PCRE_DATA_SCOPE void pcre_free_substring(const char *);
|
||||
PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
|
||||
@@ -230,6 +271,8 @@ PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, const char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *);
|
||||
PCRE_DATA_SCOPE int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||
char **, char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int,
|
||||
const char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -58,7 +58,7 @@ Arguments:
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_EXPORT int
|
||||
PCRE_DATA_SCOPE int
|
||||
pcre_config(int what, void *where)
|
||||
{
|
||||
switch (what)
|
||||
@@ -95,6 +95,10 @@ switch (what)
|
||||
*((unsigned int *)where) = MATCH_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
|
||||
*((unsigned int *)where) = MATCH_LIMIT_RECURSION;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_STACKRECURSE:
|
||||
#ifdef NO_RECURSE
|
||||
*((int *)where) = 0;
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,6 +43,7 @@ alternative matching function that uses a DFA algorithm. This is NOT Perl-
|
||||
compatible, but it has advantages in certain applications. */
|
||||
|
||||
|
||||
#define NLBLOCK md /* The block containing newline information */
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@@ -288,7 +289,9 @@ const uschar *start_subject = md->start_subject;
|
||||
const uschar *end_subject = md->end_subject;
|
||||
const uschar *start_code = md->start_code;
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
|
||||
#endif
|
||||
|
||||
rlevel++;
|
||||
offsetcount &= (-2);
|
||||
@@ -421,7 +424,8 @@ ptr = current_subject;
|
||||
for (;;)
|
||||
{
|
||||
int i, j;
|
||||
int c, d, clen, dlen;
|
||||
int clen, dlen;
|
||||
unsigned int c, d;
|
||||
|
||||
/* Make the new state list into the active state list and empty the
|
||||
new state list. */
|
||||
@@ -480,7 +484,7 @@ for (;;)
|
||||
const uschar *code;
|
||||
int state_offset = current_state->offset;
|
||||
int count, codevalue;
|
||||
int chartype, othercase;
|
||||
int chartype, script;
|
||||
|
||||
#ifdef DEBUG
|
||||
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
|
||||
@@ -645,7 +649,10 @@ for (;;)
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_CIRC:
|
||||
if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
|
||||
((ims & PCRE_MULTILINE) != 0 && ptr[-1] == NEWLINE))
|
||||
((ims & PCRE_MULTILINE) != 0 &&
|
||||
ptr >= start_subject + md->nllen &&
|
||||
ptr != end_subject &&
|
||||
IS_NEWLINE(ptr - md->nllen)))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
@@ -679,13 +686,16 @@ for (;;)
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANY:
|
||||
if (clen > 0 && (c != NEWLINE || (ims & PCRE_DOTALL) != 0))
|
||||
if (clen > 0 && ((ims & PCRE_DOTALL) != 0 ||
|
||||
ptr > end_subject - md->nllen ||
|
||||
!IS_NEWLINE(ptr)))
|
||||
{ ADD_NEW(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_EODN:
|
||||
if (clen == 0 || (c == NEWLINE && ptr + 1 == end_subject))
|
||||
if (clen == 0 ||
|
||||
(ptr == end_subject - md->nllen && IS_NEWLINE(ptr)))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
@@ -693,11 +703,14 @@ for (;;)
|
||||
case OP_DOLL:
|
||||
if ((md->moptions & PCRE_NOTEOL) == 0)
|
||||
{
|
||||
if (clen == 0 || (c == NEWLINE && (ptr + 1 == end_subject ||
|
||||
(ims & PCRE_MULTILINE) != 0)))
|
||||
if (clen == 0 ||
|
||||
(ptr <= end_subject - md->nllen && IS_NEWLINE(ptr) &&
|
||||
((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
|
||||
))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
}
|
||||
else if (c == NEWLINE && (ims & PCRE_MULTILINE) != 0)
|
||||
else if ((ims & PCRE_MULTILINE) != 0 &&
|
||||
ptr <= end_subject - md->nllen && IS_NEWLINE(ptr))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
@@ -757,19 +770,38 @@ for (;;)
|
||||
case OP_NOTPROP:
|
||||
if (clen > 0)
|
||||
{
|
||||
int rqdtype, category;
|
||||
category = _pcre_ucp_findchar(c, &chartype, &othercase);
|
||||
rqdtype = code[1];
|
||||
if (rqdtype >= 128)
|
||||
BOOL OK;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
switch(code[1])
|
||||
{
|
||||
if ((rqdtype - 128 == category) == (codevalue == OP_PROP))
|
||||
{ ADD_NEW(state_offset + 2, 0); }
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((rqdtype == chartype) == (codevalue == OP_PROP))
|
||||
{ ADD_NEW(state_offset + 2, 0); }
|
||||
case PT_ANY:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = category == code[2];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = chartype == code[2];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = script == code[2];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
OK = codevalue != OP_PROP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
@@ -790,7 +822,11 @@ for (;;)
|
||||
{
|
||||
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&
|
||||
(d != OP_ANY ||
|
||||
(ims & PCRE_DOTALL) != 0 ||
|
||||
ptr > end_subject - md->nllen ||
|
||||
!IS_NEWLINE(ptr)
|
||||
) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
count++;
|
||||
@@ -807,7 +843,11 @@ for (;;)
|
||||
{
|
||||
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&
|
||||
(d != OP_ANY ||
|
||||
(ims & PCRE_DOTALL) != 0 ||
|
||||
ptr > end_subject - md->nllen ||
|
||||
!IS_NEWLINE(ptr)
|
||||
) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
ADD_NEW(state_offset + 2, 0);
|
||||
@@ -823,7 +863,11 @@ for (;;)
|
||||
{
|
||||
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&
|
||||
(d != OP_ANY ||
|
||||
(ims & PCRE_DOTALL) != 0 ||
|
||||
ptr > end_subject - md->nllen ||
|
||||
!IS_NEWLINE(ptr)
|
||||
) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
ADD_NEW(state_offset, 0);
|
||||
@@ -842,7 +886,11 @@ for (;;)
|
||||
{
|
||||
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&
|
||||
(d != OP_ANY ||
|
||||
(ims & PCRE_DOTALL) != 0 ||
|
||||
ptr > end_subject - md->nllen ||
|
||||
!IS_NEWLINE(ptr)
|
||||
) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (++count >= GET2(code, 1))
|
||||
@@ -862,14 +910,41 @@ for (;;)
|
||||
case OP_PROP_EXTRA + OP_TYPEPLUS:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 3, 0); }
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
int category = _pcre_ucp_findchar(c, &chartype, &othercase);
|
||||
int rqdtype = code[2];
|
||||
if ((d == OP_PROP) ==
|
||||
(rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))
|
||||
{ count++; ADD_NEW(state_offset, count); }
|
||||
BOOL OK;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
switch(code[2])
|
||||
{
|
||||
case PT_ANY:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = category == code[3];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = chartype == code[3];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = script == code[3];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
OK = codevalue != OP_PROP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_PROP)) { count++; ADD_NEW(state_offset, count); }
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -878,7 +953,7 @@ for (;;)
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)
|
||||
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
|
||||
{
|
||||
const uschar *nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
@@ -887,7 +962,7 @@ for (;;)
|
||||
int nd;
|
||||
int ndlen = 1;
|
||||
GETCHARLEN(nd, nptr, ndlen);
|
||||
if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;
|
||||
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
|
||||
ncount++;
|
||||
nptr += ndlen;
|
||||
}
|
||||
@@ -899,7 +974,7 @@ for (;;)
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_PROP_EXTRA + OP_TYPEQUERY:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINQUERY:
|
||||
count = 3;
|
||||
count = 4;
|
||||
goto QS1;
|
||||
|
||||
case OP_PROP_EXTRA + OP_TYPESTAR:
|
||||
@@ -908,14 +983,41 @@ for (;;)
|
||||
|
||||
QS1:
|
||||
|
||||
ADD_ACTIVE(state_offset + 3, 0);
|
||||
ADD_ACTIVE(state_offset + 4, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
int category = _pcre_ucp_findchar(c, &chartype, &othercase);
|
||||
int rqdtype = code[2];
|
||||
if ((d == OP_PROP) ==
|
||||
(rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))
|
||||
{ ADD_NEW(state_offset + count, 0); }
|
||||
BOOL OK;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
switch(code[2])
|
||||
{
|
||||
case PT_ANY:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = category == code[3];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = chartype == code[3];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = script == code[3];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
OK = codevalue != OP_PROP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_PROP)) { ADD_NEW(state_offset + count, 0); }
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -932,7 +1034,7 @@ for (;;)
|
||||
QS2:
|
||||
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)
|
||||
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
|
||||
{
|
||||
const uschar *nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
@@ -941,7 +1043,7 @@ for (;;)
|
||||
int nd;
|
||||
int ndlen = 1;
|
||||
GETCHARLEN(nd, nptr, ndlen);
|
||||
if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;
|
||||
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
|
||||
ncount++;
|
||||
nptr += ndlen;
|
||||
}
|
||||
@@ -954,17 +1056,45 @@ for (;;)
|
||||
case OP_PROP_EXTRA + OP_TYPEUPTO:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINUPTO:
|
||||
if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 5, 0); }
|
||||
{ ADD_ACTIVE(state_offset + 6, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
int category = _pcre_ucp_findchar(c, &chartype, &othercase);
|
||||
int rqdtype = code[4];
|
||||
if ((d == OP_PROP) ==
|
||||
(rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))
|
||||
BOOL OK;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
switch(code[4])
|
||||
{
|
||||
case PT_ANY:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = category == code[5];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = chartype == code[5];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = script == code[5];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
OK = codevalue != OP_PROP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_PROP))
|
||||
{
|
||||
if (++count >= GET2(code, 1))
|
||||
{ ADD_NEW(state_offset + 5, 0); }
|
||||
{ ADD_NEW(state_offset + 6, 0); }
|
||||
else
|
||||
{ ADD_NEW(state_offset, count); }
|
||||
}
|
||||
@@ -978,7 +1108,7 @@ for (;;)
|
||||
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 4, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)
|
||||
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
|
||||
{
|
||||
const uschar *nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
@@ -987,7 +1117,7 @@ for (;;)
|
||||
int nd;
|
||||
int ndlen = 1;
|
||||
GETCHARLEN(nd, nptr, ndlen);
|
||||
if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;
|
||||
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
|
||||
ncount++;
|
||||
nptr += ndlen;
|
||||
}
|
||||
@@ -1018,17 +1148,17 @@ for (;;)
|
||||
{
|
||||
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
|
||||
{
|
||||
int othercase;
|
||||
if (c < 128) othercase = fcc[c]; else
|
||||
|
||||
/* If we have Unicode property support, we can use it to test the
|
||||
other case of the character, if there is one. The result of
|
||||
_pcre_ucp_findchar() is < 0 if the char isn't found, and othercase is
|
||||
returned as zero if there isn't another case. */
|
||||
other case of the character. */
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
if (_pcre_ucp_findchar(c, &chartype, &othercase) < 0)
|
||||
#endif
|
||||
othercase = _pcre_ucp_othercase(c);
|
||||
#else
|
||||
othercase = -1;
|
||||
#endif
|
||||
|
||||
if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
|
||||
}
|
||||
@@ -1050,7 +1180,7 @@ for (;;)
|
||||
to wait for them to pass before continuing. */
|
||||
|
||||
case OP_EXTUNI:
|
||||
if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)
|
||||
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
|
||||
{
|
||||
const uschar *nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
@@ -1058,7 +1188,7 @@ for (;;)
|
||||
{
|
||||
int nclen = 1;
|
||||
GETCHARLEN(c, nptr, nclen);
|
||||
if (_pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M) break;
|
||||
if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
|
||||
ncount++;
|
||||
nptr += nclen;
|
||||
}
|
||||
@@ -1093,10 +1223,10 @@ for (;;)
|
||||
if ((ims & PCRE_CASELESS) != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8 && c >= 128)
|
||||
if (utf8 && d >= 128)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
if (_pcre_ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;
|
||||
otherd = _pcre_ucp_othercase(d);
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
else
|
||||
@@ -1117,13 +1247,13 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
int otherd = -1;
|
||||
if ((ims && PCRE_CASELESS) != 0)
|
||||
if ((ims & PCRE_CASELESS) != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8 && c >= 128)
|
||||
if (utf8 && d >= 128)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
if (_pcre_ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;
|
||||
otherd = _pcre_ucp_othercase(d);
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
else
|
||||
@@ -1144,13 +1274,13 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
int otherd = -1;
|
||||
if ((ims && PCRE_CASELESS) != 0)
|
||||
if ((ims & PCRE_CASELESS) != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8 && c >= 128)
|
||||
if (utf8 && d >= 128)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
if (_pcre_ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;
|
||||
otherd = _pcre_ucp_othercase(d);
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
else
|
||||
@@ -1178,10 +1308,10 @@ for (;;)
|
||||
if ((ims & PCRE_CASELESS) != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8 && c >= 128)
|
||||
if (utf8 && d >= 128)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
if (_pcre_ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;
|
||||
otherd = _pcre_ucp_othercase(d);
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
else
|
||||
@@ -1267,7 +1397,8 @@ for (;;)
|
||||
{ ADD_ACTIVE(next_state_offset + 5, 0); }
|
||||
if (isinclass)
|
||||
{
|
||||
if (++count >= GET2(ecode, 3))
|
||||
int max = GET2(ecode, 3);
|
||||
if (++count >= max && max != 0) /* Max 0 => no limit */
|
||||
{ ADD_NEW(next_state_offset + 5, 0); }
|
||||
else
|
||||
{ ADD_NEW(state_offset, count); }
|
||||
@@ -1519,7 +1650,7 @@ for (;;)
|
||||
cb.version = 1; /* Version 1 of the callout block */
|
||||
cb.callout_number = code[1];
|
||||
cb.offset_vector = offsets;
|
||||
cb.subject = (char *)start_subject;
|
||||
cb.subject = (PCRE_SPTR)start_subject;
|
||||
cb.subject_length = end_subject - start_subject;
|
||||
cb.start_match = current_subject - start_subject;
|
||||
cb.current_position = ptr - start_subject;
|
||||
@@ -1567,7 +1698,7 @@ for (;;)
|
||||
DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
|
||||
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
|
||||
rlevel*2-2, SP));
|
||||
return match_count;
|
||||
break; /* In effect, "return", but see the comment below */
|
||||
}
|
||||
|
||||
/* One or more states are active for the next character. */
|
||||
@@ -1575,11 +1706,13 @@ for (;;)
|
||||
ptr += clen; /* Advance to next subject character */
|
||||
} /* Loop to move along the subject string */
|
||||
|
||||
/* Control never gets here, but we must keep the compiler happy. */
|
||||
/* Control gets here from "break" a few lines above. We do it this way because
|
||||
if we use "return" above, we have compiler trouble. Some compilers warn if
|
||||
there's nothing here because they think the function doesn't return a value. On
|
||||
the other hand, if we put a dummy statement here, some more clever compilers
|
||||
complain that it can't be reached. Sigh. */
|
||||
|
||||
DPRINTF(("%.*s+++ Unexpected end of internal_dfa_exec %d +++\n"
|
||||
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, rlevel*2-2, SP));
|
||||
return PCRE_ERROR_NOMATCH;
|
||||
return match_count;
|
||||
}
|
||||
|
||||
|
||||
@@ -1611,13 +1744,14 @@ Returns: > 0 => number of match offset pairs placed in offsets
|
||||
< -1 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE_EXPORT int
|
||||
PCRE_DATA_SCOPE int
|
||||
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
|
||||
const char *subject, int length, int start_offset, int options, int *offsets,
|
||||
int offsetcount, int *workspace, int wscount)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)argument_re;
|
||||
dfa_match_data match_block;
|
||||
dfa_match_data *md = &match_block;
|
||||
BOOL utf8, anchored, startline, firstline;
|
||||
const uschar *current_subject, *end_subject, *lcc;
|
||||
|
||||
@@ -1632,6 +1766,7 @@ BOOL req_byte_caseless = FALSE;
|
||||
int first_byte = -1;
|
||||
int req_byte = -1;
|
||||
int req_byte2 = -1;
|
||||
int newline;
|
||||
|
||||
/* Plausibility checks */
|
||||
|
||||
@@ -1646,8 +1781,8 @@ flipping, so we scan the extra_data block first. This may set two fields in the
|
||||
match block, so we must initialize them beforehand. However, the other fields
|
||||
in the match block must not be set until after the byte flipping. */
|
||||
|
||||
match_block.tables = re->tables;
|
||||
match_block.callout_data = NULL;
|
||||
md->tables = re->tables;
|
||||
md->callout_data = NULL;
|
||||
|
||||
if (extra_data != NULL)
|
||||
{
|
||||
@@ -1655,10 +1790,12 @@ if (extra_data != NULL)
|
||||
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||
study = (const pcre_study_data *)extra_data->study_data;
|
||||
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
|
||||
if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
|
||||
return PCRE_ERROR_DFA_UMLIMIT;
|
||||
if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
|
||||
match_block.callout_data = extra_data->callout_data;
|
||||
md->callout_data = extra_data->callout_data;
|
||||
if ((flags & PCRE_EXTRA_TABLES) != 0)
|
||||
match_block.tables = extra_data->tables;
|
||||
md->tables = extra_data->tables;
|
||||
}
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
@@ -1679,17 +1816,48 @@ current_subject = (const unsigned char *)subject + start_offset;
|
||||
end_subject = (const unsigned char *)subject + length;
|
||||
req_byte_ptr = current_subject - 1;
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
utf8 = (re->options & PCRE_UTF8) != 0;
|
||||
anchored = (options & PCRE_ANCHORED) != 0 || (re->options & PCRE_ANCHORED) != 0;
|
||||
#else
|
||||
utf8 = FALSE;
|
||||
#endif
|
||||
|
||||
anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
|
||||
(re->options & PCRE_ANCHORED) != 0;
|
||||
|
||||
/* The remaining fixed data for passing around. */
|
||||
|
||||
match_block.start_code = (const uschar *)argument_re +
|
||||
md->start_code = (const uschar *)argument_re +
|
||||
re->name_table_offset + re->name_count * re->name_entry_size;
|
||||
match_block.start_subject = (const unsigned char *)subject;
|
||||
match_block.end_subject = end_subject;
|
||||
match_block.moptions = options;
|
||||
match_block.poptions = re->options;
|
||||
md->start_subject = (const unsigned char *)subject;
|
||||
md->end_subject = end_subject;
|
||||
md->moptions = options;
|
||||
md->poptions = re->options;
|
||||
|
||||
/* Handle different types of newline. The two bits give four cases. If nothing
|
||||
is set at run time, whatever was used at compile time applies. */
|
||||
|
||||
switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &
|
||||
PCRE_NEWLINE_CRLF)
|
||||
{
|
||||
default: newline = NEWLINE; break; /* Compile-time default */
|
||||
case PCRE_NEWLINE_CR: newline = '\r'; break;
|
||||
case PCRE_NEWLINE_LF: newline = '\n'; break;
|
||||
case PCRE_NEWLINE_CR+
|
||||
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
|
||||
}
|
||||
|
||||
if (newline > 255)
|
||||
{
|
||||
md->nllen = 2;
|
||||
md->nl[0] = (newline >> 8) & 255;
|
||||
md->nl[1] = newline & 255;
|
||||
}
|
||||
else
|
||||
{
|
||||
md->nllen = 1;
|
||||
md->nl[0] = newline;
|
||||
}
|
||||
|
||||
/* Check a UTF-8 string if required. Unfortunately there's no way of passing
|
||||
back the character offset. */
|
||||
@@ -1715,12 +1883,12 @@ if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
|
||||
is a feature that makes it possible to save compiled regex and re-use them
|
||||
in other programs later. */
|
||||
|
||||
if (match_block.tables == NULL) match_block.tables = _pcre_default_tables;
|
||||
if (md->tables == NULL) md->tables = _pcre_default_tables;
|
||||
|
||||
/* The lower casing table and the "must be at the start of a line" flag are
|
||||
used in a loop when finding where to start. */
|
||||
|
||||
lcc = match_block.tables + lcc_offset;
|
||||
lcc = md->tables + lcc_offset;
|
||||
startline = (re->options & PCRE_STARTLINE) != 0;
|
||||
firstline = (re->options & PCRE_FIRSTLINE) != 0;
|
||||
|
||||
@@ -1753,7 +1921,7 @@ if ((re->options & PCRE_REQCHSET) != 0)
|
||||
{
|
||||
req_byte = re->req_byte & 255;
|
||||
req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
|
||||
req_byte2 = (match_block.tables + fcc_offset)[req_byte]; /* case flipped */
|
||||
req_byte2 = (md->tables + fcc_offset)[req_byte]; /* case flipped */
|
||||
}
|
||||
|
||||
/* Call the main matching function, looping for a non-anchored regex after a
|
||||
@@ -1771,14 +1939,14 @@ for (;;)
|
||||
|
||||
/* Advance to a unique first char if possible. If firstline is TRUE, the
|
||||
start of the match is constrained to the first line of a multiline string.
|
||||
Implement this by temporarily adjusting end_subject so that we stop scanning
|
||||
at a newline. If the match fails at the newline, later code breaks this loop.
|
||||
*/
|
||||
Implement this by temporarily adjusting end_subject so that we stop
|
||||
scanning at a newline. If the match fails at the newline, later code breaks
|
||||
this loop. */
|
||||
|
||||
if (firstline)
|
||||
{
|
||||
const uschar *t = current_subject;
|
||||
while (t < save_end_subject && *t != '\n') t++;
|
||||
while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;
|
||||
end_subject = t;
|
||||
}
|
||||
|
||||
@@ -1793,13 +1961,15 @@ for (;;)
|
||||
current_subject++;
|
||||
}
|
||||
|
||||
/* Or to just after \n for a multiline match if possible */
|
||||
/* Or to just after a linebreak for a multiline match if possible */
|
||||
|
||||
else if (startline)
|
||||
{
|
||||
if (current_subject > match_block.start_subject + start_offset)
|
||||
if (current_subject > md->start_subject + md->nllen +
|
||||
start_offset)
|
||||
{
|
||||
while (current_subject < end_subject && current_subject[-1] != NEWLINE)
|
||||
while (current_subject <= end_subject &&
|
||||
!IS_NEWLINE(current_subject - md->nllen))
|
||||
current_subject++;
|
||||
}
|
||||
}
|
||||
@@ -1880,8 +2050,8 @@ for (;;)
|
||||
/* OK, now we can do the business */
|
||||
|
||||
rc = internal_dfa_exec(
|
||||
&match_block, /* fixed match data */
|
||||
match_block.start_code, /* this subexpression's code */
|
||||
md, /* fixed match data */
|
||||
md->start_code, /* this subexpression's code */
|
||||
current_subject, /* where we currently are */
|
||||
start_offset, /* start offset in subject */
|
||||
offsets, /* offset vector */
|
||||
@@ -1900,17 +2070,15 @@ for (;;)
|
||||
/* Advance to the next subject character unless we are at the end of a line
|
||||
and firstline is set. */
|
||||
|
||||
if (firstline && *current_subject == NEWLINE) break;
|
||||
if (firstline &&
|
||||
current_subject <= end_subject - md->nllen &&
|
||||
IS_NEWLINE(current_subject)) break;
|
||||
current_subject++;
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
|
||||
current_subject++;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (current_subject > end_subject) break;
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -61,7 +61,7 @@ Arguments:
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_EXPORT int
|
||||
PCRE_DATA_SCOPE int
|
||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
||||
void *where)
|
||||
{
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -50,8 +50,8 @@ for these functions came from Scott Wimer. */
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used by the two extraction functions below, as well
|
||||
as being generally available.
|
||||
/* This function is used by the get_first_set() function below, as well
|
||||
as being generally available. It assumes that names are unique.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
@@ -93,6 +93,113 @@ return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find (multiple) entries for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This is used by the get_first_set() function below, as well as being
|
||||
generally available. It is used when duplicated names are permitted.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose entries required
|
||||
firstptr where to put the pointer to the first entry
|
||||
lastptr where to put the pointer to the last entry
|
||||
|
||||
Returns: the length of each entry, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
|
||||
char **firstptr, char **lastptr)
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
uschar *nametable, *lastentry;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
|
||||
lastentry = nametable + entrysize * (top - 1);
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
uschar *entry = nametable + entrysize*mid;
|
||||
int c = strcmp(stringname, (char *)(entry + 2));
|
||||
if (c == 0)
|
||||
{
|
||||
uschar *first = entry;
|
||||
uschar *last = entry;
|
||||
while (first > nametable)
|
||||
{
|
||||
if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
|
||||
first -= entrysize;
|
||||
}
|
||||
while (last < lastentry)
|
||||
{
|
||||
if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
*firstptr = (char *)first;
|
||||
*lastptr = (char *)last;
|
||||
return entrysize;
|
||||
}
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find first set of multiple named strings *
|
||||
*************************************************/
|
||||
|
||||
/* This function allows for duplicate names in the table of named substrings.
|
||||
It returns the number of the first one that was set in a pattern match.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name of the capturing substring
|
||||
ovector the vector of matched substrings
|
||||
|
||||
Returns: the number of the first that is set,
|
||||
or the number of the last one if none are set,
|
||||
or a negative number on error
|
||||
*/
|
||||
|
||||
static int
|
||||
get_first_set(const pcre *code, const char *stringname, int *ovector)
|
||||
{
|
||||
const real_pcre *re = (const real_pcre *)code;
|
||||
int entrysize;
|
||||
char *first, *last;
|
||||
uschar *entry;
|
||||
if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0)
|
||||
return pcre_get_stringnumber(code, stringname);
|
||||
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
|
||||
if (entrysize <= 0) return entrysize;
|
||||
for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
|
||||
{
|
||||
int n = (entry[0] << 8) + entry[1];
|
||||
if (ovector[n*2] >= 0) return n;
|
||||
}
|
||||
return (first[0] << 8) + first[1];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to given buffer *
|
||||
*************************************************/
|
||||
@@ -142,7 +249,8 @@ return yield;
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name.
|
||||
identifying it by name. If the regex permits duplicate names, the first
|
||||
substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
@@ -168,7 +276,7 @@ int
|
||||
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, char *buffer, int size)
|
||||
{
|
||||
int n = pcre_get_stringnumber(code, stringname);
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
if (n <= 0) return n;
|
||||
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
}
|
||||
@@ -299,7 +407,8 @@ return yield;
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new store.
|
||||
new store. If the regex permits duplicate names, the first substring that is
|
||||
set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
@@ -324,9 +433,10 @@ int
|
||||
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, const char **stringptr)
|
||||
{
|
||||
int n = pcre_get_stringnumber(code, stringname);
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
if (n <= 0) return n;
|
||||
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -68,7 +68,7 @@ Returns: number of capturing subpatterns
|
||||
or negative values on error
|
||||
*/
|
||||
|
||||
PCRE_EXPORT int
|
||||
PCRE_DATA_SCOPE int
|
||||
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
|
||||
{
|
||||
real_pcre internal_re;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -121,6 +121,32 @@ Unix, where it is defined in sys/types, so use "uschar" instead. */
|
||||
|
||||
typedef unsigned char uschar;
|
||||
|
||||
/* PCRE is able to support 3 different kinds of newline (CR, LF, CRLF). The
|
||||
following macro is used to package up testing for newlines. NLBLOCK is defined
|
||||
in the various modules to indicate in which datablock the parameters exist. */
|
||||
|
||||
#define IS_NEWLINE(p) \
|
||||
((p)[0] == NLBLOCK->nl[0] && \
|
||||
(NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]))
|
||||
|
||||
/* When PCRE is compiled as a C++ library, the subject pointer can be
|
||||
replaced with a custom type. This makes it possible, for example, to
|
||||
allow pcre_exec() to process subject strings that are discontinuous by
|
||||
using a smart pointer class. It must always be possible to inspect all
|
||||
of the subject string in pcre_exec() because of the way it
|
||||
backtracks. Two macros are required in the normal case, for
|
||||
sign-unspecified and unsigned char pointers. The former is used for
|
||||
the external interface and appears in pcre.h, which is why its name
|
||||
must begin with PCRE_. */
|
||||
|
||||
#ifdef CUSTOM_SUBJECT_PTR
|
||||
#define PCRE_SPTR CUSTOM_SUBJECT_PTR
|
||||
#define USPTR CUSTOM_SUBJECT_PTR
|
||||
#else
|
||||
#define PCRE_SPTR const char *
|
||||
#define USPTR const unsigned char *
|
||||
#endif
|
||||
|
||||
/* Include the public PCRE header and the definitions of UCP character
|
||||
property values. */
|
||||
|
||||
@@ -156,13 +182,14 @@ case in PCRE. */
|
||||
#if HAVE_BCOPY
|
||||
#define memmove(a, b, c) bcopy(b, a, c)
|
||||
#else /* HAVE_BCOPY */
|
||||
void *
|
||||
static void *
|
||||
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
|
||||
{
|
||||
int i;
|
||||
size_t i;
|
||||
dest += n;
|
||||
src += n;
|
||||
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
||||
return dest;
|
||||
}
|
||||
#define memmove(a, b, c) pcre_memmove(a, b, c)
|
||||
#endif /* not HAVE_BCOPY */
|
||||
@@ -368,16 +395,17 @@ Standard C system should have one. */
|
||||
|
||||
#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)
|
||||
|
||||
/* Private options flags start at the most significant end of the four bytes,
|
||||
but skip the top bit so we can use ints for convenience without getting tangled
|
||||
with negative values. The public options defined in pcre.h start at the least
|
||||
significant end. Make sure they don't overlap! */
|
||||
/* Private options flags start at the most significant end of the four bytes.
|
||||
The public options defined in pcre.h start at the least significant end. Make
|
||||
sure they don't overlap! The bits are getting a bit scarce now -- when we run
|
||||
out, there is a dummy word in the structure that could be used for the private
|
||||
bits. */
|
||||
|
||||
#define PCRE_NOPARTIAL 0x80000000 /* can't use partial with this regex */
|
||||
#define PCRE_FIRSTSET 0x40000000 /* first_byte is set */
|
||||
#define PCRE_REQCHSET 0x20000000 /* req_byte is set */
|
||||
#define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */
|
||||
#define PCRE_ICHANGED 0x08000000 /* i option changes within regex */
|
||||
#define PCRE_NOPARTIAL 0x04000000 /* can't use partial with this regex */
|
||||
#define PCRE_JCHANGED 0x08000000 /* j option changes within regex */
|
||||
|
||||
/* Options for the "extra" block produced by pcre_study(). */
|
||||
|
||||
@@ -389,15 +417,17 @@ time, run time, or study time, respectively. */
|
||||
#define PUBLIC_OPTIONS \
|
||||
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
||||
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
|
||||
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE)
|
||||
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
|
||||
PCRE_DUPNAMES|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)
|
||||
|
||||
#define PUBLIC_EXEC_OPTIONS \
|
||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
||||
PCRE_PARTIAL)
|
||||
PCRE_PARTIAL|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)
|
||||
|
||||
#define PUBLIC_DFA_EXEC_OPTIONS \
|
||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
||||
PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART)
|
||||
PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_CR| \
|
||||
PCRE_NEWLINE_LF)
|
||||
|
||||
#define PUBLIC_STUDY_OPTIONS 0 /* None defined */
|
||||
|
||||
@@ -456,6 +486,26 @@ ESC_n is defined as yet another macro, which is set in config.h to either \n
|
||||
#define ESC_tee '\t'
|
||||
#endif
|
||||
|
||||
/* Codes for different types of Unicode property */
|
||||
|
||||
#define PT_ANY 0 /* Any property - matches all chars */
|
||||
#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */
|
||||
#define PT_GC 2 /* General characteristic (e.g. L) */
|
||||
#define PT_PC 3 /* Particular characteristic (e.g. Lu) */
|
||||
#define PT_SC 4 /* Script (e.g. Han) */
|
||||
|
||||
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
||||
contain UTF-8 characters with values greater than 255. */
|
||||
|
||||
#define XCL_NOT 0x01 /* Flag: this is a negative class */
|
||||
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
|
||||
|
||||
#define XCL_END 0 /* Marks end of individual items */
|
||||
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
||||
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
||||
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
|
||||
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
|
||||
|
||||
/* These are escaped items that aren't just an encoding of a particular data
|
||||
value such as \n. They must have non-zero values, as check_escape() returns
|
||||
their negation. Also, they must appear in the same order as in the opcode
|
||||
@@ -471,19 +521,6 @@ enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
|
||||
ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_X, ESC_Z, ESC_z, ESC_E,
|
||||
ESC_Q, ESC_REF };
|
||||
|
||||
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
||||
contain UTF-8 characters with values greater than 255. */
|
||||
|
||||
#define XCL_NOT 0x01 /* Flag: this is a negative class */
|
||||
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
|
||||
|
||||
#define XCL_END 0 /* Marks end of individual items */
|
||||
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
||||
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
||||
#define XCL_PROP 3 /* Unicode property (one property code) follows */
|
||||
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
|
||||
|
||||
|
||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
||||
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
|
||||
OP_EOD must correspond in order to the list of escapes immediately above.
|
||||
@@ -518,7 +555,7 @@ enum {
|
||||
OP_DOLL, /* 20 End of line - varies with multiline switch */
|
||||
OP_CHAR, /* 21 Match one character, casefully */
|
||||
OP_CHARNC, /* 22 Match one character, caselessly */
|
||||
OP_NOT, /* 23 Match anything but the following char */
|
||||
OP_NOT, /* 23 Match one character, not the following one */
|
||||
|
||||
OP_STAR, /* 24 The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* 25 all these opcodes must come in pairs, with */
|
||||
@@ -647,7 +684,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
1, /* End */ \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \
|
||||
1, 1, /* Any, Anybyte */ \
|
||||
2, 2, 1, /* NOTPROP, PROP, EXTUNI */ \
|
||||
3, 3, 1, /* NOTPROP, PROP, EXTUNI */ \
|
||||
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \
|
||||
2, /* Char - the minimum length */ \
|
||||
2, /* Charnc - the minimum length */ \
|
||||
@@ -698,7 +735,8 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||
ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
|
||||
ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
|
||||
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47 };
|
||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
||||
ERR50, ERR51 };
|
||||
|
||||
/* The real format of the start of the pcre block; the index of names and the
|
||||
code vector run on as long as necessary after the end. We store an explicit
|
||||
@@ -762,6 +800,8 @@ typedef struct compile_data {
|
||||
unsigned int backref_map; /* Bitmap of low back refs */
|
||||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
||||
BOOL nopartial; /* Set TRUE if partial won't work */
|
||||
int nllen; /* 1 or 2 for newline string length */
|
||||
uschar nl[4]; /* Newline string */
|
||||
} compile_data;
|
||||
|
||||
/* Structure for maintaining a chain of pointers to the currently incomplete
|
||||
@@ -779,18 +819,18 @@ typedef struct recursion_info {
|
||||
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
||||
int group_num; /* Number of group that was called */
|
||||
const uschar *after_call; /* "Return value": points after the call in the expr */
|
||||
const uschar *save_start; /* Old value of md->start_match */
|
||||
USPTR save_start; /* Old value of md->start_match */
|
||||
int *offset_save; /* Pointer to start of saved offsets */
|
||||
int saved_max; /* Number of saved offsets */
|
||||
} recursion_info;
|
||||
|
||||
/* When compiling in a mode that doesn't use recursive calls to match(),
|
||||
a structure is used to remember local variables on the heap. It is defined in
|
||||
pcre.c, close to the match() function, so that it is easy to keep it in step
|
||||
with any changes of local variable. However, the pointer to the current frame
|
||||
must be saved in some "static" place over a longjmp(). We declare the
|
||||
structure here so that we can put a pointer in the match_data structure.
|
||||
NOTE: This isn't used for a "normal" compilation of pcre. */
|
||||
pcre_exec.c, close to the match() function, so that it is easy to keep it in
|
||||
step with any changes of local variable. However, the pointer to the current
|
||||
frame must be saved in some "static" place over a longjmp(). We declare the
|
||||
structure here so that we can put a pointer in the match_data structure. NOTE:
|
||||
This isn't used for a "normal" compilation of pcre. */
|
||||
|
||||
struct heapframe;
|
||||
|
||||
@@ -800,9 +840,12 @@ doing traditional NFA matching, so that they are thread-safe. */
|
||||
typedef struct match_data {
|
||||
unsigned long int match_call_count; /* As it says */
|
||||
unsigned long int match_limit; /* As it says */
|
||||
unsigned long int match_limit_recursion; /* As it says */
|
||||
int *offset_vector; /* Offset vector */
|
||||
int offset_end; /* One past the end */
|
||||
int offset_max; /* The maximum usable for return data */
|
||||
int nllen; /* 1 or 2 for newline string length */
|
||||
uschar nl[4]; /* Newline string */
|
||||
const uschar *lcc; /* Points to lower casing table */
|
||||
const uschar *ctypes; /* Points to table of type maps */
|
||||
BOOL offset_overflow; /* Set if too many extractions */
|
||||
@@ -814,10 +857,10 @@ typedef struct match_data {
|
||||
BOOL partial; /* PARTIAL flag */
|
||||
BOOL hitend; /* Hit the end of the subject at some point */
|
||||
const uschar *start_code; /* For use when recursing */
|
||||
const uschar *start_subject; /* Start of the subject string */
|
||||
const uschar *end_subject; /* End of the subject string */
|
||||
const uschar *start_match; /* Start of this match attempt */
|
||||
const uschar *end_match_ptr; /* Subject position at end match */
|
||||
USPTR start_subject; /* Start of the subject string */
|
||||
USPTR end_subject; /* End of the subject string */
|
||||
USPTR start_match; /* Start of this match attempt */
|
||||
USPTR end_match_ptr; /* Subject position at end match */
|
||||
int end_offset_top; /* Highwater mark at end of match */
|
||||
int capture_last; /* Most recent capture number */
|
||||
int start_offset; /* The start offset value */
|
||||
@@ -836,6 +879,8 @@ typedef struct dfa_match_data {
|
||||
const uschar *tables; /* Character tables */
|
||||
int moptions; /* Match options */
|
||||
int poptions; /* Pattern options */
|
||||
int nllen; /* 1 or 2 for newline string length */
|
||||
uschar nl[4]; /* Newline string */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
} dfa_match_data;
|
||||
|
||||
@@ -872,12 +917,13 @@ total length. */
|
||||
#define ctypes_offset (cbits_offset + cbit_length)
|
||||
#define tables_length (ctypes_offset + 256)
|
||||
|
||||
/* Layout of the UCP type table that translates property names into codes for
|
||||
pcre_ucp_findchar(). */
|
||||
/* Layout of the UCP type table that translates property names into types and
|
||||
codes. */
|
||||
|
||||
typedef struct {
|
||||
const char *name;
|
||||
int value;
|
||||
pcre_uint16 type;
|
||||
pcre_uint16 value;
|
||||
} ucp_type_table;
|
||||
|
||||
|
||||
@@ -908,7 +954,8 @@ sense, but are not part of the PCRE public API. */
|
||||
extern int _pcre_ord2utf8(int, uschar *);
|
||||
extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *,
|
||||
const pcre_study_data *, pcre_study_data *);
|
||||
extern int _pcre_ucp_findchar(const int, int *, int *);
|
||||
extern int _pcre_ucp_findprop(const unsigned int, int *, int *);
|
||||
extern int _pcre_ucp_othercase(const int);
|
||||
extern int _pcre_valid_utf8(const uschar *, int);
|
||||
extern BOOL _pcre_xclass(int, const uschar *);
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -86,29 +86,22 @@ for (i = 0; i < 256; i++) *p++ = tolower(i);
|
||||
|
||||
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
|
||||
|
||||
/* Then the character class tables. Don't try to be clever and save effort
|
||||
on exclusive ones - in some locales things may be different. Note that the
|
||||
table for "space" includes everything "isspace" gives, including VT in the
|
||||
default locale. This makes it work for the POSIX class [:space:]. */
|
||||
/* Then the character class tables. Don't try to be clever and save effort on
|
||||
exclusive ones - in some locales things may be different. Note that the table
|
||||
for "space" includes everything "isspace" gives, including VT in the default
|
||||
locale. This makes it work for the POSIX class [:space:]. Note also that it is
|
||||
possible for a character to be alnum or alpha without being lower or upper,
|
||||
such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at
|
||||
least under Debian Linux's locales as of 12/2005). So we must test for alnum
|
||||
specially. */
|
||||
|
||||
memset(p, 0, cbit_length);
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if (isdigit(i))
|
||||
{
|
||||
p[cbit_digit + i/8] |= 1 << (i&7);
|
||||
p[cbit_word + i/8] |= 1 << (i&7);
|
||||
}
|
||||
if (isupper(i))
|
||||
{
|
||||
p[cbit_upper + i/8] |= 1 << (i&7);
|
||||
p[cbit_word + i/8] |= 1 << (i&7);
|
||||
}
|
||||
if (islower(i))
|
||||
{
|
||||
p[cbit_lower + i/8] |= 1 << (i&7);
|
||||
p[cbit_word + i/8] |= 1 << (i&7);
|
||||
}
|
||||
if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
|
||||
if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
|
||||
if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
|
||||
if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
|
||||
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
|
||||
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
|
||||
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
|
||||
@@ -137,7 +130,9 @@ for (i = 0; i < 256; i++)
|
||||
meta-character, which in this sense is any character that terminates a run
|
||||
of data characters. */
|
||||
|
||||
if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta; *p++ = x; }
|
||||
if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta;
|
||||
*p++ = x;
|
||||
}
|
||||
|
||||
return yield;
|
||||
}
|
||||
|
||||
@@ -1,454 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains a PCRE private debugging function for printing out the
|
||||
internal form of a compiled regular expression, along with some supporting
|
||||
local functions. This source file is used in two places:
|
||||
|
||||
(1) It is #included by pcre_compile.c when it is compiled in debugging mode
|
||||
(DEBUG defined in pcre_internal.h). It is not included in production compiles.
|
||||
|
||||
(2) It is always #included by pcretest.c, which can be asked to print out a
|
||||
compiled regex for debugging purposes. */
|
||||
|
||||
|
||||
/* Table of printable opcode names, built from the OP_NAME_LIST macro. The
printing code in this file indexes it by opcode value (e.g. OP_names[*code]). */
static const char *OP_names[] = { OP_NAME_LIST };
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print single- or multi-byte character *
|
||||
*************************************************/
|
||||
|
||||
static int
|
||||
print_char(FILE *f, uschar *ptr, BOOL utf8)
|
||||
{
|
||||
int c = *ptr;
|
||||
|
||||
if (!utf8 || (c & 0xc0) != 0xc0)
|
||||
{
|
||||
if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
int s = 6*a;
|
||||
c = (c & _pcre_utf8_table3[a]) << s;
|
||||
for (i = 1; i <= a; i++)
|
||||
{
|
||||
/* This is a check for malformed UTF-8; it should only occur if the sanity
|
||||
check has been turned off. Rather than swallow random bytes, just stop if
|
||||
we hit a bad one. Print it with \X instead of \x as an indication. */
|
||||
|
||||
if ((ptr[i] & 0xc0) != 0x80)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return i - 1;
|
||||
}
|
||||
|
||||
/* The byte is OK */
|
||||
|
||||
s -= 6;
|
||||
c |= (ptr[i] & 0x3f) << s;
|
||||
}
|
||||
if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find Unicode property name *
|
||||
*************************************************/
|
||||
|
||||
/* Translate a Unicode property value into its name for printing.

Argument:   property   the property value stored in the compiled pattern
Returns:    the name of the last entry in _pcre_utt[] whose value field
            equals property, or "??" if there is no such entry or if the
            code was compiled without SUPPORT_UCP
*/

static const char *
get_ucpname(int property)
{
#ifdef SUPPORT_UCP
int i;

/* _pcre_utt_size is the number of entries in _pcre_utt[], so the last valid
index is _pcre_utt_size - 1. Starting the scan at _pcre_utt_size itself would
read one element past the end of the table. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (property == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
(void)property;   /* Not used when Unicode property support is absent */
return "??";
#endif
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print compiled regex *
|
||||
*************************************************/
|
||||
|
||||
/* Make this function work for a regex with integers in either byte order.
|
||||
However, we assume that what we are passed is a compiled regex. */
|
||||
|
||||
static void
|
||||
pcre_printint(pcre *external_re, FILE *f)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)external_re;
|
||||
uschar *codestart, *code;
|
||||
BOOL utf8;
|
||||
|
||||
unsigned int options = re->options;
|
||||
int offset = re->name_table_offset;
|
||||
int count = re->name_count;
|
||||
int size = re->name_entry_size;
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
{
|
||||
offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
|
||||
count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
|
||||
size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
|
||||
options = ((options << 24) & 0xff000000) |
|
||||
((options << 8) & 0x00ff0000) |
|
||||
((options >> 8) & 0x0000ff00) |
|
||||
((options >> 24) & 0x000000ff);
|
||||
}
|
||||
|
||||
code = codestart = (uschar *)re + offset + count * size;
|
||||
utf8 = (options & PCRE_UTF8) != 0;
|
||||
|
||||
for(;;)
|
||||
{
|
||||
uschar *ccode;
|
||||
int c;
|
||||
int extra = 0;
|
||||
|
||||
fprintf(f, "%3d ", (int)(code - codestart));
|
||||
|
||||
if (*code >= OP_BRA)
|
||||
{
|
||||
if (*code - OP_BRA > EXTRACT_BASIC_MAX)
|
||||
fprintf(f, "%3d Bra extra\n", GET(code, 1));
|
||||
else
|
||||
fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
|
||||
code += _pcre_OP_lengths[OP_BRA];
|
||||
continue;
|
||||
}
|
||||
|
||||
switch(*code)
|
||||
{
|
||||
case OP_END:
|
||||
fprintf(f, " %s\n", OP_names[*code]);
|
||||
fprintf(f, "------------------------------------------------------------------\n");
|
||||
return;
|
||||
|
||||
case OP_OPT:
|
||||
fprintf(f, " %.2x %s", code[1], OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CHAR:
|
||||
{
|
||||
fprintf(f, " ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf8);
|
||||
}
|
||||
while (*code == OP_CHAR);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_CHARNC:
|
||||
{
|
||||
fprintf(f, " NC ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf8);
|
||||
}
|
||||
while (*code == OP_CHARNC);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
case OP_ALT:
|
||||
case OP_KET:
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ONCE:
|
||||
case OP_COND:
|
||||
case OP_REVERSE:
|
||||
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_BRANUMBER:
|
||||
printf("%3d %s", GET2(code, 1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CREF:
|
||||
if (GET2(code, 1) == CREF_RECURSE)
|
||||
fprintf(f, " Cond recurse");
|
||||
else
|
||||
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
fprintf(f, " ");
|
||||
if (*code >= OP_TYPESTAR)
|
||||
{
|
||||
fprintf(f, "%s", OP_names[code[1]]);
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
|
||||
{
|
||||
fprintf(f, " %s ", get_ucpname(code[2]));
|
||||
extra = 1;
|
||||
}
|
||||
}
|
||||
else extra = print_char(f, code+1, utf8);
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_EXACT:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
fprintf(f, " ");
|
||||
extra = print_char(f, code+3, utf8);
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_EXACT) fprintf(f, ",");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_MINUPTO) fprintf(f, "?");
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
fprintf(f, " %s", OP_names[code[3]]);
|
||||
if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
|
||||
{
|
||||
fprintf(f, " %s ", get_ucpname(code[4]));
|
||||
extra = 1;
|
||||
}
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
|
||||
break;
|
||||
|
||||
case OP_NOT:
|
||||
if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
|
||||
else fprintf(f, " [^\\x%02x]", c);
|
||||
break;
|
||||
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
|
||||
else fprintf(f, " [^\\x%02x]", c);
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
|
||||
else fprintf(f, " [^\\x%02x]{", c);
|
||||
if (*code != OP_NOTEXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_NOTMINUPTO) fprintf(f, "?");
|
||||
break;
|
||||
|
||||
case OP_RECURSE:
|
||||
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_REF:
|
||||
fprintf(f, " \\%d", GET2(code,1));
|
||||
ccode = code + _pcre_OP_lengths[*code];
|
||||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_CALLOUT:
|
||||
fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
|
||||
GET(code, 2 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
|
||||
break;
|
||||
|
||||
/* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
|
||||
having this code always here, and it makes it less messy without all those
|
||||
#ifdefs. */
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
case OP_XCLASS:
|
||||
{
|
||||
int i, min, max;
|
||||
BOOL printmap;
|
||||
|
||||
fprintf(f, " [");
|
||||
|
||||
if (*code == OP_XCLASS)
|
||||
{
|
||||
extra = GET(code, 1);
|
||||
ccode = code + LINK_SIZE + 1;
|
||||
printmap = (*ccode & XCL_MAP) != 0;
|
||||
if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
|
||||
}
|
||||
else
|
||||
{
|
||||
printmap = TRUE;
|
||||
ccode = code + 1;
|
||||
}
|
||||
|
||||
/* Print a bit map */
|
||||
|
||||
if (printmap)
|
||||
{
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((ccode[i/8] & (1 << (i&7))) != 0)
|
||||
{
|
||||
int j;
|
||||
for (j = i+1; j < 256; j++)
|
||||
if ((ccode[j/8] & (1 << (j&7))) == 0) break;
|
||||
if (i == '-' || i == ']') fprintf(f, "\\");
|
||||
if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
|
||||
if (--j > i)
|
||||
{
|
||||
if (j != i + 1) fprintf(f, "-");
|
||||
if (j == '-' || j == ']') fprintf(f, "\\");
|
||||
if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
ccode += 32;
|
||||
}
|
||||
|
||||
/* For an XCLASS there is always some additional data */
|
||||
|
||||
if (*code == OP_XCLASS)
|
||||
{
|
||||
int ch;
|
||||
while ((ch = *ccode++) != XCL_END)
|
||||
{
|
||||
if (ch == XCL_PROP)
|
||||
{
|
||||
fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
|
||||
}
|
||||
else if (ch == XCL_NOTPROP)
|
||||
{
|
||||
fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
|
||||
}
|
||||
else
|
||||
{
|
||||
ccode += 1 + print_char(f, ccode, TRUE);
|
||||
if (ch == XCL_RANGE)
|
||||
{
|
||||
fprintf(f, "-");
|
||||
ccode += 1 + print_char(f, ccode, TRUE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Indicate a non-UTF8 class which was created by negation */
|
||||
|
||||
fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
|
||||
|
||||
/* Handle repeats after a class or a back reference */
|
||||
|
||||
CLASS_REF_REPEAT:
|
||||
switch(*ccode)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
fprintf(f, "%s", OP_names[*ccode]);
|
||||
extra += _pcre_OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
min = GET2(ccode,1);
|
||||
max = GET2(ccode,3);
|
||||
if (max == 0) fprintf(f, "{%d,}", min);
|
||||
else fprintf(f, "{%d,%d}", min, max);
|
||||
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
|
||||
extra += _pcre_OP_lengths[*ccode];
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* Anything else is just an item with no data*/
|
||||
|
||||
default:
|
||||
fprintf(f, " %s", OP_names[*code]);
|
||||
break;
|
||||
}
|
||||
|
||||
code += _pcre_OP_lengths[*code] + extra;
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre_printint.src */
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -63,7 +63,7 @@ Returns: the (possibly updated) count value (a non-negative number), or
|
||||
a negative error number
|
||||
*/
|
||||
|
||||
PCRE_EXPORT int
|
||||
PCRE_DATA_SCOPE int
|
||||
pcre_refcount(pcre *argument_re, int adjust)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)argument_re;
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -95,6 +95,13 @@ set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
|
||||
{
|
||||
register int c;
|
||||
|
||||
#if 0
|
||||
/* ========================================================================= */
|
||||
/* The following comment and code was inserted in January 1999. In May 2006,
|
||||
when it was observed to cause compiler warnings about unused values, I took it
|
||||
out again. If anybody is still using OS/2, they will have to put it back
|
||||
manually. */
|
||||
|
||||
/* This next statement and the later reference to dummy are here in order to
|
||||
trick the optimizer of the IBM C compiler for OS/2 into generating correct
|
||||
code. Apparently IBM isn't going to fix the problem, and we would rather not
|
||||
@@ -102,6 +109,8 @@ disable optimization (in this module it actually makes a big difference, and
|
||||
the pcre module can use all the optimization it can get). */
|
||||
|
||||
volatile int dummy;
|
||||
/* ========================================================================= */
|
||||
#endif
|
||||
|
||||
do
|
||||
{
|
||||
@@ -159,7 +168,11 @@ do
|
||||
case OP_BRAMINZERO:
|
||||
if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
|
||||
return FALSE;
|
||||
/* =========================================================================
|
||||
See the comment at the head of this function concerning the next line,
|
||||
which was an old fudge for the benefit of OS/2.
|
||||
dummy = 1;
|
||||
========================================================================= */
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1+LINK_SIZE;
|
||||
break;
|
||||
@@ -215,15 +228,29 @@ do
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= ~d;
|
||||
}
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_space];
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= d;
|
||||
}
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
@@ -277,14 +304,28 @@ do
|
||||
start_bits[c] |= cd->cbits[c+cbit_digit];
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= ~d;
|
||||
}
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_space];
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= d;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
@@ -401,17 +442,16 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
|
||||
NULL on error or if no optimization possible
|
||||
*/
|
||||
|
||||
PCRE_EXPORT pcre_extra *
|
||||
PCRE_DATA_SCOPE pcre_extra *
|
||||
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
||||
{
|
||||
uschar start_bits[32];
|
||||
pcre_extra *extra;
|
||||
pcre_study_data *study;
|
||||
const uschar *tables;
|
||||
const real_pcre *re = (const real_pcre *)external_re;
|
||||
uschar *code = (uschar *)re + re->name_table_offset +
|
||||
(re->name_count * re->name_entry_size);
|
||||
uschar *code;
|
||||
compile_data compile_block;
|
||||
const real_pcre *re = (const real_pcre *)external_re;
|
||||
|
||||
*errorptr = NULL;
|
||||
|
||||
@@ -427,6 +467,9 @@ if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
code = (uschar *)re + re->name_table_offset +
|
||||
(re->name_count * re->name_entry_size);
|
||||
|
||||
/* For an anchored pattern, or an unanchored pattern that has a first char, or
|
||||
a multiline pattern that matches only at "line starts", no further processing
|
||||
at present. */
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -48,7 +48,7 @@ clashes with the library. */
|
||||
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in internal.h. */
|
||||
the definition is next to the definition of the opcodes in pcre_internal.h. */
|
||||
|
||||
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
||||
|
||||
@@ -82,47 +82,110 @@ const uschar _pcre_utf8_table4[] = {
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
/* This table translates Unicode property names into code values for the
|
||||
ucp_findchar() function. */
|
||||
/* This table translates Unicode property names into type and code values. It
|
||||
is searched by binary chop, so must be in collating sequence of name. */
|
||||
|
||||
const ucp_type_table _pcre_utt[] = {
|
||||
{ "C", 128 + ucp_C },
|
||||
{ "Cc", ucp_Cc },
|
||||
{ "Cf", ucp_Cf },
|
||||
{ "Cn", ucp_Cn },
|
||||
{ "Co", ucp_Co },
|
||||
{ "Cs", ucp_Cs },
|
||||
{ "L", 128 + ucp_L },
|
||||
{ "Ll", ucp_Ll },
|
||||
{ "Lm", ucp_Lm },
|
||||
{ "Lo", ucp_Lo },
|
||||
{ "Lt", ucp_Lt },
|
||||
{ "Lu", ucp_Lu },
|
||||
{ "M", 128 + ucp_M },
|
||||
{ "Mc", ucp_Mc },
|
||||
{ "Me", ucp_Me },
|
||||
{ "Mn", ucp_Mn },
|
||||
{ "N", 128 + ucp_N },
|
||||
{ "Nd", ucp_Nd },
|
||||
{ "Nl", ucp_Nl },
|
||||
{ "No", ucp_No },
|
||||
{ "P", 128 + ucp_P },
|
||||
{ "Pc", ucp_Pc },
|
||||
{ "Pd", ucp_Pd },
|
||||
{ "Pe", ucp_Pe },
|
||||
{ "Pf", ucp_Pf },
|
||||
{ "Pi", ucp_Pi },
|
||||
{ "Po", ucp_Po },
|
||||
{ "Ps", ucp_Ps },
|
||||
{ "S", 128 + ucp_S },
|
||||
{ "Sc", ucp_Sc },
|
||||
{ "Sk", ucp_Sk },
|
||||
{ "Sm", ucp_Sm },
|
||||
{ "So", ucp_So },
|
||||
{ "Z", 128 + ucp_Z },
|
||||
{ "Zl", ucp_Zl },
|
||||
{ "Zp", ucp_Zp },
|
||||
{ "Zs", ucp_Zs }
|
||||
{ "Any", PT_ANY, 0 },
|
||||
{ "Arabic", PT_SC, ucp_Arabic },
|
||||
{ "Armenian", PT_SC, ucp_Armenian },
|
||||
{ "Bengali", PT_SC, ucp_Bengali },
|
||||
{ "Bopomofo", PT_SC, ucp_Bopomofo },
|
||||
{ "Braille", PT_SC, ucp_Braille },
|
||||
{ "Buginese", PT_SC, ucp_Buginese },
|
||||
{ "Buhid", PT_SC, ucp_Buhid },
|
||||
{ "C", PT_GC, ucp_C },
|
||||
{ "Canadian_Aboriginal", PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ "Cc", PT_PC, ucp_Cc },
|
||||
{ "Cf", PT_PC, ucp_Cf },
|
||||
{ "Cherokee", PT_SC, ucp_Cherokee },
|
||||
{ "Cn", PT_PC, ucp_Cn },
|
||||
{ "Co", PT_PC, ucp_Co },
|
||||
{ "Common", PT_SC, ucp_Common },
|
||||
{ "Coptic", PT_SC, ucp_Coptic },
|
||||
{ "Cs", PT_PC, ucp_Cs },
|
||||
{ "Cypriot", PT_SC, ucp_Cypriot },
|
||||
{ "Cyrillic", PT_SC, ucp_Cyrillic },
|
||||
{ "Deseret", PT_SC, ucp_Deseret },
|
||||
{ "Devanagari", PT_SC, ucp_Devanagari },
|
||||
{ "Ethiopic", PT_SC, ucp_Ethiopic },
|
||||
{ "Georgian", PT_SC, ucp_Georgian },
|
||||
{ "Glagolitic", PT_SC, ucp_Glagolitic },
|
||||
{ "Gothic", PT_SC, ucp_Gothic },
|
||||
{ "Greek", PT_SC, ucp_Greek },
|
||||
{ "Gujarati", PT_SC, ucp_Gujarati },
|
||||
{ "Gurmukhi", PT_SC, ucp_Gurmukhi },
|
||||
{ "Han", PT_SC, ucp_Han },
|
||||
{ "Hangul", PT_SC, ucp_Hangul },
|
||||
{ "Hanunoo", PT_SC, ucp_Hanunoo },
|
||||
{ "Hebrew", PT_SC, ucp_Hebrew },
|
||||
{ "Hiragana", PT_SC, ucp_Hiragana },
|
||||
{ "Inherited", PT_SC, ucp_Inherited },
|
||||
{ "Kannada", PT_SC, ucp_Kannada },
|
||||
{ "Katakana", PT_SC, ucp_Katakana },
|
||||
{ "Kharoshthi", PT_SC, ucp_Kharoshthi },
|
||||
{ "Khmer", PT_SC, ucp_Khmer },
|
||||
{ "L", PT_GC, ucp_L },
|
||||
{ "L&", PT_LAMP, 0 },
|
||||
{ "Lao", PT_SC, ucp_Lao },
|
||||
{ "Latin", PT_SC, ucp_Latin },
|
||||
{ "Limbu", PT_SC, ucp_Limbu },
|
||||
{ "Linear_B", PT_SC, ucp_Linear_B },
|
||||
{ "Ll", PT_PC, ucp_Ll },
|
||||
{ "Lm", PT_PC, ucp_Lm },
|
||||
{ "Lo", PT_PC, ucp_Lo },
|
||||
{ "Lt", PT_PC, ucp_Lt },
|
||||
{ "Lu", PT_PC, ucp_Lu },
|
||||
{ "M", PT_GC, ucp_M },
|
||||
{ "Malayalam", PT_SC, ucp_Malayalam },
|
||||
{ "Mc", PT_PC, ucp_Mc },
|
||||
{ "Me", PT_PC, ucp_Me },
|
||||
{ "Mn", PT_PC, ucp_Mn },
|
||||
{ "Mongolian", PT_SC, ucp_Mongolian },
|
||||
{ "Myanmar", PT_SC, ucp_Myanmar },
|
||||
{ "N", PT_GC, ucp_N },
|
||||
{ "Nd", PT_PC, ucp_Nd },
|
||||
{ "New_Tai_Lue", PT_SC, ucp_New_Tai_Lue },
|
||||
{ "Nl", PT_PC, ucp_Nl },
|
||||
{ "No", PT_PC, ucp_No },
|
||||
{ "Ogham", PT_SC, ucp_Ogham },
|
||||
{ "Old_Italic", PT_SC, ucp_Old_Italic },
|
||||
{ "Old_Persian", PT_SC, ucp_Old_Persian },
|
||||
{ "Oriya", PT_SC, ucp_Oriya },
|
||||
{ "Osmanya", PT_SC, ucp_Osmanya },
|
||||
{ "P", PT_GC, ucp_P },
|
||||
{ "Pc", PT_PC, ucp_Pc },
|
||||
{ "Pd", PT_PC, ucp_Pd },
|
||||
{ "Pe", PT_PC, ucp_Pe },
|
||||
{ "Pf", PT_PC, ucp_Pf },
|
||||
{ "Pi", PT_PC, ucp_Pi },
|
||||
{ "Po", PT_PC, ucp_Po },
|
||||
{ "Ps", PT_PC, ucp_Ps },
|
||||
{ "Runic", PT_SC, ucp_Runic },
|
||||
{ "S", PT_GC, ucp_S },
|
||||
{ "Sc", PT_PC, ucp_Sc },
|
||||
{ "Shavian", PT_SC, ucp_Shavian },
|
||||
{ "Sinhala", PT_SC, ucp_Sinhala },
|
||||
{ "Sk", PT_PC, ucp_Sk },
|
||||
{ "Sm", PT_PC, ucp_Sm },
|
||||
{ "So", PT_PC, ucp_So },
|
||||
{ "Syloti_Nagri", PT_SC, ucp_Syloti_Nagri },
|
||||
{ "Syriac", PT_SC, ucp_Syriac },
|
||||
{ "Tagalog", PT_SC, ucp_Tagalog },
|
||||
{ "Tagbanwa", PT_SC, ucp_Tagbanwa },
|
||||
{ "Tai_Le", PT_SC, ucp_Tai_Le },
|
||||
{ "Tamil", PT_SC, ucp_Tamil },
|
||||
{ "Telugu", PT_SC, ucp_Telugu },
|
||||
{ "Thaana", PT_SC, ucp_Thaana },
|
||||
{ "Thai", PT_SC, ucp_Thai },
|
||||
{ "Tibetan", PT_SC, ucp_Tibetan },
|
||||
{ "Tifinagh", PT_SC, ucp_Tifinagh },
|
||||
{ "Ugaritic", PT_SC, ucp_Ugaritic },
|
||||
{ "Yi", PT_SC, ucp_Yi },
|
||||
{ "Z", PT_GC, ucp_Z },
|
||||
{ "Zl", PT_PC, ucp_Zl },
|
||||
{ "Zp", PT_PC, ucp_Zp },
|
||||
{ "Zs", PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -62,8 +62,8 @@ Arguments:
|
||||
Returns: the flipped value
|
||||
*/
|
||||
|
||||
static long int
|
||||
byteflip(long int value, int n)
|
||||
static unsigned long int
|
||||
byteflip(unsigned long int value, int n)
|
||||
{
|
||||
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
|
||||
return ((value & 0x000000ff) << 24) |
|
||||
@@ -94,7 +94,7 @@ Returns: the new block if it is indeed a byte-flipped regex
|
||||
NULL if it is not
|
||||
*/
|
||||
|
||||
PCRE_EXPORT real_pcre *
|
||||
real_pcre *
|
||||
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
|
||||
const pcre_study_data *study, pcre_study_data *internal_study)
|
||||
{
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -52,10 +52,13 @@ string that identifies the PCRE version that is in use. */
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
PCRE_EXPORT const char *
|
||||
PCRE_DATA_SCOPE const char *
|
||||
pcre_version(void)
|
||||
{
|
||||
return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
|
||||
return XSTRING(PCRE_MAJOR)
|
||||
"." XSTRING(PCRE_MINOR)
|
||||
XSTRING(PCRE_PRERELEASE)
|
||||
" " XSTRING(PCRE_DATE);
|
||||
}
|
||||
|
||||
/* End of pcre_version.c */
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -60,7 +60,7 @@ Arguments:
|
||||
Returns: TRUE if character matches, else FALSE
|
||||
*/
|
||||
|
||||
PCRE_EXPORT BOOL
|
||||
BOOL
|
||||
_pcre_xclass(int c, const uschar *data)
|
||||
{
|
||||
int t;
|
||||
@@ -100,17 +100,40 @@ while ((t = *data++) != XCL_END)
|
||||
#ifdef SUPPORT_UCP
|
||||
else /* XCL_PROP & XCL_NOTPROP */
|
||||
{
|
||||
int chartype, othercase;
|
||||
int rqdtype = *data++;
|
||||
int category = _pcre_ucp_findchar(c, &chartype, &othercase);
|
||||
if (rqdtype >= 128)
|
||||
int chartype, script;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
|
||||
switch(*data)
|
||||
{
|
||||
if ((rqdtype - 128 == category) == (t == XCL_PROP)) return !negated;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((rqdtype == chartype) == (t == XCL_PROP)) return !negated;
|
||||
case PT_ANY:
|
||||
if (t == XCL_PROP) return !negated;
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==
|
||||
(t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
if ((data[1] == category) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
if ((data[1] == script) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
/* This should never occur, but compilers may mutter if there is no
|
||||
default. */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
data += 2;
|
||||
}
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -77,7 +77,7 @@ static const int eint[] = {
|
||||
REG_ASSERT, /* internal error: code overflow */
|
||||
REG_BADPAT, /* unrecognized character after (?< */
|
||||
REG_BADPAT, /* lookbehind assertion is not fixed length */
|
||||
REG_BADPAT, /* malformed number after (?( */
|
||||
REG_BADPAT, /* malformed number or name after (?( */
|
||||
REG_BADPAT, /* conditional group contains more than two branches */
|
||||
REG_BADPAT, /* assertion expected after (?( */
|
||||
REG_BADPAT, /* (?R or (?digits must be followed by ) */
|
||||
@@ -94,11 +94,15 @@ static const int eint[] = {
|
||||
REG_BADPAT, /* recursive call could loop indefinitely */
|
||||
REG_BADPAT, /* unrecognized character after (?P */
|
||||
REG_BADPAT, /* syntax error after (?P */
|
||||
REG_BADPAT, /* two named groups have the same name */
|
||||
REG_BADPAT, /* two named subpatterns have the same name */
|
||||
REG_BADPAT, /* invalid UTF-8 string */
|
||||
REG_BADPAT, /* support for \P, \p, and \X has not been compiled */
|
||||
REG_BADPAT, /* malformed \P or \p sequence */
|
||||
REG_BADPAT /* unknown property name after \P or \p */
|
||||
REG_BADPAT, /* unknown property name after \P or \p */
|
||||
REG_BADPAT, /* subpattern name is too long (maximum 32 characters) */
|
||||
REG_BADPAT, /* too many named subpatterns (maximum 10,000) */
|
||||
REG_BADPAT, /* repeated subpattern is too long */
|
||||
REG_BADPAT /* octal value is greater than \377 (not in UTF-8 mode) */
|
||||
};
|
||||
|
||||
/* Table of texts corresponding to POSIX error codes */
|
||||
@@ -131,7 +135,7 @@ static const char *const pstring[] = {
|
||||
* Translate error code to string *
|
||||
*************************************************/
|
||||
|
||||
PCRE_EXPORT size_t
|
||||
PCRE_DATA_SCOPE size_t
|
||||
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
||||
{
|
||||
const char *message, *addmessage;
|
||||
@@ -166,7 +170,7 @@ return length + addlength;
|
||||
* Free store held by a regex *
|
||||
*************************************************/
|
||||
|
||||
PCRE_EXPORT void
|
||||
PCRE_DATA_SCOPE void
|
||||
regfree(regex_t *preg)
|
||||
{
|
||||
(pcre_free)(preg->re_pcre);
|
||||
@@ -189,7 +193,7 @@ Returns: 0 on success
|
||||
various non-zero codes on failure
|
||||
*/
|
||||
|
||||
PCRE_EXPORT int
|
||||
PCRE_DATA_SCOPE int
|
||||
regcomp(regex_t *preg, const char *pattern, int cflags)
|
||||
{
|
||||
const char *errorptr;
|
||||
@@ -200,6 +204,8 @@ int options = 0;
|
||||
if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
|
||||
if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
|
||||
if ((cflags & REG_DOTALL) != 0) options |= PCRE_DOTALL;
|
||||
if ((cflags & REG_NOSUB) != 0) options |= PCRE_NO_AUTO_CAPTURE;
|
||||
if ((cflags & REG_UTF8) != 0) options |= PCRE_UTF8;
|
||||
|
||||
preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,
|
||||
&erroffset, NULL);
|
||||
@@ -223,9 +229,13 @@ substring, so we have to get and release working store instead of just using
|
||||
the POSIX structures as was done in earlier releases when PCRE needed only 2
|
||||
ints. However, if the number of possible capturing brackets is small, use a
|
||||
block of store on the stack, to reduce the use of malloc/free. The threshold is
|
||||
in a macro that can be changed at configure time. */
|
||||
in a macro that can be changed at configure time.
|
||||
|
||||
PCRE_EXPORT int
|
||||
If REG_NOSUB was specified at compile time, the PCRE_NO_AUTO_CAPTURE flag will
|
||||
be set. When this is the case, the nmatch and pmatch arguments are ignored, and
|
||||
the only result is yes/no/error. */
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
regexec(const regex_t *preg, const char *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags)
|
||||
{
|
||||
@@ -234,13 +244,20 @@ int options = 0;
|
||||
int *ovector = NULL;
|
||||
int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
|
||||
BOOL allocated_ovector = FALSE;
|
||||
BOOL nosub =
|
||||
(((const pcre *)preg->re_pcre)->options & PCRE_NO_AUTO_CAPTURE) != 0;
|
||||
|
||||
if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
|
||||
if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
|
||||
|
||||
((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
|
||||
|
||||
if (nmatch > 0)
|
||||
/* When no string data is being returned, ensure that nmatch is zero.
|
||||
Otherwise, ensure the vector for holding the return data is large enough. */
|
||||
|
||||
if (nosub) nmatch = 0;
|
||||
|
||||
else if (nmatch > 0)
|
||||
{
|
||||
if (nmatch <= POSIX_MALLOC_THRESHOLD)
|
||||
{
|
||||
@@ -248,6 +265,7 @@ if (nmatch > 0)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (nmatch > INT_MAX/(sizeof(int) * 3)) return REG_ESPACE;
|
||||
ovector = (int *)malloc(sizeof(int) * nmatch * 3);
|
||||
if (ovector == NULL) return REG_ESPACE;
|
||||
allocated_ovector = TRUE;
|
||||
@@ -262,6 +280,8 @@ if (rc == 0) rc = nmatch; /* All captured slots were filled in */
|
||||
if (rc >= 0)
|
||||
{
|
||||
size_t i;
|
||||
if (!nosub)
|
||||
{
|
||||
for (i = 0; i < (size_t)rc; i++)
|
||||
{
|
||||
pmatch[i].rm_so = ovector[i*2];
|
||||
@@ -269,6 +289,7 @@ if (rc >= 0)
|
||||
}
|
||||
if (allocated_ovector) free(ovector);
|
||||
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
Compatible Regular Expression library. It defines the things POSIX says should
|
||||
be there. I hope.
|
||||
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -50,22 +50,20 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options defined by POSIX. */
|
||||
/* Options, mostly defined by POSIX, but with a couple of extras. */
|
||||
|
||||
#define REG_ICASE 0x01
|
||||
#define REG_NEWLINE 0x02
|
||||
#define REG_NOTBOL 0x04
|
||||
#define REG_NOTEOL 0x08
|
||||
#define REG_ICASE 0x0001
|
||||
#define REG_NEWLINE 0x0002
|
||||
#define REG_NOTBOL 0x0004
|
||||
#define REG_NOTEOL 0x0008
|
||||
#define REG_DOTALL 0x0010 /* NOT defined by POSIX. */
|
||||
#define REG_NOSUB 0x0020
|
||||
#define REG_UTF8 0x0040 /* NOT defined by POSIX. */
|
||||
|
||||
/* Additional options, not defined by POSIX, but somebody wanted them. */
|
||||
|
||||
#define REG_DOTALL 0x10
|
||||
|
||||
/* These are not used by PCRE, but by defining them we make it easier
|
||||
/* This is not used by PCRE, but by defining it we make it easier
|
||||
to slot PCRE into existing programs that make POSIX calls. */
|
||||
|
||||
#define REG_EXTENDED 0
|
||||
#define REG_NOSUB 0
|
||||
|
||||
/* Error values. Not all these are relevant or used by the wrapper. */
|
||||
|
||||
@@ -107,12 +105,40 @@ typedef struct {
|
||||
regoff_t rm_eo;
|
||||
} regmatch_t;
|
||||
|
||||
/* Win32 uses DLL by default; it needs special stuff for exported functions
|
||||
when building PCRE. */
|
||||
|
||||
#ifndef PCRE_DATA_SCOPE
|
||||
#ifdef _WIN32
|
||||
# ifdef PCRE_DEFINITION
|
||||
# ifdef DLL_EXPORT
|
||||
# define PCRE_DATA_SCOPE __declspec(dllexport)
|
||||
# endif
|
||||
# else
|
||||
# ifndef PCRE_STATIC
|
||||
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Otherwise, we use the standard "extern". */
|
||||
|
||||
#ifndef PCRE_DATA_SCOPE
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_DATA_SCOPE extern "C"
|
||||
# else
|
||||
# define PCRE_DATA_SCOPE extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* The functions */
|
||||
|
||||
extern int regcomp(regex_t *, const char *, int);
|
||||
extern int regexec(const regex_t *, const char *, size_t, regmatch_t *, int);
|
||||
extern size_t regerror(int, const regex_t *, char *, size_t);
|
||||
extern void regfree(regex_t *);
|
||||
PCRE_DATA_SCOPE int regcomp(regex_t *, const char *, int);
|
||||
PCRE_DATA_SCOPE int regexec(const regex_t *, const char *, size_t,
|
||||
regmatch_t *, int);
|
||||
PCRE_DATA_SCOPE size_t regerror(int, const regex_t *, char *, size_t);
|
||||
PCRE_DATA_SCOPE void regfree(regex_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
/*************************************************
|
||||
* libucp - Unicode Property Table handler *
|
||||
* Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
|
||||
#ifndef _UCP_H
|
||||
#define _UCP_H
|
||||
|
||||
/* These are the character categories that are returned by ucp_findchar */
|
||||
/* This file contains definitions of the property values that are returned by
|
||||
the function _pcre_ucp_findprop(). */
|
||||
|
||||
/* These are the general character categories. */
|
||||
|
||||
enum {
|
||||
ucp_C, /* Other */
|
||||
@@ -18,7 +20,7 @@ enum {
|
||||
ucp_Z /* Separator */
|
||||
};
|
||||
|
||||
/* These are the detailed character types that are returned by ucp_findchar */
|
||||
/* These are the particular character types. */
|
||||
|
||||
enum {
|
||||
ucp_Cc, /* Control */
|
||||
@@ -53,6 +55,72 @@ enum {
|
||||
ucp_Zs /* Space separator */
|
||||
};
|
||||
|
||||
/* These are the script identifications. */
|
||||
|
||||
enum {
|
||||
ucp_Arabic,
|
||||
ucp_Armenian,
|
||||
ucp_Bengali,
|
||||
ucp_Bopomofo,
|
||||
ucp_Braille,
|
||||
ucp_Buginese,
|
||||
ucp_Buhid,
|
||||
ucp_Canadian_Aboriginal,
|
||||
ucp_Cherokee,
|
||||
ucp_Common,
|
||||
ucp_Coptic,
|
||||
ucp_Cypriot,
|
||||
ucp_Cyrillic,
|
||||
ucp_Deseret,
|
||||
ucp_Devanagari,
|
||||
ucp_Ethiopic,
|
||||
ucp_Georgian,
|
||||
ucp_Glagolitic,
|
||||
ucp_Gothic,
|
||||
ucp_Greek,
|
||||
ucp_Gujarati,
|
||||
ucp_Gurmukhi,
|
||||
ucp_Han,
|
||||
ucp_Hangul,
|
||||
ucp_Hanunoo,
|
||||
ucp_Hebrew,
|
||||
ucp_Hiragana,
|
||||
ucp_Inherited,
|
||||
ucp_Kannada,
|
||||
ucp_Katakana,
|
||||
ucp_Kharoshthi,
|
||||
ucp_Khmer,
|
||||
ucp_Lao,
|
||||
ucp_Latin,
|
||||
ucp_Limbu,
|
||||
ucp_Linear_B,
|
||||
ucp_Malayalam,
|
||||
ucp_Mongolian,
|
||||
ucp_Myanmar,
|
||||
ucp_New_Tai_Lue,
|
||||
ucp_Ogham,
|
||||
ucp_Old_Italic,
|
||||
ucp_Old_Persian,
|
||||
ucp_Oriya,
|
||||
ucp_Osmanya,
|
||||
ucp_Runic,
|
||||
ucp_Shavian,
|
||||
ucp_Sinhala,
|
||||
ucp_Syloti_Nagri,
|
||||
ucp_Syriac,
|
||||
ucp_Tagalog,
|
||||
ucp_Tagbanwa,
|
||||
ucp_Tai_Le,
|
||||
ucp_Tamil,
|
||||
ucp_Telugu,
|
||||
ucp_Thaana,
|
||||
ucp_Thai,
|
||||
ucp_Tibetan,
|
||||
ucp_Tifinagh,
|
||||
ucp_Ugaritic,
|
||||
ucp_Yi
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* End of ucp.h */
|
||||
|
||||
Reference in New Issue
Block a user