From 2f750998efaf8183b86bb4cea64d8dc7ffb91fbe Mon Sep 17 00:00:00 2001 From: dmiller Date: Tue, 15 Apr 2025 15:08:06 +0000 Subject: [PATCH] Update libpcre to 10.45 --- CHANGELOG | 3 +- libpcre/AUTHORS | 36 - libpcre/AUTHORS.md | 200 + libpcre/BUILD.bazel | 172 + libpcre/CMakeLists.txt | 1541 ++- libpcre/{LICENCE => LICENCE.md} | 75 +- libpcre/MODULE.bazel | 9 + libpcre/Makefile.am | 51 +- libpcre/Makefile.in | 220 +- libpcre/NMAP_MODIFICATIONS | 14 +- libpcre/SECURITY.md | 44 + libpcre/WORKSPACE.bazel | 1 + libpcre/aclocal.m4 | 381 + libpcre/build.zig | 173 + libpcre/cmake/COPYING-CMAKE-SCRIPTS | 2 +- .../cmake/FindPackageHandleStandardArgs.cmake | 58 - libpcre/cmake/pcre2-config-version.cmake.in | 3 +- libpcre/cmake/pcre2-config.cmake.in | 118 +- libpcre/config-cmake.h.in | 14 +- libpcre/config.guess | 16 +- libpcre/config.sub | 20 +- libpcre/configure | 2176 ++++- libpcre/configure.ac | 464 +- libpcre/ltmain.sh | 888 +- libpcre/m4/libtool.m4 | 251 +- libpcre/m4/ltoptions.m4 | 4 +- libpcre/m4/ltsugar.m4 | 2 +- libpcre/m4/ltversion.m4 | 13 +- libpcre/m4/lt~obsolete.m4 | 4 +- libpcre/m4/pcre2_visibility.m4 | 93 +- libpcre/src/config.h.generic | 54 +- libpcre/src/config.h.in | 94 +- libpcre/src/pcre2.h.generic | 72 +- libpcre/src/pcre2.h.in | 68 +- libpcre/src/pcre2_auto_possess.c | 143 +- libpcre/src/pcre2_chkdint.c | 6 +- libpcre/src/pcre2_compile.c | 3195 +++--- libpcre/src/pcre2_compile.h | 280 + libpcre/src/pcre2_compile_class.c | 2737 ++++++ libpcre/src/pcre2_config.c | 4 +- libpcre/src/pcre2_context.c | 66 +- libpcre/src/pcre2_convert.c | 18 +- libpcre/src/pcre2_dfa_match.c | 107 +- libpcre/src/pcre2_error.c | 33 +- libpcre/src/pcre2_extuni.c | 30 +- libpcre/src/pcre2_find_bracket.c | 15 +- libpcre/src/pcre2_internal.h | 363 +- libpcre/src/pcre2_intmodedep.h | 66 +- libpcre/src/pcre2_jit_compile.c | 3678 +++---- libpcre/src/pcre2_maketables.c | 10 +- libpcre/src/pcre2_match.c | 617 +- libpcre/src/pcre2_match_data.c | 8 +- libpcre/src/pcre2_ord2utf.c | 2 +- 
libpcre/src/pcre2_pattern_info.c | 14 +- libpcre/src/pcre2_serialize.c | 36 +- libpcre/src/pcre2_study.c | 238 +- libpcre/src/pcre2_substitute.c | 1028 +- libpcre/src/pcre2_substring.c | 4 +- libpcre/src/pcre2_tables.c | 12 +- libpcre/src/pcre2_ucd.c | 8658 +++++++++-------- libpcre/src/pcre2_ucp.h | 62 +- libpcre/src/pcre2_ucptables.c | 777 +- libpcre/src/pcre2_util.h | 132 + libpcre/src/pcre2_xclass.c | 395 +- libpcre/src/pcre2posix.c | 14 +- libpcre/src/pcre2posix_test.c | 209 - libpcre/vms/configure.com | 1152 +++ libpcre/vms/openvms_readme.txt | 20 + libpcre/vms/pcre2.h_patch | 12 + libpcre/vms/stdint.h | 9 + 70 files changed, 20671 insertions(+), 10813 deletions(-) delete mode 100644 libpcre/AUTHORS create mode 100644 libpcre/AUTHORS.md create mode 100644 libpcre/BUILD.bazel rename libpcre/{LICENCE => LICENCE.md} (55%) create mode 100644 libpcre/MODULE.bazel create mode 100644 libpcre/SECURITY.md create mode 100644 libpcre/WORKSPACE.bazel create mode 100644 libpcre/build.zig delete mode 100644 libpcre/cmake/FindPackageHandleStandardArgs.cmake mode change 100644 => 100755 libpcre/ltmain.sh create mode 100644 libpcre/src/pcre2_compile.h create mode 100644 libpcre/src/pcre2_compile_class.c create mode 100644 libpcre/src/pcre2_util.h delete mode 100644 libpcre/src/pcre2posix_test.c create mode 100644 libpcre/vms/configure.com create mode 100644 libpcre/vms/openvms_readme.txt create mode 100644 libpcre/vms/pcre2.h_patch create mode 100644 libpcre/vms/stdint.h diff --git a/CHANGELOG b/CHANGELOG index 2ecce0817..df5fa0a68 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,7 @@ #Nmap Changelog ($Id$); -*-text-*- -o Upgraded included libraries: Lua 5.4.7, libssh2 1.11.1, libpcap 1.10.5 +o Upgraded included libraries: Lua 5.4.7, libssh2 1.11.1, libpcap 1.10.5, + libpcre 10.45 o [NSE] New script targets-ipv6-eui64 generates target IPv6 addresses from a user-provided file of MAC addresses, using the EUI-64 method. 
[Daniel Miller] diff --git a/libpcre/AUTHORS b/libpcre/AUTHORS deleted file mode 100644 index 9669f7755..000000000 --- a/libpcre/AUTHORS +++ /dev/null @@ -1,36 +0,0 @@ -THE MAIN PCRE2 LIBRARY CODE ---------------------------- - -Written by: Philip Hazel -Email local part: Philip.Hazel -Email domain: gmail.com - -Retired from University of Cambridge Computing Service, -Cambridge, England. - -Copyright (c) 1997-2024 University of Cambridge -All rights reserved - - -PCRE2 JUST-IN-TIME COMPILATION SUPPORT --------------------------------------- - -Written by: Zoltan Herczeg -Email local part: hzmester -Emain domain: freemail.hu - -Copyright(c) 2010-2024 Zoltan Herczeg -All rights reserved. - - -STACK-LESS JUST-IN-TIME COMPILER --------------------------------- - -Written by: Zoltan Herczeg -Email local part: hzmester -Emain domain: freemail.hu - -Copyright(c) 2009-2024 Zoltan Herczeg -All rights reserved. - -#### diff --git a/libpcre/AUTHORS.md b/libpcre/AUTHORS.md new file mode 100644 index 000000000..708fc2325 --- /dev/null +++ b/libpcre/AUTHORS.md @@ -0,0 +1,200 @@ +PCRE2 Authorship and Contributors +================================= + +COPYRIGHT +--------- + +Please see the file [LICENCE](./LICENCE.md) in the PCRE2 distribution for +copyright details. + + +MAINTAINERS +----------- + +The PCRE and PCRE2 libraries were authored and maintained by Philip Hazel. + +Since 2024, the contributors with administrator access to the project are now +Nicholas Wilson and Zoltán Herczeg. See the file [SECURITY](./SECURITY.md) for +GPG keys. + +Both administrators are volunteers acting in a personal capacity. + + + + + + + + + + + + + + + + + + +
NameRole
+ + Nicholas Wilson
+ `nicholas@nicholaswilson.me.uk`
+ Currently of Microsoft Research Cambridge, UK + +
+ + * General project administration & maintenance + * Release management + * Code maintenance + +
+ + Zoltán Herczeg
+ `hzmester@freemail.hu`
+ Currently of the University of Szeged, Hungary + +
+ + * Code maintenance + * Ownership of `sljit` and PCRE2's JIT + +
+ + +CONTRIBUTORS +------------ + +Many others have participated and contributed to PCRE2 over its history. + +The maintainers are grateful for all contributions and participation over the +years. We apologise for any names we have forgotten. + +We are especially grateful to Philip Hazel, creator of PCRE and PCRE2, and +maintainer from 1997 to 2024. + +All names listed alphabetically. + +### Contributors to PCRE2 + +This list includes names up until the PCRE2 10.44 release. New names will be +added from the Git history on each release. + + Scott Bell + Carlo Marcelo Arenas Belón + Edward Betts + Jan-Willem Blokland + Ross Burton + Dmitry Cherniachenko + Alexey Chupahin + Jessica Clarke + Alejandro Colomar + Jeremie Courreges-Anglas + Addison Crump + Alex Dowad + Daniel Engberg + Daniel Richard G + David Gaussmann + Andrey Gorbachev + Jordan Griege + Jason Hood + Bumsu Hyeon + Roy Ivy + Martin Joerg + Guillem Jover + Ralf Junker + Ayesh Karunaratne + Michael Kaufmann + Yunho Kim + Joshua Kinard + David Korczynski + Uwe Korn + Jonas Kvinge + Kristian Larsson + Kai Lu + Behzod Mansurov + B. Scott Michel + Nathan Moinvaziri + Mike Munday + Marc Mutz + Fabio Pagani + Christian Persch + Tristan Ross + William A Rowe Jr + David Seifert + Yaakov Selkowitz + Rich Siegel + Karl Skomski + Maciej Sroczyński + Wolfgang Stöggl + Thomas Tempelmann + Greg Thain + Lucas Trzesniewski + Theodore Tsirpanis + Matthew Vernon + Rémi Verschelde + Thomas Voss + Ezekiel Warren + Carl Weaver + Chris Wilson + Amin Yahyaabadi + Joe Zhang + +### Contributors to PCRE1 + +These people contributed either by sending patches or reporting serious issues. 
+ + Irfan Adilovic + Alexander Barkov + Daniel Bergström + David Burgess + Ross Burton + David Byron + Fred Cox + Christian Ehrlicher + Tom Fortmann + Lionel Fourquaux + Mike Frysinger + Daniel Richard G + Dair Gran + "Graycode" (Red Hat Product Security) + Viktor Griph + Wen Guanxing + Robin Houston + Martin Jerabek + Peter Kankowski + Stephen Kelly + Yunho Kim + Joshua Kinard + Carsten Klein + Evgeny Kotkov + Ronald Landheer-Cieslak + Alan Lehotsky + Dmitry V. Levin + Nuno Lopes + Kai Lu + Giuseppe Maxia + Dan Mooney + Marc Mutz + Markus Oberhumer + Sheri Pierce + Petr Pisar + Ari Pollak + Bob Rossi + Ruiger Rill + Michael Shigorin + Rich Siegel + Craig Silverstein (C++ wrapper) + Karl Skomski + Paul Sokolovsky + Stan Switzer + Ian Taylor + Mark Tetrode + Jeff Trawick + Steven Van Ingelgem + Lawrence Velazquez + Jiong Wang + Stefan Weber + Chris Wilson + +Thanks go to Jeffrey Friedl for testing and debugging assistance. diff --git a/libpcre/BUILD.bazel b/libpcre/BUILD.bazel new file mode 100644 index 000000000..c975eadd7 --- /dev/null +++ b/libpcre/BUILD.bazel @@ -0,0 +1,172 @@ +load("@bazel_skylib//rules:copy_file.bzl", "copy_file") +load("@bazel_skylib//rules:native_binary.bzl", "native_test") +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") + +copy_file( + name = "config_h_generic", + src = "src/config.h.generic", + out = "src/config.h", +) + +copy_file( + name = "pcre2_h_generic", + src = "src/pcre2.h.generic", + out = "src/pcre2.h", +) + +copy_file( + name = "pcre2_chartables_c", + src = "src/pcre2_chartables.c.dist", + out = "src/pcre2_chartables.c", +) + +# Removed src/pcre2_ucptables.c below because it is #included in +# src/pcre2_tables.c. Also fixed typo: ckdint should be chkdint. +# PH, 22-March-2023. 
+cc_library( + name = "pcre2", + srcs = [ + "src/pcre2_auto_possess.c", + "src/pcre2_chkdint.c", + "src/pcre2_compile.c", + "src/pcre2_compile_class.c", + "src/pcre2_config.c", + "src/pcre2_context.c", + "src/pcre2_convert.c", + "src/pcre2_dfa_match.c", + "src/pcre2_error.c", + "src/pcre2_extuni.c", + "src/pcre2_find_bracket.c", + "src/pcre2_jit_compile.c", + "src/pcre2_maketables.c", + "src/pcre2_match.c", + "src/pcre2_match_data.c", + "src/pcre2_newline.c", + "src/pcre2_ord2utf.c", + "src/pcre2_pattern_info.c", + "src/pcre2_script_run.c", + "src/pcre2_serialize.c", + "src/pcre2_string_utils.c", + "src/pcre2_study.c", + "src/pcre2_substitute.c", + "src/pcre2_substring.c", + "src/pcre2_tables.c", + "src/pcre2_ucd.c", + "src/pcre2_valid_utf.c", + "src/pcre2_xclass.c", + ":pcre2_chartables_c", + "src/pcre2_compile.h", + "src/pcre2_internal.h", + "src/pcre2_intmodedep.h", + "src/pcre2_ucp.h", + "src/pcre2_util.h", + ":config_h_generic", + ], + textual_hdrs = [ + "src/pcre2_jit_match.c", + "src/pcre2_jit_misc.c", + "src/pcre2_ucptables.c", + ], + hdrs = [ + ":pcre2_h_generic", + ], + local_defines = [ + "HAVE_CONFIG_H", + "HAVE_MEMMOVE", + "PCRE2_CODE_UNIT_WIDTH=8", + "PCRE2_STATIC", + "SUPPORT_UNICODE", + ], + includes = ["src"], + strip_include_prefix = "src", + visibility = ["//visibility:public"], +) + +cc_library( + name = "pcre2-posix", + srcs = [ + "src/pcre2posix.c", + ":config_h_generic", + ], + hdrs = [ + "src/pcre2posix.h", + ], + local_defines = [ + "HAVE_CONFIG_H", + "HAVE_MEMMOVE", + "PCRE2_CODE_UNIT_WIDTH=8", + "PCRE2_STATIC", + "SUPPORT_UNICODE", + ], + includes = ["src"], + strip_include_prefix = "src", + visibility = ["//visibility:public"], + deps = [":pcre2"], +) + +# Totally weird issue in Bazel. It won't let you #include any files unless they +# are declared to the build system. OK, fair enough. But - for a cc_binary it +# uses the file extension to determine whether it's a header or a compilation +# unit. But... 
we have several .c files which are #included, rather than treated +# as a compilation unit. +# +# For cc_library() above, we can overcome this with textual_hdrs. But that +# doesn't work for cc_binary(). Here's our workaround. +# +# https://github.com/bazelbuild/bazel/issues/680 +cc_library( + name = "pcre2test_dotc_headers", + hdrs = [ + "src/pcre2_chkdint.c", + "src/pcre2_printint.c", + "src/pcre2_tables.c", + "src/pcre2_ucd.c", + "src/pcre2_valid_utf.c", + ], + strip_include_prefix = "src", + visibility = ["//visibility:private"], +) + +cc_binary( + name = "pcre2test", + srcs = [ + "src/pcre2test.c", + ":config_h_generic", + ], + local_defines = [ + "HAVE_CONFIG_H", + "HAVE_MEMMOVE", + "HAVE_STRERROR", + "PCRE2_STATIC", + "SUPPORT_UNICODE", + "SUPPORT_PCRE2_8", + ] + select({ + "@platforms//os:windows": [], + "//conditions:default": ["HAVE_UNISTD_H"], + }), + linkopts = select({ + "@platforms//os:windows": ["-STACK:2500000"], + "//conditions:default": [], + }), + visibility = ["//visibility:public"], + deps = [":pcre2test_dotc_headers", ":pcre2", ":pcre2-posix"], +) + +filegroup( + name = "testdata", + srcs = glob(["testdata/*"]), +) + +native_test( + name = "pcre2_test", + src = select({ + "@platforms//os:windows": "RunTest.bat", + "//conditions:default": "RunTest", + }), + out = select({ + "@platforms//os:windows": "RunTest.bat", + "//conditions:default": "RunTest", + }), + data = [":pcre2test", ":testdata"], + size = "small", +) \ No newline at end of file diff --git a/libpcre/CMakeLists.txt b/libpcre/CMakeLists.txt index 53e903d63..ca640004b 100644 --- a/libpcre/CMakeLists.txt +++ b/libpcre/CMakeLists.txt @@ -103,9 +103,16 @@ # 2023-01-15 Carlo added C99 as the minimum required # 2023-08-06 PH added support for setting variable length lookbehind maximum -# Increased minimum to 3.5 to workaround deprecated backward compatibility -# since 3.27. 
-cmake_minimum_required(VERSION 3.5 FATAL_ERROR) +################################################################################ +# We have used `gersemi` for auto-formatting our CMake files. +# Applied to all CMake files using: +# > pip3 install gersemi +# > gersemi --in-place --line-length 120 --indent 2 \ +# ./CMakeLists.txt ./cmake/*.cmake ./cmake/*.cmake.in +################################################################################ + +# Increased minimum to 3.15 to allow use of string(REPEAT). +cmake_minimum_required(VERSION 3.15 FATAL_ERROR) project(PCRE2 C) set(CMAKE_C_STANDARD 99) set(CMAKE_C_STANDARD_REQUIRED TRUE) @@ -126,325 +133,551 @@ cmake_policy(SET CMP0063 NEW) # on the command line. # SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) -LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) +list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) -INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/src) +include_directories(${PROJECT_SOURCE_DIR}/src) + +# external packages +find_package(BZip2) +find_package(ZLIB) +find_package(Readline) +find_package(Editline) # Configuration checks -INCLUDE(CheckCSourceCompiles) -INCLUDE(CheckFunctionExists) -INCLUDE(CheckSymbolExists) -INCLUDE(CheckIncludeFile) -INCLUDE(CheckTypeSize) -INCLUDE(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR +include(CheckCSourceCompiles) +include(CheckFunctionExists) +include(CheckSymbolExists) +include(CheckIncludeFile) +include(CheckTypeSize) +include(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR -CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H) -CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H) -CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H) -CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H) -CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H) +check_include_file(assert.h HAVE_ASSERT_H) +check_include_file(dirent.h HAVE_DIRENT_H) +check_include_file(sys/stat.h HAVE_SYS_STAT_H) +check_include_file(sys/types.h HAVE_SYS_TYPES_H) +check_include_file(unistd.h HAVE_UNISTD_H) +check_include_file(windows.h 
HAVE_WINDOWS_H) -CHECK_SYMBOL_EXISTS(bcopy "strings.h" HAVE_BCOPY) -CHECK_SYMBOL_EXISTS(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE) -CHECK_SYMBOL_EXISTS(memmove "string.h" HAVE_MEMMOVE) -CHECK_SYMBOL_EXISTS(secure_getenv "stdlib.h" HAVE_SECURE_GETENV) -CHECK_SYMBOL_EXISTS(strerror "string.h" HAVE_STRERROR) +check_symbol_exists(bcopy "strings.h" HAVE_BCOPY) +check_symbol_exists(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE) +check_symbol_exists(memmove "string.h" HAVE_MEMMOVE) +check_symbol_exists(secure_getenv "stdlib.h" HAVE_SECURE_GETENV) +check_symbol_exists(strerror "string.h" HAVE_STRERROR) -CHECK_C_SOURCE_COMPILES( - "#include - #include - int main(int c, char *v[]) { char buf[PATH_MAX]; realpath(v[c], buf); return 0; }" +check_c_source_compiles( + [=[ + #include + #include + int main(int c, char *v[]) { char buf[PATH_MAX]; realpath(v[c], buf); return 0; } + ]=] HAVE_REALPATH ) set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) -set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror") +if(NOT MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "XL") + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror") +endif() -CHECK_C_SOURCE_COMPILES( - "#include - int main(void) { int a,b; size_t m; __builtin_mul_overflow(a,b,&m); return 0; }" - HAVE_BUILTIN_MUL_OVERFLOW -) - -CHECK_C_SOURCE_COMPILES( +check_c_source_compiles( "int main(void) { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }" HAVE_ATTRIBUTE_UNINITIALIZED ) -CHECK_C_SOURCE_COMPILES([=[ +check_c_source_compiles( + [=[ extern __attribute__ ((visibility ("default"))) int f(void); int main(void) { return f(); } int f(void) { return 42; } - ]=] HAVE_VISIBILITY + ]=] + HAVE_VISIBILITY ) -if (HAVE_VISIBILITY) +set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS}) + +check_c_source_compiles("int main(void) { __assume(1); return 0; }" HAVE_BUILTIN_ASSUME) + +check_c_source_compiles( + [=[ + #include + int main(void) { int a,b; size_t m; __builtin_mul_overflow(a,b,&m); return 0; } + ]=] + 
HAVE_BUILTIN_MUL_OVERFLOW +) + +check_c_source_compiles( + "int main(int c, char *v[]) { if (c) __builtin_unreachable(); return (int)(*v[0]); }" + HAVE_BUILTIN_UNREACHABLE +) + +if(HAVE_VISIBILITY) set(PCRE2_EXPORT [=[__attribute__ ((visibility ("default")))]=]) else() set(PCRE2_EXPORT) endif() -set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS}) - # Check whether Intel CET is enabled, and if so, adjust compiler flags. This # code was written by PH, trying to imitate the logic from the autotools # configuration. -CHECK_C_SOURCE_COMPILES( - "#ifndef __CET__ - #error CET is not enabled - #endif - int main() { return 0; }" +check_c_source_compiles( + [=[ + #ifndef __CET__ + #error CET is not enabled + #endif + int main() { return 0; } + ]=] INTEL_CET_ENABLED ) -IF (INTEL_CET_ENABLED) - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk") -ENDIF(INTEL_CET_ENABLED) +if(INTEL_CET_ENABLED) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk") +endif() # User-configurable options # # Note: CMakeSetup displays these in alphabetical order, regardless of # the order we use here. -SET(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries.") +set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries.") -OPTION(BUILD_STATIC_LIBS "Build static libraries." ON) +option(BUILD_STATIC_LIBS "Build static libraries." ON) -OPTION(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON) +option(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON) -OPTION(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF) +option(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF) -OPTION(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF) +option(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF) -OPTION(PCRE2_STATIC_PIC "Build the static library with the option position independent code enabled." OFF) +option(PCRE2_STATIC_PIC "Build the static library with the option position independent code enabled." 
OFF) -OPTION(PCRE2_DEBUG "Include debugging code" OFF) +set(PCRE2_DEBUG "IfDebugBuild" CACHE STRING "Include debugging code") +set_property(CACHE PCRE2_DEBUG PROPERTY STRINGS "IfDebugBuild" "ON" "OFF") -OPTION(PCRE2_DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF) +option(PCRE2_DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF) -SET(PCRE2_EBCDIC OFF CACHE BOOL - "Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)") +set( + PCRE2_EBCDIC + OFF + CACHE BOOL + "Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)" +) -SET(PCRE2_EBCDIC_NL25 OFF CACHE BOOL - "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.") +set(PCRE2_EBCDIC_NL25 OFF CACHE BOOL "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.") -SET(PCRE2_LINK_SIZE "2" CACHE STRING - "Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.") +set( + PCRE2_LINK_SIZE + "2" + CACHE STRING + "Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details." +) -SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING - "Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.") +set( + PCRE2_PARENS_NEST_LIMIT + "250" + CACHE STRING + "Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details." +) -SET(PCRE2_HEAP_LIMIT "20000000" CACHE STRING - "Default limit on heap memory (kibibytes). See HEAP_LIMIT in config.h.in for details.") +set( + PCRE2_HEAP_LIMIT + "20000000" + CACHE STRING + "Default limit on heap memory (kibibytes). See HEAP_LIMIT in config.h.in for details." +) -SET(PCRE2_MAX_VARLOOKBEHIND "255" CACHE STRING - "Default limit on variable lookbehinds.") +set(PCRE2_MAX_VARLOOKBEHIND "255" CACHE STRING "Default limit on variable lookbehinds.") -SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING - "Default limit on internal looping. 
See MATCH_LIMIT in config.h.in for details.") +set( + PCRE2_MATCH_LIMIT + "10000000" + CACHE STRING + "Default limit on internal looping. See MATCH_LIMIT in config.h.in for details." +) -SET(PCRE2_MATCH_LIMIT_DEPTH "MATCH_LIMIT" CACHE STRING - "Default limit on internal depth of search. See MATCH_LIMIT_DEPTH in config.h.in for details.") +set( + PCRE2_MATCH_LIMIT_DEPTH + "MATCH_LIMIT" + CACHE STRING + "Default limit on internal depth of search. See MATCH_LIMIT_DEPTH in config.h.in for details." +) -SET(PCRE2_NEWLINE "LF" CACHE STRING - "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF, NUL).") +set( + PCRE2GREP_BUFSIZE + "20480" + CACHE STRING + "Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details." +) -SET(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL - "Obsolete option: do not use") +set( + PCRE2GREP_MAX_BUFSIZE + "1048576" + CACHE STRING + "Buffer maximum size parameter for pcre2grep. See PCRE2GREP_MAX_BUFSIZE in config.h.in for details." 
+) -SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL - "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.") +set(PCRE2_NEWLINE "LF" CACHE STRING "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF, NUL).") -SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL - "ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks") +set(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL "Obsolete option: do not use") -SET(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL - "If ON, backslash-C (upper case C) is locked out.") +set(PCRE2_SUPPORT_JIT OFF CACHE BOOL "Enable support for Just-in-time compiling.") -SET(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL - "Enable Valgrind support.") +if(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) + set(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL "Enable SELinux compatible execmem allocator in JIT (experimental).") +else() + set(PCRE2_SUPPORT_JIT_SEALLOC IGNORE) +endif() -OPTION(PCRE2_SHOW_REPORT "Show the final configuration report" ON) +set(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL "Enable use of Just-in-time compiling in pcre2grep.") -IF (MINGW) - OPTION(NON_STANDARD_LIB_PREFIX - "ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc." - OFF) +set(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL "Enable callout string support in pcre2grep.") - OPTION(NON_STANDARD_LIB_SUFFIX - "ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc." - OFF) -ENDIF(MINGW) +set(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL "Enable callout string fork support in pcre2grep.") -IF(MSVC) - OPTION(PCRE2_STATIC_RUNTIME - "ON=Compile against the static runtime (/MT)." 
- OFF) - OPTION(INSTALL_MSVC_PDB - "ON=Install .pdb files built by MSVC, if generated" - OFF) -ENDIF(MSVC) +set(PCRE2_SUPPORT_UNICODE OFF CACHE BOOL "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.") + +set( + PCRE2_SUPPORT_BSR_ANYCRLF + OFF + CACHE BOOL + "ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks" +) + +set(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL "If ON, backslash-C (upper case C) is locked out.") + +set(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL "Enable Valgrind support.") + +option(PCRE2_SHOW_REPORT "Show the final configuration report" ON) +option(PCRE2_BUILD_PCRE2GREP "Build pcre2grep" OFF) +option(PCRE2_BUILD_TESTS "Build the tests" OFF) + +set( + PCRE2_INSTALL_CMAKEDIR + "${CMAKE_INSTALL_LIBDIR}/cmake/pcre2" + CACHE STRING + "Path used during CMake install for placing PCRE2's CMake config files, relative to the installation root (prefix)" +) + +if(MINGW) + option( + NON_STANDARD_LIB_PREFIX + "ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc." + OFF + ) + + option( + NON_STANDARD_LIB_SUFFIX + "ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc." + OFF + ) +endif() + +if(MSVC) + option(PCRE2_STATIC_RUNTIME "ON=Compile against the static runtime (/MT)." OFF) + option(INSTALL_MSVC_PDB "ON=Install .pdb files built by MSVC, if generated" OFF) +endif() + +# bzip2 lib +if(BZIP2_FOUND) + option(PCRE2_SUPPORT_LIBBZ2 "Enable support for linking pcre2grep with libbz2." ON) +endif() +if(PCRE2_SUPPORT_LIBBZ2) + include_directories(${BZIP2_INCLUDE_DIR}) +endif() + +# zlib +if(ZLIB_FOUND) + option(PCRE2_SUPPORT_LIBZ "Enable support for linking pcre2grep with libz." ON) +endif() +if(PCRE2_SUPPORT_LIBZ) + include_directories(${ZLIB_INCLUDE_DIR}) +endif() + +# editline lib +if(EDITLINE_FOUND) + option(PCRE2_SUPPORT_LIBEDIT "Enable support for linking pcre2test with libedit." 
OFF) +endif() +if(EDITLINE_FOUND) + if(PCRE2_SUPPORT_LIBEDIT) + include_directories(${EDITLINE_INCLUDE_DIR}) + endif() +else() + if(PCRE2_SUPPORT_LIBEDIT) + message( + FATAL_ERROR + " libedit not found, set EDITLINE_INCLUDE_DIR to a compatible header\n" + " or set Editline_ROOT to a full libedit installed tree, as needed\n" + " Might need to enable policy CMP0074 in CMakeLists.txt" + ) + endif() +endif() + +# readline lib +if(READLINE_FOUND) + option(PCRE2_SUPPORT_LIBREADLINE "Enable support for linking pcre2test with libreadline." ON) +endif() +if(PCRE2_SUPPORT_LIBREADLINE) + include_directories(${READLINE_INCLUDE_DIR}) +endif() # Prepare build configuration -IF(NOT BUILD_SHARED_LIBS AND NOT BUILD_STATIC_LIBS) - MESSAGE(FATAL_ERROR "At least one of BUILD_SHARED_LIBS or BUILD_STATIC_LIBS must be enabled.") -ENDIF(NOT BUILD_SHARED_LIBS AND NOT BUILD_STATIC_LIBS) +if(NOT BUILD_SHARED_LIBS AND NOT BUILD_STATIC_LIBS) + message(FATAL_ERROR "At least one of BUILD_SHARED_LIBS or BUILD_STATIC_LIBS must be enabled.") +endif() -IF(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32) - MESSAGE(FATAL_ERROR "At least one of PCRE2_BUILD_PCRE2_8, PCRE2_BUILD_PCRE2_16 or PCRE2_BUILD_PCRE2_32 must be enabled") -ENDIF(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32) +if(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32) + message( + FATAL_ERROR + "At least one of PCRE2_BUILD_PCRE2_8, PCRE2_BUILD_PCRE2_16 or PCRE2_BUILD_PCRE2_32 must be enabled" + ) +endif() -IF(PCRE2_BUILD_PCRE2_8) - SET(SUPPORT_PCRE2_8 1) -ENDIF(PCRE2_BUILD_PCRE2_8) +if(PCRE2_BUILD_PCRE2_8) + set(SUPPORT_PCRE2_8 1) +endif() -IF(PCRE2_BUILD_PCRE2_16) - SET(SUPPORT_PCRE2_16 1) -ENDIF(PCRE2_BUILD_PCRE2_16) +if(PCRE2_BUILD_PCRE2_16) + set(SUPPORT_PCRE2_16 1) +endif() -IF(PCRE2_BUILD_PCRE2_32) - SET(SUPPORT_PCRE2_32 1) -ENDIF(PCRE2_BUILD_PCRE2_32) +if(PCRE2_BUILD_PCRE2_32) + set(SUPPORT_PCRE2_32 1) +endif() 
-IF(PCRE2_SUPPORT_BSR_ANYCRLF) - SET(BSR_ANYCRLF 1) -ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF) +if(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8) + message(STATUS "** PCRE2_BUILD_PCRE2_8 must be enabled for the pcre2grep program") + set(PCRE2_BUILD_PCRE2GREP OFF) +endif() -IF(PCRE2_NEVER_BACKSLASH_C) - SET(NEVER_BACKSLASH_C 1) -ENDIF(PCRE2_NEVER_BACKSLASH_C) +if(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT) + if(READLINE_FOUND) + message( + FATAL_ERROR + " Only one of the readline compatible libraries can be enabled.\n" + " Disable libreadline with -DPCRE2_SUPPORT_LIBREADLINE=OFF" + ) + endif() +endif() -IF(PCRE2_SUPPORT_UNICODE) - SET(SUPPORT_UNICODE 1) -ENDIF(PCRE2_SUPPORT_UNICODE) +if(PCRE2_SUPPORT_BSR_ANYCRLF) + set(BSR_ANYCRLF 1) +endif() -IF(PCRE2_SUPPORT_VALGRIND) - SET(SUPPORT_VALGRIND 1) -ENDIF(PCRE2_SUPPORT_VALGRIND) +if(PCRE2_NEVER_BACKSLASH_C) + set(NEVER_BACKSLASH_C 1) +endif() -IF(PCRE2_DISABLE_PERCENT_ZT) - SET(DISABLE_PERCENT_ZT 1) -ENDIF(PCRE2_DISABLE_PERCENT_ZT) +if(PCRE2_SUPPORT_UNICODE) + set(SUPPORT_UNICODE 1) +endif() -SET(NEWLINE_DEFAULT "") +if(PCRE2_SUPPORT_JIT) + set(SUPPORT_JIT 1) + if(UNIX) + find_package(Threads REQUIRED) + if(CMAKE_USE_PTHREADS_INIT) + set(REQUIRE_PTHREAD 1) + endif() + endif() +endif() -IF(PCRE2_NEWLINE STREQUAL "CR") - SET(NEWLINE_DEFAULT "1") -ENDIF(PCRE2_NEWLINE STREQUAL "CR") -IF(PCRE2_NEWLINE STREQUAL "LF") - SET(NEWLINE_DEFAULT "2") -ENDIF(PCRE2_NEWLINE STREQUAL "LF") -IF(PCRE2_NEWLINE STREQUAL "CRLF") - SET(NEWLINE_DEFAULT "3") -ENDIF(PCRE2_NEWLINE STREQUAL "CRLF") -IF(PCRE2_NEWLINE STREQUAL "ANY") - SET(NEWLINE_DEFAULT "4") -ENDIF(PCRE2_NEWLINE STREQUAL "ANY") -IF(PCRE2_NEWLINE STREQUAL "ANYCRLF") - SET(NEWLINE_DEFAULT "5") -ENDIF(PCRE2_NEWLINE STREQUAL "ANYCRLF") -IF(PCRE2_NEWLINE STREQUAL "NUL") - SET(NEWLINE_DEFAULT "6") -ENDIF(PCRE2_NEWLINE STREQUAL "NUL") +if(PCRE2_SUPPORT_JIT_SEALLOC) + set(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE) + check_symbol_exists(mkostemp stdlib.h REQUIRED) + 
unset(CMAKE_REQUIRED_DEFINITIONS) + if(${REQUIRED}) + if(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) + add_compile_definitions(_GNU_SOURCE) + set(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1) + else() + message(FATAL_ERROR "Your configuration is not supported") + endif() + else() + set(PCRE2_SUPPORT_JIT_SEALLOC OFF) + endif() +endif() -IF(NEWLINE_DEFAULT STREQUAL "") - MESSAGE(FATAL_ERROR "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\".") -ENDIF(NEWLINE_DEFAULT STREQUAL "") +if(PCRE2GREP_SUPPORT_JIT) + set(SUPPORT_PCRE2GREP_JIT 1) +endif() -IF(PCRE2_EBCDIC) - SET(EBCDIC 1) -ENDIF(PCRE2_EBCDIC) +if(PCRE2GREP_SUPPORT_CALLOUT) + set(SUPPORT_PCRE2GREP_CALLOUT 1) + if(PCRE2GREP_SUPPORT_CALLOUT_FORK) + set(SUPPORT_PCRE2GREP_CALLOUT_FORK 1) + endif() +endif() -IF(PCRE2_EBCDIC_NL25) - SET(EBCDIC 1) - SET(EBCDIC_NL25 1) -ENDIF(PCRE2_EBCDIC_NL25) +if(PCRE2_SUPPORT_VALGRIND) + set(SUPPORT_VALGRIND 1) +endif() + +if(PCRE2_DISABLE_PERCENT_ZT) + set(DISABLE_PERCENT_ZT 1) +endif() + +# This next one used to reference ${READLINE_LIBRARY}) +# but I was advised to add the NCURSES test as well, along with +# some modifications to cmake/FindReadline.cmake which should +# make it possible to override the default if necessary. 
PH + +if(PCRE2_SUPPORT_LIBREADLINE) + set(SUPPORT_LIBREADLINE 1) + set(PCRE2TEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY}) +endif() + +# libedit is a plug-compatible alternative to libreadline + +if(PCRE2_SUPPORT_LIBEDIT) + set(SUPPORT_LIBEDIT 1) + set(PCRE2TEST_LIBS ${EDITLINE_LIBRARY}) +endif() + +if(PCRE2_SUPPORT_LIBZ) + set(SUPPORT_LIBZ 1) + set(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${ZLIB_LIBRARIES}) +endif() + +if(PCRE2_SUPPORT_LIBBZ2) + set(SUPPORT_LIBBZ2 1) + set(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${BZIP2_LIBRARIES}) +endif() + +set(NEWLINE_DEFAULT "") + +if(PCRE2_NEWLINE STREQUAL "CR") + set(NEWLINE_DEFAULT "1") +endif() +if(PCRE2_NEWLINE STREQUAL "LF") + set(NEWLINE_DEFAULT "2") +endif() +if(PCRE2_NEWLINE STREQUAL "CRLF") + set(NEWLINE_DEFAULT "3") +endif() +if(PCRE2_NEWLINE STREQUAL "ANY") + set(NEWLINE_DEFAULT "4") +endif() +if(PCRE2_NEWLINE STREQUAL "ANYCRLF") + set(NEWLINE_DEFAULT "5") +endif() +if(PCRE2_NEWLINE STREQUAL "NUL") + set(NEWLINE_DEFAULT "6") +endif() + +if(NEWLINE_DEFAULT STREQUAL "") + message( + FATAL_ERROR + "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\"." 
+ ) +endif() + +if(PCRE2_EBCDIC) + set(EBCDIC 1) +endif() + +if(PCRE2_EBCDIC_NL25) + set(EBCDIC 1) + set(EBCDIC_NL25 1) +endif() # Output files -CONFIGURE_FILE(config-cmake.h.in - ${PROJECT_BINARY_DIR}/config.h - @ONLY) +configure_file(config-cmake.h.in ${PROJECT_BINARY_DIR}/config.h @ONLY) # Parse version numbers and date out of configure.ac -file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac +file( + STRINGS + ${PROJECT_SOURCE_DIR}/configure.ac configure_lines - LIMIT_COUNT 50 # Read only the first 50 lines of the file + LIMIT_COUNT + 50 # Read only the first 50 lines of the file ) -set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date" - "libpcre2_posix_version" "libpcre2_8_version" "libpcre2_16_version" "libpcre2_32_version") +set( + SEARCHED_VARIABLES + "pcre2_major" + "pcre2_minor" + "pcre2_prerelease" + "pcre2_date" + "libpcre2_posix_version" + "libpcre2_8_version" + "libpcre2_16_version" + "libpcre2_32_version" +) foreach(configure_line ${configure_lines}) - foreach(_substitution_variable ${SEARCHED_VARIABLES}) - string(TOUPPER ${_substitution_variable} _substitution_variable_upper) - if (NOT ${_substitution_variable_upper}) - string(REGEX MATCH "m4_define\\(${_substitution_variable}, *\\[(.*)\\]" MATCHED_STRING ${configure_line}) - if (CMAKE_MATCH_1) - set(${_substitution_variable_upper} ${CMAKE_MATCH_1}) - endif() - endif() - endforeach() + foreach(substitution_variable ${SEARCHED_VARIABLES}) + string(TOUPPER ${substitution_variable} substitution_variable_upper) + if(NOT ${substitution_variable_upper}) + string(REGEX MATCH "m4_define\\(${substitution_variable}, *\\[(.*)\\]" MATCHED_STRING ${configure_line}) + if(CMAKE_MATCH_1) + set(${substitution_variable_upper} ${CMAKE_MATCH_1}) + endif() + endif() + endforeach() endforeach() -macro(PARSE_LIB_VERSION VARIABLE_PREFIX) - string(REPLACE ":" ";" ${VARIABLE_PREFIX}_VERSION_LIST ${${VARIABLE_PREFIX}_VERSION}) - list(GET ${VARIABLE_PREFIX}_VERSION_LIST 0 
${VARIABLE_PREFIX}_VERSION_CURRENT) - list(GET ${VARIABLE_PREFIX}_VERSION_LIST 1 ${VARIABLE_PREFIX}_VERSION_REVISION) - list(GET ${VARIABLE_PREFIX}_VERSION_LIST 2 ${VARIABLE_PREFIX}_VERSION_AGE) +macro(PARSE_LIB_VERSION variable_prefix) + string(REPLACE ":" ";" ${variable_prefix}_VERSION_LIST ${${variable_prefix}_VERSION}) + list(GET ${variable_prefix}_VERSION_LIST 0 ${variable_prefix}_VERSION_CURRENT) + list(GET ${variable_prefix}_VERSION_LIST 1 ${variable_prefix}_VERSION_REVISION) + list(GET ${variable_prefix}_VERSION_LIST 2 ${variable_prefix}_VERSION_AGE) - math(EXPR ${VARIABLE_PREFIX}_SOVERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} - ${${VARIABLE_PREFIX}_VERSION_AGE}") - math(EXPR ${VARIABLE_PREFIX}_MACHO_COMPATIBILITY_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1") - math(EXPR ${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1") - set(${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION}.${${VARIABLE_PREFIX}_VERSION_REVISION}}") - set(${VARIABLE_PREFIX}_VERSION "${${VARIABLE_PREFIX}_SOVERSION}.${${VARIABLE_PREFIX}_VERSION_AGE}.${${VARIABLE_PREFIX}_VERSION_REVISION}") + math(EXPR ${variable_prefix}_SOVERSION "${${variable_prefix}_VERSION_CURRENT} - ${${variable_prefix}_VERSION_AGE}") + math(EXPR ${variable_prefix}_MACHO_COMPATIBILITY_VERSION "${${variable_prefix}_VERSION_CURRENT} + 1") + math(EXPR ${variable_prefix}_MACHO_CURRENT_VERSION "${${variable_prefix}_VERSION_CURRENT} + 1") + set( + ${variable_prefix}_MACHO_CURRENT_VERSION + "${${variable_prefix}_MACHO_CURRENT_VERSION}.${${variable_prefix}_VERSION_REVISION}}" + ) + set( + ${variable_prefix}_VERSION + "${${variable_prefix}_SOVERSION}.${${variable_prefix}_VERSION_AGE}.${${variable_prefix}_VERSION_REVISION}" + ) endmacro() -PARSE_LIB_VERSION(LIBPCRE2_POSIX) -PARSE_LIB_VERSION(LIBPCRE2_8) -PARSE_LIB_VERSION(LIBPCRE2_16) -PARSE_LIB_VERSION(LIBPCRE2_32) +parse_lib_version(LIBPCRE2_POSIX) +parse_lib_version(LIBPCRE2_8) 
+parse_lib_version(LIBPCRE2_16) +parse_lib_version(LIBPCRE2_32) -CONFIGURE_FILE(src/pcre2.h.in - ${PROJECT_BINARY_DIR}/pcre2.h - @ONLY) +configure_file(src/pcre2.h.in ${PROJECT_BINARY_DIR}/pcre2.h @ONLY) # Make sure to not link debug libs # against release libs and vice versa -IF(WIN32) - SET(CMAKE_DEBUG_POSTFIX "d") -ENDIF(WIN32) +if(WIN32) + set(CMAKE_DEBUG_POSTFIX "d") +endif() # Character table generation -OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF) -IF(PCRE2_REBUILD_CHARTABLES) - ADD_EXECUTABLE(pcre2_dftables src/pcre2_dftables.c) - ADD_CUSTOM_COMMAND( - COMMENT "Generating character tables (pcre2_chartables.c) for current locale" - DEPENDS pcre2_dftables +option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF) +if(PCRE2_REBUILD_CHARTABLES) + add_executable(pcre2_dftables src/pcre2_dftables.c) + add_custom_command( + OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c COMMAND pcre2_dftables - ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c - OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c + ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c + DEPENDS pcre2_dftables + COMMENT "Generating character tables (pcre2_chartables.c) for current locale" + VERBATIM ) -ELSE(PCRE2_REBUILD_CHARTABLES) - CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist - ${PROJECT_BINARY_DIR}/pcre2_chartables.c - COPYONLY) -ENDIF(PCRE2_REBUILD_CHARTABLES) +else() + configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY) +endif() # Source code -SET(PCRE2_HEADERS ${PROJECT_BINARY_DIR}/pcre2.h) +set(PCRE2_HEADERS ${PROJECT_BINARY_DIR}/pcre2.h) -SET(PCRE2_SOURCES +set( + PCRE2_SOURCES src/pcre2_auto_possess.c ${PROJECT_BINARY_DIR}/pcre2_chartables.c src/pcre2_chkdint.c src/pcre2_compile.c + src/pcre2_compile_class.c src/pcre2_config.c src/pcre2_context.c src/pcre2_convert.c @@ -471,284 +704,326 @@ SET(PCRE2_SOURCES src/pcre2_xclass.c ) -SET(PCRE2POSIX_HEADERS src/pcre2posix.h) -SET(PCRE2POSIX_SOURCES 
src/pcre2posix.c) +set(PCRE2POSIX_HEADERS src/pcre2posix.h) +set(PCRE2POSIX_SOURCES src/pcre2posix.c) -IF(MINGW AND BUILD_SHARED_LIBS) - IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) - ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o - PRE-LINK - COMMAND windres ARGS pcre2.rc pcre2.o +if(MINGW AND BUILD_SHARED_LIBS) + if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) + add_custom_command( + OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o PRE-LINK + COMMAND windres + ARGS pcre2.rc pcre2.o WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMENT Using pcre2 coff info in mingw build) - SET(PCRE2_SOURCES ${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o) - ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) + COMMENT "Using pcre2 coff info in mingw build" + ) + set(PCRE2_SOURCES ${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o) + endif() - IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) - ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o - PRE-LINK - COMMAND windres ARGS pcre2posix.rc pcre2posix.o + if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) + add_custom_command( + OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o PRE-LINK + COMMAND windres + ARGS pcre2posix.rc pcre2posix.o WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMENT Using pcre2posix coff info in mingw build) - SET(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o) - ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) -ENDIF(MINGW AND BUILD_SHARED_LIBS) + COMMENT "Using pcre2posix coff info in mingw build" + ) + set(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o) + endif() +endif() -IF(MSVC AND BUILD_SHARED_LIBS) - SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-posix.pdb ${dll_pdb_files}) - SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-posixd.pdb ${dll_pdb_debug_files}) - IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) - SET(PCRE2_SOURCES ${PCRE2_SOURCES} pcre2.rc) - ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) +if(MSVC AND BUILD_SHARED_LIBS) + if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) + 
set(PCRE2_SOURCES ${PCRE2_SOURCES} pcre2.rc) + endif() - IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) - SET(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} pcre2posix.rc) - ENDIF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) -ENDIF(MSVC AND BUILD_SHARED_LIBS) + if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) + set(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} pcre2posix.rc) + endif() +endif() # Fix static compilation with MSVC: https://bugs.exim.org/show_bug.cgi?id=1681 # This code was taken from the CMake wiki, not from WebM. -IF(MSVC AND PCRE2_STATIC_RUNTIME) - MESSAGE(STATUS "** MSVC and PCRE2_STATIC_RUNTIME: modifying compiler flags to use static runtime library") - foreach(flag_var - CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE - CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) +if(MSVC AND PCRE2_STATIC_RUNTIME) + message(STATUS "** MSVC and PCRE2_STATIC_RUNTIME: modifying compiler flags to use static runtime library") + foreach( + flag_var + CMAKE_C_FLAGS + CMAKE_C_FLAGS_DEBUG + CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL + CMAKE_C_FLAGS_RELWITHDEBINFO + ) string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") endforeach() -ENDIF(MSVC AND PCRE2_STATIC_RUNTIME) +endif() # Build setup -ADD_DEFINITIONS(-DHAVE_CONFIG_H) +add_compile_definitions(HAVE_CONFIG_H) -IF(MSVC) - ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS) -ENDIF(MSVC) +if(PCRE2_DEBUG STREQUAL "IfDebugBuild") + add_compile_definitions("$<$:PCRE2_DEBUG>") +elseif(PCRE2_DEBUG) + add_compile_definitions("PCRE2_DEBUG") +endif() -SET(CMAKE_INCLUDE_CURRENT_DIR 1) +if(MSVC) + add_compile_definitions(_CRT_SECURE_NO_DEPRECATE _CRT_SECURE_NO_WARNINGS) +endif() -set(targets) +set(CMAKE_INCLUDE_CURRENT_DIR 1) + +set(TARGETS) # 8-bit library -IF(PCRE2_BUILD_PCRE2_8) - IF(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-8-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) - SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES - COMPILE_DEFINITIONS 
PCRE2_CODE_UNIT_WIDTH=8 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_8_VERSION} - SOVERSION ${LIBPCRE2_8_SOVERSION}) - TARGET_COMPILE_DEFINITIONS(pcre2-8-static PUBLIC PCRE2_STATIC) - TARGET_INCLUDE_DIRECTORIES(pcre2-8-static PUBLIC ${PROJECT_BINARY_DIR}) - IF(REQUIRE_PTHREAD) - TARGET_LINK_LIBRARIES(pcre2-8-static Threads::Threads) - ENDIF(REQUIRE_PTHREAD) - SET(targets ${targets} pcre2-8-static) - ADD_LIBRARY(pcre2-posix-static STATIC ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) - SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_POSIX_VERSION} - SOVERSION ${LIBPCRE2_POSIX_SOVERSION}) - TARGET_LINK_LIBRARIES(pcre2-posix-static pcre2-8-static) - TARGET_INCLUDE_DIRECTORIES(pcre2-posix-static PUBLIC ${PROJECT_SOURCE_DIR}/src) - set(targets ${targets} pcre2-posix-static) +if(PCRE2_BUILD_PCRE2_8) + if(BUILD_STATIC_LIBS) + add_library(pcre2-8-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + set_target_properties( + pcre2-8-static + PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_8_VERSION} + SOVERSION ${LIBPCRE2_8_SOVERSION} + ) + target_compile_definitions(pcre2-8-static PUBLIC PCRE2_STATIC) + target_include_directories(pcre2-8-static PUBLIC ${PROJECT_BINARY_DIR}) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-8-static Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-8-static) + add_library(pcre2-posix-static STATIC ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) + set_target_properties( + pcre2-posix-static + PROPERTIES + COMPILE_DEFINITIONS 
PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_POSIX_VERSION} + SOVERSION ${LIBPCRE2_POSIX_SOVERSION} + ) + target_link_libraries(pcre2-posix-static pcre2-8-static) + target_include_directories(pcre2-posix-static PUBLIC ${PROJECT_SOURCE_DIR}/src) + set(TARGETS ${TARGETS} pcre2-posix-static) - IF(MSVC) - SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8-static) - SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix-static) - ELSE(MSVC) - SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8) - SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix) - ENDIF(MSVC) - IF(PCRE2_STATIC_PIC) - SET_TARGET_PROPERTIES(pcre2-8-static pcre2-posix-static PROPERTIES POSITION_INDEPENDENT_CODE 1) - ENDIF(PCRE2_STATIC_PIC) - ENDIF(BUILD_STATIC_LIBS) + if(MSVC) + set_target_properties(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8-static) + set_target_properties(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix-static) + else() + set_target_properties(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8) + set_target_properties(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix) + endif() + if(PCRE2_STATIC_PIC) + set_target_properties(pcre2-8-static pcre2-posix-static PROPERTIES POSITION_INDEPENDENT_CODE 1) + endif() + endif() - IF(BUILD_SHARED_LIBS) - ADD_LIBRARY(pcre2-8-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) - TARGET_INCLUDE_DIRECTORIES(pcre2-8-shared PUBLIC ${PROJECT_BINARY_DIR}) - SET_TARGET_PROPERTIES(pcre2-8-shared PROPERTIES - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_8_VERSION} - SOVERSION ${LIBPCRE2_8_SOVERSION} - OUTPUT_NAME pcre2-8) - IF(REQUIRE_PTHREAD) - 
TARGET_LINK_LIBRARIES(pcre2-8-shared Threads::Threads) - ENDIF(REQUIRE_PTHREAD) - set(targets ${targets} pcre2-8-shared) + if(BUILD_SHARED_LIBS) + add_library(pcre2-8-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-8-shared PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-8-shared + PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_8_VERSION} + SOVERSION ${LIBPCRE2_8_SOVERSION} + OUTPUT_NAME pcre2-8 + ) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-8-shared Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-8-shared) + set(DLL_PDB_FILES $/pcre2-8.pdb ${DLL_PDB_FILES}) + set(DLL_PDB_DEBUG_FILES $/pcre2-8d.pdb ${DLL_PDB_DEBUG_FILES}) - ADD_LIBRARY(pcre2-posix-shared SHARED ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) - TARGET_INCLUDE_DIRECTORIES(pcre2-posix-shared PUBLIC ${PROJECT_SOURCE_DIR}/src) - SET_TARGET_PROPERTIES(pcre2-posix-shared PROPERTIES - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_POSIX_VERSION} - SOVERSION ${LIBPCRE2_POSIX_SOVERSION} - OUTPUT_NAME pcre2-posix) + add_library(pcre2-posix-shared SHARED ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) + target_include_directories(pcre2-posix-shared PUBLIC ${PROJECT_SOURCE_DIR}/src) + set_target_properties( + pcre2-posix-shared + PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_POSIX_VERSION} + SOVERSION ${LIBPCRE2_POSIX_SOVERSION} + OUTPUT_NAME pcre2-posix + ) set(PCRE2POSIX_CFLAG "-DPCRE2POSIX_SHARED") - 
TARGET_COMPILE_DEFINITIONS(pcre2-posix-shared PUBLIC ${PCRE2POSIX_CFLAG}) - TARGET_LINK_LIBRARIES(pcre2-posix-shared pcre2-8-shared) - SET(targets ${targets} pcre2-posix-shared) - SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-8.pdb ${dll_pdb_files}) - SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-8d.pdb ${dll_pdb_debug_files}) + target_compile_definitions(pcre2-posix-shared PUBLIC ${PCRE2POSIX_CFLAG}) + target_link_libraries(pcre2-posix-shared pcre2-8-shared) + set(TARGETS ${TARGETS} pcre2-posix-shared) + set(DLL_PDB_FILES $/pcre2-posix.pdb ${DLL_PDB_FILES}) + set(DLL_PDB_DEBUG_FILES $/pcre2-posixd.pdb ${DLL_PDB_DEBUG_FILES}) - IF(MINGW) - IF(NON_STANDARD_LIB_PREFIX) - SET_TARGET_PROPERTIES(pcre2-8-shared pcre2-posix-shared PROPERTIES PREFIX "") - ENDIF(NON_STANDARD_LIB_PREFIX) - IF(NON_STANDARD_LIB_SUFFIX) - SET_TARGET_PROPERTIES(pcre2-8-shared pcre2-posix-shared PROPERTIES SUFFIX "-0.dll") - ENDIF(NON_STANDARD_LIB_SUFFIX) - ENDIF(MINGW) - ENDIF(BUILD_SHARED_LIBS) + if(MINGW) + if(NON_STANDARD_LIB_PREFIX) + set_target_properties(pcre2-8-shared pcre2-posix-shared PROPERTIES PREFIX "") + endif() + if(NON_STANDARD_LIB_SUFFIX) + set_target_properties(pcre2-8-shared pcre2-posix-shared PROPERTIES SUFFIX "-0.dll") + endif() + endif() + endif() - IF(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-8 ALIAS pcre2-8-static) - ADD_LIBRARY(pcre2-posix ALIAS pcre2-posix-static) - ELSE(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-8 ALIAS pcre2-8-shared) - ADD_LIBRARY(pcre2-posix ALIAS pcre2-posix-shared) - ENDIF(BUILD_STATIC_LIBS) -ENDIF(PCRE2_BUILD_PCRE2_8) + if(BUILD_STATIC_LIBS) + add_library(pcre2-8 ALIAS pcre2-8-static) + add_library(pcre2-posix ALIAS pcre2-posix-static) + else() + add_library(pcre2-8 ALIAS pcre2-8-shared) + add_library(pcre2-posix ALIAS pcre2-posix-shared) + endif() +endif() # 16-bit library -IF(PCRE2_BUILD_PCRE2_16) - IF(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-16-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) - 
TARGET_INCLUDE_DIRECTORIES(pcre2-16-static PUBLIC ${PROJECT_BINARY_DIR}) - SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES UNITY_BUILD OFF - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_16_VERSION} - SOVERSION ${LIBPCRE2_16_SOVERSION}) - TARGET_COMPILE_DEFINITIONS(pcre2-16-static PUBLIC PCRE2_STATIC) - IF(REQUIRE_PTHREAD) - TARGET_LINK_LIBRARIES(pcre2-16-static Threads::Threads) - ENDIF(REQUIRE_PTHREAD) - set(targets ${targets} pcre2-16-static) +if(PCRE2_BUILD_PCRE2_16) + if(BUILD_STATIC_LIBS) + add_library(pcre2-16-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-16-static PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-16-static + PROPERTIES + UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_16_VERSION} + SOVERSION ${LIBPCRE2_16_SOVERSION} + ) + target_compile_definitions(pcre2-16-static PUBLIC PCRE2_STATIC) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-16-static Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-16-static) - IF(MSVC) - SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16-static) - ELSE(MSVC) - SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16) - ENDIF(MSVC) - IF(PCRE2_STATIC_PIC) - SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES POSITION_INDEPENDENT_CODE 1) - ENDIF(PCRE2_STATIC_PIC) - ENDIF(BUILD_STATIC_LIBS) + if(MSVC) + set_target_properties(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16-static) + else() + set_target_properties(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16) + endif() + if(PCRE2_STATIC_PIC) + set_target_properties(pcre2-16-static PROPERTIES POSITION_INDEPENDENT_CODE 
1) + endif() + endif() - IF(BUILD_SHARED_LIBS) - ADD_LIBRARY(pcre2-16-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) - TARGET_INCLUDE_DIRECTORIES(pcre2-16-shared PUBLIC ${PROJECT_BINARY_DIR}) - SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES UNITY_BUILD OFF - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_16_VERSION} - SOVERSION ${LIBPCRE2_16_SOVERSION} - OUTPUT_NAME pcre2-16) - IF(REQUIRE_PTHREAD) - TARGET_LINK_LIBRARIES(pcre2-16-shared Threads::Threads) - ENDIF(REQUIRE_PTHREAD) - set(targets ${targets} pcre2-16-shared) - SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-16.pdb ${dll_pdb_files}) - SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-16d.pdb ${dll_pdb_debug_files}) + if(BUILD_SHARED_LIBS) + add_library(pcre2-16-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-16-shared PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-16-shared + PROPERTIES + UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_16_VERSION} + SOVERSION ${LIBPCRE2_16_SOVERSION} + OUTPUT_NAME pcre2-16 + ) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-16-shared Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-16-shared) + set(DLL_PDB_FILES $/pcre2-16.pdb ${DLL_PDB_FILES}) + set(DLL_PDB_DEBUG_FILES $/pcre2-16d.pdb ${DLL_PDB_DEBUG_FILES}) - IF(MINGW) - IF(NON_STANDARD_LIB_PREFIX) - SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES PREFIX "") - ENDIF(NON_STANDARD_LIB_PREFIX) - IF(NON_STANDARD_LIB_SUFFIX) - SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES SUFFIX "-0.dll") - ENDIF(NON_STANDARD_LIB_SUFFIX) - ENDIF(MINGW) - ENDIF(BUILD_SHARED_LIBS) + 
if(MINGW) + if(NON_STANDARD_LIB_PREFIX) + set_target_properties(pcre2-16-shared PROPERTIES PREFIX "") + endif() + if(NON_STANDARD_LIB_SUFFIX) + set_target_properties(pcre2-16-shared PROPERTIES SUFFIX "-0.dll") + endif() + endif() + endif() - IF(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-16 ALIAS pcre2-16-static) - ELSE(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-16 ALIAS pcre2-16-shared) - ENDIF(BUILD_STATIC_LIBS) -ENDIF(PCRE2_BUILD_PCRE2_16) + if(BUILD_STATIC_LIBS) + add_library(pcre2-16 ALIAS pcre2-16-static) + else() + add_library(pcre2-16 ALIAS pcre2-16-shared) + endif() +endif() # 32-bit library -IF(PCRE2_BUILD_PCRE2_32) - IF(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-32-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) - TARGET_INCLUDE_DIRECTORIES(pcre2-32-static PUBLIC ${PROJECT_BINARY_DIR}) - SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES UNITY_BUILD OFF - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_32_VERSION} - SOVERSION ${LIBPCRE2_32_SOVERSION}) - TARGET_COMPILE_DEFINITIONS(pcre2-32-static PUBLIC PCRE2_STATIC) - IF(REQUIRE_PTHREAD) - TARGET_LINK_LIBRARIES(pcre2-32-static Threads::Threads) - ENDIF(REQUIRE_PTHREAD) - set(targets ${targets} pcre2-32-static) +if(PCRE2_BUILD_PCRE2_32) + if(BUILD_STATIC_LIBS) + add_library(pcre2-32-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-32-static PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-32-static + PROPERTIES + UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_32_VERSION} + SOVERSION ${LIBPCRE2_32_SOVERSION} + ) + target_compile_definitions(pcre2-32-static PUBLIC PCRE2_STATIC) + 
if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-32-static Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-32-static) - IF(MSVC) - SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32-static) - ELSE(MSVC) - SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32) - ENDIF(MSVC) - IF(PCRE2_STATIC_PIC) - SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES POSITION_INDEPENDENT_CODE 1) - ENDIF(PCRE2_STATIC_PIC) - ENDIF(BUILD_STATIC_LIBS) + if(MSVC) + set_target_properties(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32-static) + else() + set_target_properties(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32) + endif() + if(PCRE2_STATIC_PIC) + set_target_properties(pcre2-32-static PROPERTIES POSITION_INDEPENDENT_CODE 1) + endif() + endif() - IF(BUILD_SHARED_LIBS) - ADD_LIBRARY(pcre2-32-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) - TARGET_INCLUDE_DIRECTORIES(pcre2-32-shared PUBLIC ${PROJECT_BINARY_DIR}) - SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES UNITY_BUILD OFF - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_32_VERSION} - SOVERSION ${LIBPCRE2_32_SOVERSION} - OUTPUT_NAME pcre2-32) - IF(REQUIRE_PTHREAD) - TARGET_LINK_LIBRARIES(pcre2-32-shared Threads::Threads) - ENDIF(REQUIRE_PTHREAD) - set(targets ${targets} pcre2-32-shared) - SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-32.pdb ${dll_pdb_files}) - SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-32d.pdb ${dll_pdb_debug_files}) + if(BUILD_SHARED_LIBS) + add_library(pcre2-32-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-32-shared PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-32-shared + PROPERTIES + UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32 + MACHO_COMPATIBILITY_VERSION 
"${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_32_VERSION} + SOVERSION ${LIBPCRE2_32_SOVERSION} + OUTPUT_NAME pcre2-32 + ) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-32-shared Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-32-shared) + set(DLL_PDB_FILES $/pcre2-32.pdb ${DLL_PDB_FILES}) + set(DLL_PDB_DEBUG_FILES $/pcre2-32d.pdb ${DLL_PDB_DEBUG_FILES}) - IF(MINGW) - IF(NON_STANDARD_LIB_PREFIX) - SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES PREFIX "") - ENDIF(NON_STANDARD_LIB_PREFIX) - IF(NON_STANDARD_LIB_SUFFIX) - SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES SUFFIX "-0.dll") - ENDIF(NON_STANDARD_LIB_SUFFIX) - ENDIF(MINGW) - ENDIF(BUILD_SHARED_LIBS) + if(MINGW) + if(NON_STANDARD_LIB_PREFIX) + set_target_properties(pcre2-32-shared PROPERTIES PREFIX "") + endif() + if(NON_STANDARD_LIB_SUFFIX) + set_target_properties(pcre2-32-shared PROPERTIES SUFFIX "-0.dll") + endif() + endif() + endif() - IF(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-32 ALIAS pcre2-32-static) - ELSE(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-32 ALIAS pcre2-32-shared) - ENDIF(BUILD_STATIC_LIBS) -ENDIF(PCRE2_BUILD_PCRE2_32) + if(BUILD_STATIC_LIBS) + add_library(pcre2-32 ALIAS pcre2-32-static) + else() + add_library(pcre2-32 ALIAS pcre2-32-shared) + endif() +endif() # Generate pkg-config files set(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}") set(prefix ${CMAKE_INSTALL_PREFIX}) - set(exec_prefix "\${prefix}") set(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}") set(includedir "\${prefix}/include") @@ -757,18 +1032,26 @@ if(WIN32 AND (CMAKE_BUILD_TYPE MATCHES Debug)) endif() if(PCRE2_BUILD_PCRE2_8) + configure_file(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY) + list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc") + configure_file(libpcre2-8.pc.in libpcre2-8.pc @ONLY) + list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-8.pc") set(enable_pcre2_8 "yes") else() 
set(enable_pcre2_8 "no") endif() if(PCRE2_BUILD_PCRE2_16) + configure_file(libpcre2-16.pc.in libpcre2-16.pc @ONLY) + list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-16.pc") set(enable_pcre2_16 "yes") else() set(enable_pcre2_16 "no") endif() if(PCRE2_BUILD_PCRE2_32) + configure_file(libpcre2-32.pc.in libpcre2-32.pc @ONLY) + list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-32.pc") set(enable_pcre2_32 "yes") else() set(enable_pcre2_32 "no") @@ -778,98 +1061,332 @@ configure_file(pcre2-config.in pcre2-config @ONLY NEWLINE_STYLE LF) # Executables +if(PCRE2_BUILD_PCRE2GREP) + add_executable(pcre2grep src/pcre2grep.c) + set_property(TARGET pcre2grep PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) + set(TARGETS ${TARGETS} pcre2grep) + target_link_libraries(pcre2grep pcre2-posix ${PCRE2GREP_LIBS}) +endif() + # Testing +if(PCRE2_BUILD_TESTS) + enable_testing() + + set(PCRE2TEST_SOURCES src/pcre2test.c) + + if(MSVC) + # This is needed to avoid a stack overflow error in the standard tests. The + # flag should be indicated with a forward-slash instead of a hyphen, but + # then CMake treats it as a file path. 
+ set(PCRE2TEST_LINKER_FLAGS -STACK:2500000) + endif() + + add_executable(pcre2test ${PCRE2TEST_SOURCES}) + set(TARGETS ${TARGETS} pcre2test) + if(PCRE2_BUILD_PCRE2_8) + list(APPEND PCRE2TEST_LIBS pcre2-posix pcre2-8) + endif() + if(PCRE2_BUILD_PCRE2_16) + list(APPEND PCRE2TEST_LIBS pcre2-16) + endif() + if(PCRE2_BUILD_PCRE2_32) + list(APPEND PCRE2TEST_LIBS pcre2-32) + endif() + target_link_libraries(pcre2test ${PCRE2TEST_LIBS} ${PCRE2TEST_LINKER_FLAGS}) + + if(PCRE2_BUILD_PCRE2_8) + add_executable(pcre2posix_test src/pcre2posix_test.c) + target_link_libraries(pcre2posix_test pcre2-posix pcre2-8) + endif() + + if(PCRE2_SUPPORT_JIT) + add_executable(pcre2_jit_test src/pcre2_jit_test.c) + set(PCRE2_JIT_TEST_LIBS) + if(PCRE2_BUILD_PCRE2_8) + list(APPEND PCRE2_JIT_TEST_LIBS pcre2-8) + endif() + if(PCRE2_BUILD_PCRE2_16) + list(APPEND PCRE2_JIT_TEST_LIBS pcre2-16) + endif() + if(PCRE2_BUILD_PCRE2_32) + list(APPEND PCRE2_JIT_TEST_LIBS pcre2-32) + endif() + target_link_libraries(pcre2_jit_test ${PCRE2_JIT_TEST_LIBS}) + endif() + + # ================================================= + # Write out a CTest configuration file + # + file( + WRITE + ${PROJECT_BINARY_DIR}/CTestCustom.ctest + "# This is a generated file. +MESSAGE(\"When testing is complete, review test output in the +\\\"${PROJECT_BINARY_DIR}/Testing/Temporary\\\" folder.\") +MESSAGE(\" \") +" + ) + + file( + WRITE + ${PROJECT_BINARY_DIR}/pcre2_test.sh + "#! /bin/sh +# This is a generated file. +srcdir=${PROJECT_SOURCE_DIR} +pcre2test=${PROJECT_BINARY_DIR}/pcre2test +test -z \"$CMAKE_CONFIG_TYPE\" || pcre2test=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2test +. ${PROJECT_SOURCE_DIR}/RunTest +if test \"$?\" != \"0\"; then exit 1; fi +# End +" + ) + + if(UNIX) + add_test(pcre2_test sh ${PROJECT_BINARY_DIR}/pcre2_test.sh) + endif() + + if(PCRE2_BUILD_PCRE2GREP) + file( + WRITE + ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh + "#! /bin/sh +# This is a generated file. 
+srcdir=${PROJECT_SOURCE_DIR} +pcre2grep=${PROJECT_BINARY_DIR}/pcre2grep +test -z \"$CMAKE_CONFIG_TYPE\" || pcre2grep=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2grep +pcre2test=${PROJECT_BINARY_DIR}/pcre2test +test -z \"$CMAKE_CONFIG_TYPE\" || pcre2test=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2test +. ${PROJECT_SOURCE_DIR}/RunGrepTest +if test \"$?\" != \"0\"; then exit 1; fi +# End +" + ) + + if(UNIX) + add_test(pcre2_grep_test sh ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh) + endif() + endif() + + if(WIN32) + # Provide environment for executing the bat file version of RunTest + file(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} winsrc) + file(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} winbin) + + file( + WRITE + ${PROJECT_BINARY_DIR}/pcre2_test.bat + "\@REM This is a generated file. +\@echo off +setlocal +SET srcdir=\"${winsrc}\" +SET pcre2test=\"${winbin}\\pcre2test.exe\" +if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\" +call %srcdir%\\RunTest.bat +if errorlevel 1 exit /b 1 +echo RunTest.bat tests successfully completed +" + ) + + add_test(NAME pcre2_test_bat COMMAND pcre2_test.bat) + set_tests_properties(pcre2_test_bat PROPERTIES PASS_REGULAR_EXPRESSION "RunTest\\.bat tests successfully completed") + + if(PCRE2_BUILD_PCRE2GREP) + file( + WRITE + ${PROJECT_BINARY_DIR}/pcre2_grep_test.bat + "\@REM This is a generated file. 
+\@echo off +setlocal +SET srcdir=\"${winsrc}\" +SET pcre2test=\"${winbin}\\pcre2test.exe\" +if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\" +SET pcre2grep=\"${winbin}\\pcre2grep.exe\" +if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2grep=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2grep.exe\" +call %srcdir%\\RunGrepTest.bat +if errorlevel 1 exit /b 1 +echo RunGrepTest.bat tests successfully completed +" + ) + + add_test(NAME pcre2_grep_test_bat COMMAND pcre2_grep_test.bat) + set_tests_properties( + pcre2_grep_test_bat + PROPERTIES PASS_REGULAR_EXPRESSION "RunGrepTest\\.bat tests successfully completed" + ) + endif() + + if("$ENV{OSTYPE}" STREQUAL "msys") + # Both the sh and bat file versions of RunTest are run if make test is used + # in msys + add_test(pcre2_test_sh sh.exe ${PROJECT_BINARY_DIR}/pcre2_test.sh) + if(PCRE2_BUILD_PCRE2GREP) + add_test(pcre2_grep_test sh.exe ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh) + endif() + endif() + endif() + + # Changed to accommodate testing whichever location was just built + + if(PCRE2_SUPPORT_JIT) + add_test(pcre2_jit_test pcre2_jit_test) + endif() + + if(PCRE2_BUILD_PCRE2_8) + add_test(pcre2posix_test pcre2posix_test) + endif() +endif() + # Installation -SET(CMAKE_INSTALL_ALWAYS 1) +set(CMAKE_INSTALL_ALWAYS 1) -INSTALL(TARGETS ${targets} - RUNTIME DESTINATION bin - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -INSTALL(FILES ${pkg_config_files} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) -INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config" +install( + TARGETS ${TARGETS} + RUNTIME DESTINATION bin + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} +) +install(FILES ${pkg_config_files} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) +install( + FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config" DESTINATION bin # Set 0755 permissions - PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ 
GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE +) -INSTALL(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include) +install(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include) # CMake config files. -set(PCRE2_CONFIG_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config.cmake.in) +set(PCRE2_CONFIG_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config.cmake.in) set(PCRE2_CONFIG_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config.cmake) configure_file(${PCRE2_CONFIG_IN} ${PCRE2_CONFIG_OUT} @ONLY) -set(PCRE2_CONFIG_VERSION_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config-version.cmake.in) +set(PCRE2_CONFIG_VERSION_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config-version.cmake.in) set(PCRE2_CONFIG_VERSION_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config-version.cmake) configure_file(${PCRE2_CONFIG_VERSION_IN} ${PCRE2_CONFIG_VERSION_OUT} @ONLY) -install(FILES ${PCRE2_CONFIG_OUT} ${PCRE2_CONFIG_VERSION_OUT} DESTINATION cmake) +install(FILES ${PCRE2_CONFIG_OUT} ${PCRE2_CONFIG_VERSION_OUT} DESTINATION "${PCRE2_INSTALL_CMAKEDIR}") -IF(MSVC AND INSTALL_MSVC_PDB) - INSTALL(FILES ${dll_pdb_files} DESTINATION bin CONFIGURATIONS RelWithDebInfo) - INSTALL(FILES ${dll_pdb_debug_files} DESTINATION bin CONFIGURATIONS Debug) -ENDIF(MSVC AND INSTALL_MSVC_PDB) +file( + GLOB txts + AUTHORS.md + LICENCE.md + SECURITY.md +) + +install(FILES ${txts} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/pcre2) + +if(MSVC AND INSTALL_MSVC_PDB) + install(FILES ${DLL_PDB_FILES} DESTINATION bin CONFIGURATIONS RelWithDebInfo) + install(FILES ${DLL_PDB_DEBUG_FILES} DESTINATION bin CONFIGURATIONS Debug) +endif() # Help, only for nice output -IF(BUILD_STATIC_LIBS) - SET(BUILD_STATIC_LIBS ON) -ELSE(BUILD_STATIC_LIBS) - SET(BUILD_STATIC_LIBS OFF) -ENDIF(BUILD_STATIC_LIBS) +if(BUILD_STATIC_LIBS) + set(BUILD_STATIC_LIBS ON) +else() + set(BUILD_STATIC_LIBS OFF) +endif() -IF(PCRE2_HEAP_MATCH_RECURSE) - 
MESSAGE(WARNING "HEAP_MATCH_RECURSE is obsolete and does nothing.") -ENDIF(PCRE2_HEAP_MATCH_RECURSE) +if(PCRE2_HEAP_MATCH_RECURSE) + message(WARNING "HEAP_MATCH_RECURSE is obsolete and does nothing.") +endif() -IF(PCRE2_SHOW_REPORT) - STRING(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype) - IF (CMAKE_C_FLAGS) - SET(cfsp " ") - ENDIF(CMAKE_C_FLAGS) - MESSAGE(STATUS "") - MESSAGE(STATUS "") - MESSAGE(STATUS "PCRE2-${PCRE2_MAJOR}.${PCRE2_MINOR} configuration summary:") - MESSAGE(STATUS "") - MESSAGE(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}") - MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}") - MESSAGE(STATUS " C compiler flags ................ : ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}") - MESSAGE(STATUS "") - MESSAGE(STATUS " Build 8 bit PCRE2 library ....... : ${PCRE2_BUILD_PCRE2_8}") - MESSAGE(STATUS " Build 16 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE2_16}") - MESSAGE(STATUS " Build 32 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE2_32}") - MESSAGE(STATUS " Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}") - MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}") - MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}") - MESSAGE(STATUS " \\C is disabled .................. : ${PCRE2_NEVER_BACKSLASH_C}") - MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}") - MESSAGE(STATUS " EBCDIC coding with NL=0x25 ...... : ${PCRE2_EBCDIC_NL25}") - MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}") - MESSAGE(STATUS " Internal link size .............. : ${PCRE2_LINK_SIZE}") - MESSAGE(STATUS " Maximum variable lookbehind ..... : ${PCRE2_MAX_VARLOOKBEHIND}") - MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}") - MESSAGE(STATUS " Heap limit ...................... : ${PCRE2_HEAP_LIMIT}") - MESSAGE(STATUS " Match limit ..................... 
: ${PCRE2_MATCH_LIMIT}") - MESSAGE(STATUS " Match depth limit ............... : ${PCRE2_MATCH_LIMIT_DEPTH}") - MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}") - MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}") - MESSAGE(STATUS " with PIC enabled ............. : ${PCRE2_STATIC_PIC}") - MESSAGE(STATUS " Support Valgrind .................: ${PCRE2_SUPPORT_VALGRIND}") - IF(PCRE2_DISABLE_PERCENT_ZT) - MESSAGE(STATUS " Use %zu and %td ..................: OFF" ) - ELSE(PCRE2_DISABLE_PERCENT_ZT) - MESSAGE(STATUS " Use %zu and %td ..................: AUTO" ) - ENDIF(PCRE2_DISABLE_PERCENT_ZT) +if(PCRE2_SHOW_REPORT) + message(STATUS "") + message(STATUS "") + message(STATUS "PCRE2-${PCRE2_MAJOR}.${PCRE2_MINOR} configuration summary:") + message(STATUS "") + message(STATUS " Install prefix .................... : ${CMAKE_INSTALL_PREFIX}") + message(STATUS " C compiler ........................ : ${CMAKE_C_COMPILER}") - IF(MINGW AND BUILD_SHARED_LIBS) - MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}") - MESSAGE(STATUS " Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}") - ENDIF(MINGW AND BUILD_SHARED_LIBS) + if(CMAKE_C_FLAGS) + set(CFSP " ") + endif() + if(CMAKE_CONFIGURATION_TYPES) + foreach(config IN LISTS CMAKE_CONFIGURATION_TYPES) + string(TOUPPER "${config}" buildtype) + string(LENGTH " (${config})" buildtypelen) + math(EXPR dotslen "18 - ${buildtypelen}") + string(REPEAT "." ${dotslen} dots) + message(STATUS " C compiler flags (${config}) ${dots} : ${CMAKE_C_FLAGS}${CFSP}${CMAKE_C_FLAGS_${buildtype}}") + endforeach() + else() + string(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype) + message(STATUS " C compiler flags .................. 
: ${CMAKE_C_FLAGS}${CFSP}${CMAKE_C_FLAGS_${buildtype}}") + endif() - IF(MSVC) - MESSAGE(STATUS " Install MSVC .pdb files ..........: ${INSTALL_MSVC_PDB}") - ENDIF(MSVC) + message(STATUS "") + if(CMAKE_CONFIGURATION_TYPES) + message(STATUS " Build configurations .............. : ${CMAKE_CONFIGURATION_TYPES}") + else() + message(STATUS " Build type ........................ : ${CMAKE_BUILD_TYPE}") + endif() + message(STATUS " Build 8 bit PCRE2 library ......... : ${PCRE2_BUILD_PCRE2_8}") + message(STATUS " Build 16 bit PCRE2 library ........ : ${PCRE2_BUILD_PCRE2_16}") + message(STATUS " Build 32 bit PCRE2 library ........ : ${PCRE2_BUILD_PCRE2_32}") + message(STATUS " Include debugging code ............ : ${PCRE2_DEBUG}") + message(STATUS " Enable JIT compiling support ...... : ${PCRE2_SUPPORT_JIT}") + message(STATUS " Use SELinux allocator in JIT ...... : ${PCRE2_SUPPORT_JIT_SEALLOC}") + message(STATUS " Enable Unicode support ............ : ${PCRE2_SUPPORT_UNICODE}") + message(STATUS " Newline char/sequence ............. : ${PCRE2_NEWLINE}") + message(STATUS " \\R matches only ANYCRLF ........... : ${PCRE2_SUPPORT_BSR_ANYCRLF}") + message(STATUS " \\C is disabled .................... : ${PCRE2_NEVER_BACKSLASH_C}") + message(STATUS " EBCDIC coding ..................... : ${PCRE2_EBCDIC}") + message(STATUS " EBCDIC coding with NL=0x25 ........ : ${PCRE2_EBCDIC_NL25}") + message(STATUS " Rebuild char tables ............... : ${PCRE2_REBUILD_CHARTABLES}") + message(STATUS " Internal link size ................ : ${PCRE2_LINK_SIZE}") + message(STATUS " Maximum variable lookbehind ....... : ${PCRE2_MAX_VARLOOKBEHIND}") + message(STATUS " Parentheses nest limit ............ : ${PCRE2_PARENS_NEST_LIMIT}") + message(STATUS " Heap limit ........................ : ${PCRE2_HEAP_LIMIT}") + message(STATUS " Match limit ....................... : ${PCRE2_MATCH_LIMIT}") + message(STATUS " Match depth limit ................. 
: ${PCRE2_MATCH_LIMIT_DEPTH}") + message(STATUS " Build shared libs ................. : ${BUILD_SHARED_LIBS}") + message(STATUS " Build static libs ................. : ${BUILD_STATIC_LIBS}") + message(STATUS " with PIC enabled ............... : ${PCRE2_STATIC_PIC}") + message(STATUS " Build pcre2grep ................... : ${PCRE2_BUILD_PCRE2GREP}") + message(STATUS " Enable JIT in pcre2grep ........... : ${PCRE2GREP_SUPPORT_JIT}") + message(STATUS " Enable callouts in pcre2grep ...... : ${PCRE2GREP_SUPPORT_CALLOUT}") + message(STATUS " Enable callout fork in pcre2grep .. : ${PCRE2GREP_SUPPORT_CALLOUT_FORK}") + message(STATUS " Buffer size for pcre2grep ......... : ${PCRE2GREP_BUFSIZE}") + message(STATUS " Build tests (implies pcre2test .... : ${PCRE2_BUILD_TESTS}") + message(STATUS " and pcre2grep)") + if(ZLIB_FOUND) + message(STATUS " Link pcre2grep with libz .......... : ${PCRE2_SUPPORT_LIBZ}") + else() + message(STATUS " Link pcre2grep with libz .......... : Library not found") + endif() + if(BZIP2_FOUND) + message(STATUS " Link pcre2grep with libbz2 ........ : ${PCRE2_SUPPORT_LIBBZ2}") + else() + message(STATUS " Link pcre2grep with libbz2 ........ : Library not found") + endif() + if(EDITLINE_FOUND) + message(STATUS " Link pcre2test with libeditline ... : ${PCRE2_SUPPORT_LIBEDIT}") + else() + message(STATUS " Link pcre2test with libeditline ... : Library not found") + endif() + if(READLINE_FOUND) + message(STATUS " Link pcre2test with libreadline ... : ${PCRE2_SUPPORT_LIBREADLINE}") + else() + message(STATUS " Link pcre2test with libreadline ... : Library not found") + endif() + message(STATUS " Support Valgrind .................. : ${PCRE2_SUPPORT_VALGRIND}") + if(PCRE2_DISABLE_PERCENT_ZT) + message(STATUS " Use %zu and %td ................... : OFF") + else() + message(STATUS " Use %zu and %td ................... 
: AUTO") + endif() - MESSAGE(STATUS "") -ENDIF(PCRE2_SHOW_REPORT) + if(MINGW AND BUILD_SHARED_LIBS) + message(STATUS " Non-standard dll names (prefix) ... : ${NON_STANDARD_LIB_PREFIX}") + message(STATUS " Non-standard dll names (suffix) ... : ${NON_STANDARD_LIB_SUFFIX}") + endif() + + if(MSVC) + message(STATUS " Install MSVC .pdb files ........... : ${INSTALL_MSVC_PDB}") + endif() + + message(STATUS "") +endif() # end CMakeLists.txt diff --git a/libpcre/LICENCE b/libpcre/LICENCE.md similarity index 55% rename from libpcre/LICENCE rename to libpcre/LICENCE.md index 3c1ef032d..f58ceb75a 100644 --- a/libpcre/LICENCE +++ b/libpcre/LICENCE.md @@ -1,5 +1,8 @@ -PCRE2 LICENCE -------------- +PCRE2 License +============= + +| SPDX-License-Identifier: | BSD-3-Clause WITH PCRE2-exception | +|---------|-------| PCRE2 is a library of functions to support regular expressions whose syntax and semantics are as close as possible to those of the Perl 5 language. @@ -16,40 +19,46 @@ optimize pattern matching. This is an optional feature that can be omitted when the library is built. -THE BASIC LIBRARY FUNCTIONS ---------------------------- +COPYRIGHT +--------- -Written by: Philip Hazel -Email local part: Philip.Hazel -Email domain: gmail.com +### The basic library functions -Retired from University of Cambridge Computing Service, -Cambridge, England. + Written by: Philip Hazel + Email local part: Philip.Hazel + Email domain: gmail.com -Copyright (c) 1997-2024 University of Cambridge -All rights reserved. + Retired from University of Cambridge Computing Service, + Cambridge, England. + Copyright (c) 1997-2007 University of Cambridge + Copyright (c) 2007-2024 Philip Hazel + All rights reserved. 
-PCRE2 JUST-IN-TIME COMPILATION SUPPORT --------------------------------------- +### PCRE2 Just-In-Time compilation support -Written by: Zoltan Herczeg -Email local part: hzmester -Email domain: freemail.hu + Written by: Zoltan Herczeg + Email local part: hzmester + Email domain: freemail.hu -Copyright(c) 2010-2024 Zoltan Herczeg -All rights reserved. + Copyright (c) 2010-2024 Zoltan Herczeg + All rights reserved. +### Stack-less Just-In-Time compiler -STACK-LESS JUST-IN-TIME COMPILER --------------------------------- + Written by: Zoltan Herczeg + Email local part: hzmester + Email domain: freemail.hu -Written by: Zoltan Herczeg -Email local part: hzmester -Email domain: freemail.hu + Copyright (c) 2009-2024 Zoltan Herczeg + All rights reserved. -Copyright(c) 2009-2024 Zoltan Herczeg -All rights reserved. +### All other contributions + +Many other contributors have participated in the authorship of PCRE2. As PCRE2 +has never required a Contributor Licensing Agreement, or other copyright +assignment agreement, all contributions have copyright retained by each +original contributor or their employer. THE "BSD" LICENCE @@ -58,16 +67,16 @@ THE "BSD" LICENCE Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright notices, - this list of conditions and the following disclaimer. +* Redistributions of source code must retain the above copyright notices, + this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notices, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
+* Redistributions in binary form must reproduce the above copyright + notices, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. - * Neither the name of the University of Cambridge nor the names of any - contributors may be used to endorse or promote products derived from this - software without specific prior written permission. +* Neither the name of the University of Cambridge nor the names of any + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE diff --git a/libpcre/MODULE.bazel b/libpcre/MODULE.bazel new file mode 100644 index 000000000..8f4c0b46c --- /dev/null +++ b/libpcre/MODULE.bazel @@ -0,0 +1,9 @@ +module( + name = "pcre2", + version = "10.45", + compatibility_level = 1, +) + +bazel_dep(name = "rules_cc", version = "0.0.1") +bazel_dep(name = "bazel_skylib", version = "1.2.1") +bazel_dep(name = "platforms", version = "0.0.4") diff --git a/libpcre/Makefile.am b/libpcre/Makefile.am index f2762493f..721ca204d 100644 --- a/libpcre/Makefile.am +++ b/libpcre/Makefile.am @@ -10,12 +10,9 @@ AM_CPPFLAGS="-I$(srcdir)/src" ## Specify the documentation files that are distributed. dist_doc_DATA = \ - AUTHORS \ - LICENCE - -dist_html_DATA = - -dist_man_MANS = + AUTHORS.md \ + LICENCE.md \ + SECURITY.md # The Libtool libraries to install. We'll add to this later. @@ -35,9 +32,19 @@ bin_PROGRAMS = noinst_PROGRAMS = # Additional files to delete on 'make clean', 'make distclean', -# and 'make maintainer-clean'. +# and 'make maintainer-clean'. It turns out that the default is to delete only +# those binaries that *this* configuration has created. If the configuration +# has been changed, some binaries may not get automatically deleted. Therefore +# we list them here. 
+ +CLEANFILES = \ + pcre2_dftables \ + pcre2_jit_test \ + pcre2fuzzcheck-8 \ + pcre2fuzzcheck-16 \ + pcre2fuzzcheck-32 \ + pcre2demo -CLEANFILES = DISTCLEANFILES = src/config.h.in~ MAINTAINERCLEANFILES = @@ -51,6 +58,22 @@ EXTRA_DIST = EXTRA_DIST += \ m4/ax_pthread.m4 m4/pcre2_visibility.m4 +# These are support files for building with Bazel or Zig + +EXTRA_DIST += \ + BUILD.bazel \ + MODULE.bazel \ + WORKSPACE.bazel \ + build.zig + +# These are support files for building under VMS + +EXTRA_DIST += \ + vms/configure.com \ + vms/openvms_readme.txt \ + vms/pcre2.h_patch \ + vms/stdint.h + # These files are usable versions of pcre2.h and config.h that are distributed # for the benefit of people who are building PCRE2 manually, without the # Autotools support. @@ -135,6 +158,8 @@ COMMON_SOURCES = \ src/pcre2_auto_possess.c \ src/pcre2_chkdint.c \ src/pcre2_compile.c \ + src/pcre2_compile.h \ + src/pcre2_compile_class.c \ src/pcre2_config.c \ src/pcre2_context.c \ src/pcre2_convert.c \ @@ -144,6 +169,7 @@ COMMON_SOURCES = \ src/pcre2_find_bracket.c \ src/pcre2_internal.h \ src/pcre2_intmodedep.h \ + src/pcre2_jit_char_inc.h \ src/pcre2_jit_compile.c \ src/pcre2_maketables.c \ src/pcre2_match.c \ @@ -160,6 +186,7 @@ COMMON_SOURCES = \ src/pcre2_tables.c \ src/pcre2_ucd.c \ src/pcre2_ucp.h \ + src/pcre2_util.h \ src/pcre2_valid_utf.c \ src/pcre2_xclass.c @@ -215,6 +242,7 @@ endif # WITH_PCRE2_32 EXTRA_DIST += src/pcre2_chartables.c.dist CLEANFILES += src/pcre2_chartables.c + if WITH_PCRE2_8 libpcre2_8_la_LDFLAGS = $(EXTRA_LIBPCRE2_8_LDFLAGS) endif # WITH_PCRE2_8 @@ -264,6 +292,10 @@ libpcre2_posix_la_CFLAGS += $(GCOV_CFLAGS) endif # WITH_GCOV endif # WITH_PCRE2_8 +## If fuzzer support is enabled, build a non-distributed library containing the +## fuzzing function. Also build the standalone checking binary from the same +## source but using -DSTANDALONE. 
+ # gcov/lcov code coverage reporting # # Coverage reporting targets: @@ -367,7 +399,8 @@ endif # WITH_GCOV EXTRA_DIST += \ cmake/COPYING-CMAKE-SCRIPTS \ - cmake/FindPackageHandleStandardArgs.cmake \ + cmake/FindEditline.cmake \ + cmake/FindReadline.cmake \ cmake/pcre2-config-version.cmake.in \ cmake/pcre2-config.cmake.in \ CMakeLists.txt \ diff --git a/libpcre/Makefile.in b/libpcre/Makefile.in index 512d833db..ccde2c261 100644 --- a/libpcre/Makefile.in +++ b/libpcre/Makefile.in @@ -110,16 +110,17 @@ noinst_PROGRAMS = $(am__EXEEXT_1) @WITH_GCOV_FALSE@am__append_13 = src/*.gcda src/*.gcno subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ - $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ - $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_pthread.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pcre2_visibility.m4 \ $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \ $(am__configure_deps) $(dist_noinst_SCRIPTS) $(dist_doc_DATA) \ - $(dist_html_DATA) $(include_HEADERS) $(am__DIST_COMMON) + $(include_HEADERS) $(am__DIST_COMMON) am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ configure.lineno config.status.lineno mkinstalldirs = $(install_sh) -d @@ -128,8 +129,7 @@ CONFIG_CLEAN_FILES = pcre2-config src/pcre2.h CONFIG_CLEAN_VPATH_FILES = am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)" \ "$(DESTDIR)$(bindir)" "$(DESTDIR)$(docdir)" \ - "$(DESTDIR)$(htmldir)" "$(DESTDIR)$(includedir)" \ - "$(DESTDIR)$(includedir)" + "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)" @WITH_REBUILD_CHARTABLES_TRUE@am__EXEEXT_1 = pcre2_dftables$(EXEEXT) PROGRAMS = $(bin_PROGRAMS) 
$(noinst_PROGRAMS) am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; @@ -162,22 +162,24 @@ am__uninstall_files_from_dir = { \ LTLIBRARIES = $(lib_LTLIBRARIES) libpcre2_16_la_DEPENDENCIES = am__libpcre2_16_la_SOURCES_DIST = src/pcre2_auto_possess.c \ - src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_config.c \ + src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_compile.h \ + src/pcre2_compile_class.c src/pcre2_config.c \ src/pcre2_context.c src/pcre2_convert.c src/pcre2_dfa_match.c \ src/pcre2_error.c src/pcre2_extuni.c src/pcre2_find_bracket.c \ src/pcre2_internal.h src/pcre2_intmodedep.h \ - src/pcre2_jit_compile.c src/pcre2_maketables.c \ - src/pcre2_match.c src/pcre2_match_data.c src/pcre2_newline.c \ - src/pcre2_ord2utf.c src/pcre2_pattern_info.c \ - src/pcre2_script_run.c src/pcre2_serialize.c \ - src/pcre2_string_utils.c src/pcre2_study.c \ - src/pcre2_substitute.c src/pcre2_substring.c \ + src/pcre2_jit_char_inc.h src/pcre2_jit_compile.c \ + src/pcre2_maketables.c src/pcre2_match.c \ + src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \ + src/pcre2_pattern_info.c src/pcre2_script_run.c \ + src/pcre2_serialize.c src/pcre2_string_utils.c \ + src/pcre2_study.c src/pcre2_substitute.c src/pcre2_substring.c \ src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \ - src/pcre2_valid_utf.c src/pcre2_xclass.c + src/pcre2_util.h src/pcre2_valid_utf.c src/pcre2_xclass.c am__dirstamp = $(am__leading_dot)dirstamp am__objects_1 = src/libpcre2_16_la-pcre2_auto_possess.lo \ src/libpcre2_16_la-pcre2_chkdint.lo \ src/libpcre2_16_la-pcre2_compile.lo \ + src/libpcre2_16_la-pcre2_compile_class.lo \ src/libpcre2_16_la-pcre2_config.lo \ src/libpcre2_16_la-pcre2_context.lo \ src/libpcre2_16_la-pcre2_convert.lo \ @@ -218,21 +220,23 @@ libpcre2_16_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ @WITH_PCRE2_16_TRUE@am_libpcre2_16_la_rpath = -rpath $(libdir) libpcre2_32_la_DEPENDENCIES = am__libpcre2_32_la_SOURCES_DIST = src/pcre2_auto_possess.c \ - 
src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_config.c \ + src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_compile.h \ + src/pcre2_compile_class.c src/pcre2_config.c \ src/pcre2_context.c src/pcre2_convert.c src/pcre2_dfa_match.c \ src/pcre2_error.c src/pcre2_extuni.c src/pcre2_find_bracket.c \ src/pcre2_internal.h src/pcre2_intmodedep.h \ - src/pcre2_jit_compile.c src/pcre2_maketables.c \ - src/pcre2_match.c src/pcre2_match_data.c src/pcre2_newline.c \ - src/pcre2_ord2utf.c src/pcre2_pattern_info.c \ - src/pcre2_script_run.c src/pcre2_serialize.c \ - src/pcre2_string_utils.c src/pcre2_study.c \ - src/pcre2_substitute.c src/pcre2_substring.c \ + src/pcre2_jit_char_inc.h src/pcre2_jit_compile.c \ + src/pcre2_maketables.c src/pcre2_match.c \ + src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \ + src/pcre2_pattern_info.c src/pcre2_script_run.c \ + src/pcre2_serialize.c src/pcre2_string_utils.c \ + src/pcre2_study.c src/pcre2_substitute.c src/pcre2_substring.c \ src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \ - src/pcre2_valid_utf.c src/pcre2_xclass.c + src/pcre2_util.h src/pcre2_valid_utf.c src/pcre2_xclass.c am__objects_3 = src/libpcre2_32_la-pcre2_auto_possess.lo \ src/libpcre2_32_la-pcre2_chkdint.lo \ src/libpcre2_32_la-pcre2_compile.lo \ + src/libpcre2_32_la-pcre2_compile_class.lo \ src/libpcre2_32_la-pcre2_config.lo \ src/libpcre2_32_la-pcre2_context.lo \ src/libpcre2_32_la-pcre2_convert.lo \ @@ -269,21 +273,23 @@ libpcre2_32_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ @WITH_PCRE2_32_TRUE@am_libpcre2_32_la_rpath = -rpath $(libdir) libpcre2_8_la_DEPENDENCIES = am__libpcre2_8_la_SOURCES_DIST = src/pcre2_auto_possess.c \ - src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_config.c \ + src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_compile.h \ + src/pcre2_compile_class.c src/pcre2_config.c \ src/pcre2_context.c src/pcre2_convert.c src/pcre2_dfa_match.c \ src/pcre2_error.c src/pcre2_extuni.c src/pcre2_find_bracket.c \ src/pcre2_internal.h 
src/pcre2_intmodedep.h \ - src/pcre2_jit_compile.c src/pcre2_maketables.c \ - src/pcre2_match.c src/pcre2_match_data.c src/pcre2_newline.c \ - src/pcre2_ord2utf.c src/pcre2_pattern_info.c \ - src/pcre2_script_run.c src/pcre2_serialize.c \ - src/pcre2_string_utils.c src/pcre2_study.c \ - src/pcre2_substitute.c src/pcre2_substring.c \ + src/pcre2_jit_char_inc.h src/pcre2_jit_compile.c \ + src/pcre2_maketables.c src/pcre2_match.c \ + src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \ + src/pcre2_pattern_info.c src/pcre2_script_run.c \ + src/pcre2_serialize.c src/pcre2_string_utils.c \ + src/pcre2_study.c src/pcre2_substitute.c src/pcre2_substring.c \ src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \ - src/pcre2_valid_utf.c src/pcre2_xclass.c + src/pcre2_util.h src/pcre2_valid_utf.c src/pcre2_xclass.c am__objects_5 = src/libpcre2_8_la-pcre2_auto_possess.lo \ src/libpcre2_8_la-pcre2_chkdint.lo \ src/libpcre2_8_la-pcre2_compile.lo \ + src/libpcre2_8_la-pcre2_compile_class.lo \ src/libpcre2_8_la-pcre2_config.lo \ src/libpcre2_8_la-pcre2_context.lo \ src/libpcre2_8_la-pcre2_convert.lo \ @@ -353,6 +359,7 @@ am__depfiles_remade = \ src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo \ src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo \ src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo \ src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo \ src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo \ src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo \ @@ -381,6 +388,7 @@ am__depfiles_remade = \ src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo \ src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo \ src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo \ src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo \ src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo \ src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo \ @@ -409,6 +417,7 @@ am__depfiles_remade = \ src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo 
\ src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo \ src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo \ src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo \ src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo \ src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo \ @@ -468,7 +477,7 @@ am__can_run_installinfo = \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac -DATA = $(dist_doc_DATA) $(dist_html_DATA) +DATA = $(dist_doc_DATA) HEADERS = $(include_HEADERS) $(nodist_include_HEADERS) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, @@ -663,11 +672,10 @@ am__set_b = \ *) \ b='$*';; \ esac -am__DIST_COMMON = $(dist_man_MANS) $(srcdir)/Makefile.in \ - $(srcdir)/pcre2-config.in $(top_srcdir)/src/config.h.in \ - $(top_srcdir)/src/pcre2.h.in AUTHORS ar-lib compile \ - config.guess config.sub depcomp install-sh ltmain.sh missing \ - test-driver +am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/pcre2-config.in \ + $(top_srcdir)/src/config.h.in $(top_srcdir)/src/pcre2.h.in \ + AUTHORS.md ar-lib compile config.guess config.sub depcomp \ + install-sh ltmain.sh missing test-driver DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) distdir = $(PACKAGE)-$(VERSION) top_distdir = $(distdir) @@ -678,9 +686,9 @@ am__remove_distdir = \ || { sleep 5 && rm -rf "$(distdir)"; }; \ else :; fi am__post_remove_distdir = $(am__remove_distdir) -DIST_ARCHIVES = $(distdir).tar.gz +DIST_ARCHIVES = $(distdir).tar.gz $(distdir).tar.bz2 $(distdir).zip GZIP_ENV = --best -DIST_TARGETS = dist-gzip +DIST_TARGETS = dist-bzip2 dist-gzip dist-zip # Exists only to be overridden by the user if desired. AM_DISTCHECK_DVI_TARGET = dvi distuninstallcheck_listfiles = find . 
-type f -print @@ -721,6 +729,7 @@ EXTRA_LIBPCRE2_32_LDFLAGS = @EXTRA_LIBPCRE2_32_LDFLAGS@ EXTRA_LIBPCRE2_8_LDFLAGS = @EXTRA_LIBPCRE2_8_LDFLAGS@ EXTRA_LIBPCRE2_POSIX_LDFLAGS = @EXTRA_LIBPCRE2_POSIX_LDFLAGS@ FGREP = @FGREP@ +FILECMD = @FILECMD@ GCOV_CFLAGS = @GCOV_CFLAGS@ GCOV_CXXFLAGS = @GCOV_CXXFLAGS@ GCOV_LIBS = @GCOV_LIBS@ @@ -735,14 +744,18 @@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LCOV = @LCOV@ LD = @LD@ LDFLAGS = @LDFLAGS@ +LIBBZ2 = @LIBBZ2@ LIBOBJS = @LIBOBJS@ +LIBREADLINE = @LIBREADLINE@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ +LIBZ = @LIBZ@ LIB_POSTFIX = @LIB_POSTFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ @@ -766,15 +779,22 @@ PCRE2_MAJOR = @PCRE2_MAJOR@ PCRE2_MINOR = @PCRE2_MINOR@ PCRE2_PRERELEASE = @PCRE2_PRERELEASE@ PCRE2_STATIC_CFLAG = @PCRE2_STATIC_CFLAG@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SHTOOL = @SHTOOL@ STRIP = @STRIP@ +VALGRIND_CFLAGS = @VALGRIND_CFLAGS@ +VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ -VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ @@ -787,6 +807,7 @@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ +ax_pthread_config = @ax_pthread_config@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ @@ -835,11 +856,10 @@ AUTOMAKE_OPTIONS = subdir-objects ACLOCAL_AMFLAGS = -I m4 AM_CPPFLAGS = "-I$(srcdir)/src" dist_doc_DATA = \ - AUTHORS \ - LICENCE + AUTHORS.md \ + LICENCE.md \ + SECURITY.md -dist_html_DATA = -dist_man_MANS = # The Libtool libraries to install. 
We'll add to this later. lib_LTLIBRARIES = $(am__append_2) $(am__append_3) $(am__append_4) \ @@ -848,8 +868,13 @@ check_SCRIPTS = dist_noinst_SCRIPTS = # Additional files to delete on 'make clean', 'make distclean', -# and 'make maintainer-clean'. -CLEANFILES = src/pcre2_chartables.c +# and 'make maintainer-clean'. It turns out that the default is to delete only +# those binaries that *this* configuration has created. If the configuration +# has been changed, some binaries may not get automatically deleted. Therefore +# we list them here. +CLEANFILES = pcre2_dftables pcre2_jit_test pcre2fuzzcheck-8 \ + pcre2fuzzcheck-16 pcre2fuzzcheck-32 pcre2demo \ + src/pcre2_chartables.c DISTCLEANFILES = src/config.h.in~ $(am__append_13) MAINTAINERCLEANFILES = src/pcre2.h.generic src/config.h.generic @@ -858,6 +883,10 @@ MAINTAINERCLEANFILES = src/pcre2.h.generic src/config.h.generic # These files contain additional m4 macros that are used by autoconf. +# These are support files for building with Bazel or Zig + +# These are support files for building under VMS + # These files are usable versions of pcre2.h and config.h that are distributed # for the benefit of people who are building PCRE2 manually, without the # Autotools support. @@ -866,10 +895,12 @@ MAINTAINERCLEANFILES = src/pcre2.h.generic src/config.h.generic # The pcre2_chartables.c.dist file is the default version of # pcre2_chartables.c, used unless --enable-rebuild-chartables is specified. 
-EXTRA_DIST = m4/ax_pthread.m4 m4/pcre2_visibility.m4 \ +EXTRA_DIST = m4/ax_pthread.m4 m4/pcre2_visibility.m4 BUILD.bazel \ + MODULE.bazel WORKSPACE.bazel build.zig vms/configure.com \ + vms/openvms_readme.txt vms/pcre2.h_patch vms/stdint.h \ src/pcre2.h.generic src/config.h.generic src/pcre2_ucptables.c \ src/pcre2_chartables.c.dist cmake/COPYING-CMAKE-SCRIPTS \ - cmake/FindPackageHandleStandardArgs.cmake \ + cmake/FindEditline.cmake cmake/FindReadline.cmake \ cmake/pcre2-config-version.cmake.in \ cmake/pcre2-config.cmake.in CMakeLists.txt config-cmake.h.in @@ -887,6 +918,8 @@ COMMON_SOURCES = \ src/pcre2_auto_possess.c \ src/pcre2_chkdint.c \ src/pcre2_compile.c \ + src/pcre2_compile.h \ + src/pcre2_compile_class.c \ src/pcre2_config.c \ src/pcre2_context.c \ src/pcre2_convert.c \ @@ -896,6 +929,7 @@ COMMON_SOURCES = \ src/pcre2_find_bracket.c \ src/pcre2_internal.h \ src/pcre2_intmodedep.h \ + src/pcre2_jit_char_inc.h \ src/pcre2_jit_compile.c \ src/pcre2_maketables.c \ src/pcre2_match.c \ @@ -912,6 +946,7 @@ COMMON_SOURCES = \ src/pcre2_tables.c \ src/pcre2_ucd.c \ src/pcre2_ucp.h \ + src/pcre2_util.h \ src/pcre2_valid_utf.c \ src/pcre2_xclass.c @@ -991,7 +1026,7 @@ all: $(BUILT_SOURCES) .SUFFIXES: .c .lo .log .o .obj .test .test$(EXEEXT) .trs am--refresh: Makefile @: -$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -1017,9 +1052,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) $(SHELL) ./config.status --recheck -$(top_srcdir)/configure: $(am__configure_deps) +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) $(am__cd) $(srcdir) && $(AUTOCONF) -$(ACLOCAL_M4): $(am__aclocal_m4_deps) +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) $(am__cd) $(srcdir) && 
$(ACLOCAL) $(ACLOCAL_AMFLAGS) $(am__aclocal_m4_deps): @@ -1030,7 +1065,7 @@ src/config.h: src/stamp-h1 src/stamp-h1: $(top_srcdir)/src/config.h.in $(top_builddir)/config.status @rm -f src/stamp-h1 cd $(top_builddir) && $(SHELL) ./config.status src/config.h -$(top_srcdir)/src/config.h.in: $(am__configure_deps) +$(top_srcdir)/src/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) rm -f src/stamp-h1 touch $@ @@ -1146,6 +1181,8 @@ src/libpcre2_16_la-pcre2_chkdint.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_16_la-pcre2_compile.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_compile_class.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_16_la-pcre2_config.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_16_la-pcre2_context.lo: src/$(am__dirstamp) \ @@ -1205,6 +1242,8 @@ src/libpcre2_32_la-pcre2_chkdint.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_32_la-pcre2_compile.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_compile_class.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_32_la-pcre2_config.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_32_la-pcre2_context.lo: src/$(am__dirstamp) \ @@ -1264,6 +1303,8 @@ src/libpcre2_8_la-pcre2_chkdint.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_8_la-pcre2_compile.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_compile_class.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_8_la-pcre2_config.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_8_la-pcre2_context.lo: src/$(am__dirstamp) \ @@ -1376,6 +1417,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo@am__quote@ # am--include-marker @@ -1404,6 +1446,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo@am__quote@ # am--include-marker @@ -1432,6 +1475,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo@am__quote@ # am--include-marker @@ -1510,6 +1554,13 @@ src/libpcre2_16_la-pcre2_compile.lo: src/pcre2_compile.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c +src/libpcre2_16_la-pcre2_compile_class.lo: src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_compile_class.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Tpo -c -o src/libpcre2_16_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile_class.c' object='src/libpcre2_16_la-pcre2_compile_class.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c + src/libpcre2_16_la-pcre2_config.lo: src/pcre2_config.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_config.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Tpo -c -o src/libpcre2_16_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo @@ -1706,6 +1757,13 @@ src/libpcre2_32_la-pcre2_compile.lo: src/pcre2_compile.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c +src/libpcre2_32_la-pcre2_compile_class.lo: src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_compile_class.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Tpo -c -o src/libpcre2_32_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile_class.c' object='src/libpcre2_32_la-pcre2_compile_class.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c + src/libpcre2_32_la-pcre2_config.lo: src/pcre2_config.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_config.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Tpo -c -o src/libpcre2_32_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo @@ -1902,6 +1960,13 @@ src/libpcre2_8_la-pcre2_compile.lo: src/pcre2_compile.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c +src/libpcre2_8_la-pcre2_compile_class.lo: src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_compile_class.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Tpo -c -o src/libpcre2_8_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile_class.c' object='src/libpcre2_8_la-pcre2_compile_class.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c + src/libpcre2_8_la-pcre2_config.lo: src/pcre2_config.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_config.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Tpo -c -o src/libpcre2_8_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo @@ -2114,27 +2179,6 @@ uninstall-dist_docDATA: @list='$(dist_doc_DATA)'; test -n "$(docdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(docdir)'; $(am__uninstall_files_from_dir) 
-install-dist_htmlDATA: $(dist_html_DATA) - @$(NORMAL_INSTALL) - @list='$(dist_html_DATA)'; test -n "$(htmldir)" || list=; \ - if test -n "$$list"; then \ - echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)'"; \ - $(MKDIR_P) "$(DESTDIR)$(htmldir)" || exit 1; \ - fi; \ - for p in $$list; do \ - if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ - echo "$$d$$p"; \ - done | $(am__base_list) | \ - while read files; do \ - echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(htmldir)'"; \ - $(INSTALL_DATA) $$files "$(DESTDIR)$(htmldir)" || exit $$?; \ - done - -uninstall-dist_htmlDATA: - @$(NORMAL_UNINSTALL) - @list='$(dist_html_DATA)'; test -n "$(htmldir)" || list=; \ - files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ - dir='$(DESTDIR)$(htmldir)'; $(am__uninstall_files_from_dir) install-includeHEADERS: $(include_HEADERS) @$(NORMAL_INSTALL) @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ @@ -2437,7 +2481,6 @@ distdir-am: $(DISTFILES) dist-gzip: distdir tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz $(am__post_remove_distdir) - dist-bzip2: distdir tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 $(am__post_remove_distdir) @@ -2467,7 +2510,6 @@ dist-shar: distdir @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz $(am__post_remove_distdir) - dist-zip: distdir -rm -f $(distdir).zip zip -rq $(distdir).zip $(distdir) @@ -2574,7 +2616,7 @@ all-am: Makefile $(PROGRAMS) $(LTLIBRARIES) $(SCRIPTS) $(DATA) \ install-binPROGRAMS: install-libLTLIBRARIES installdirs: - for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(docdir)" "$(DESTDIR)$(htmldir)" "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)"; do \ + for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(docdir)" "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)"; do \ test -z 
"$$dir" || $(MKDIR_P) "$$dir"; \ done install: $(BUILT_SOURCES) @@ -2630,6 +2672,7 @@ distclean: distclean-am -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo @@ -2658,6 +2701,7 @@ distclean: distclean-am -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo @@ -2686,6 +2730,7 @@ distclean: distclean-am -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo @@ -2728,8 +2773,8 @@ info: info-am info-am: -install-data-am: install-dist_docDATA install-dist_htmlDATA \ - install-includeHEADERS install-nodist_includeHEADERS +install-data-am: install-dist_docDATA install-includeHEADERS \ + install-nodist_includeHEADERS install-dvi: install-dvi-am @@ -2765,6 +2810,7 @@ maintainer-clean: maintainer-clean-am -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo -rm -f 
src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo @@ -2793,6 +2839,7 @@ maintainer-clean: maintainer-clean-am -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo @@ -2821,6 +2868,7 @@ maintainer-clean: maintainer-clean-am -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo @@ -2864,9 +2912,8 @@ ps: ps-am ps-am: uninstall-am: uninstall-binPROGRAMS uninstall-binSCRIPTS \ - uninstall-dist_docDATA uninstall-dist_htmlDATA \ - uninstall-includeHEADERS uninstall-libLTLIBRARIES \ - uninstall-nodist_includeHEADERS + uninstall-dist_docDATA uninstall-includeHEADERS \ + uninstall-libLTLIBRARIES uninstall-nodist_includeHEADERS .MAKE: all check check-am install install-am install-exec \ install-strip @@ -2882,10 +2929,10 @@ uninstall-am: uninstall-binPROGRAMS uninstall-binSCRIPTS \ distcleancheck distdir distuninstallcheck dvi dvi-am html \ html-am info info-am install install-am install-binPROGRAMS \ install-binSCRIPTS install-data install-data-am \ - install-dist_docDATA install-dist_htmlDATA install-dvi \ - install-dvi-am install-exec install-exec-am install-html \ - install-html-am install-includeHEADERS install-info \ - install-info-am install-libLTLIBRARIES install-man \ + install-dist_docDATA install-dvi install-dvi-am install-exec \ + install-exec-am install-html 
install-html-am \ + install-includeHEADERS install-info install-info-am \ + install-libLTLIBRARIES install-man \ install-nodist_includeHEADERS install-pdf install-pdf-am \ install-ps install-ps-am install-strip installcheck \ installcheck-am installdirs maintainer-clean \ @@ -2893,9 +2940,8 @@ uninstall-am: uninstall-binPROGRAMS uninstall-binSCRIPTS \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ recheck tags tags-am uninstall uninstall-am \ uninstall-binPROGRAMS uninstall-binSCRIPTS \ - uninstall-dist_docDATA uninstall-dist_htmlDATA \ - uninstall-includeHEADERS uninstall-libLTLIBRARIES \ - uninstall-nodist_includeHEADERS + uninstall-dist_docDATA uninstall-includeHEADERS \ + uninstall-libLTLIBRARIES uninstall-nodist_includeHEADERS .PRECIOUS: Makefile diff --git a/libpcre/NMAP_MODIFICATIONS b/libpcre/NMAP_MODIFICATIONS index 1b560b138..bb695e116 100644 --- a/libpcre/NMAP_MODIFICATIONS +++ b/libpcre/NMAP_MODIFICATIONS @@ -8,7 +8,7 @@ o Started this NMAP_MODIFICATIONS file o Removed these directories: doc testdata -src/sljit/ +deps o Removed these files: src/pcre2test.c @@ -18,11 +18,11 @@ src/pcre2grep.c src/pcre2demo.c src/pcre2_fuzzsupport.c src/pcre2_printint.c +src/pcre2_jit_char_inc.h src/pcre2_jit_match.c src/pcre2_jit_misc.c src/pcre2_jit_neon_inc.h src/pcre2_jit_simd_inc.h -src/pcre2_jit_test.c RunGrepTest.bat RunGrepTest RunTest @@ -34,18 +34,14 @@ libpcre2-8.pc.in libpcre2-posix.pc.in cmake/FindEditline.cmake cmake/FindReadline.cmake -Detrail -PrepareRelease -CheckMan -132html -CleanTxt ChangeLog NEWS HACKING -COPYING [Look at LICENCE] +COPYING [Look at LICENCE.md] README INSTALL NON-AUTOTOOLS-BUILDS o Made relevant changes to Makefile.am, configure.ac, CMakeLists.txt, and - src/pcre2_jit_compile.c to accommodate our smaller, non-JIT build. + src/pcre2_jit_compile.c to accommodate our smaller, non-JIT build + and to disable Unicode support by default. 
diff --git a/libpcre/SECURITY.md b/libpcre/SECURITY.md new file mode 100644 index 000000000..1e3a05b9a --- /dev/null +++ b/libpcre/SECURITY.md @@ -0,0 +1,44 @@ +# Security policies + +## Release security + +The PCRE2 project provides source-only releases, with no binaries. + +These source releases can be downloaded from the +[GitHub Releases](https://github.com/PCRE2Project/pcre2/releases) page. Each +release file is GPG-signed. + +* Releases up to and including 10.44 are signed by Philip Hazel (GPG key: + 45F68D54BBE23FB3039B46E59766E084FB0F43D8) +* Releases from 10.45 onwards will be signed by Nicholas Wilson (GPG key: + A95536204A3BB489715231282A98E77EB6F24CA8, cross-signed by Philip + Hazel's key for release continuity) + +From releases 10.45 onwards, the source code will additionally be provided via +Git checkout of the (GPG-signed) release tag. + +Please contact the maintainers for any queries about release integrity or the +project's supply-chain. + +## Reporting vulnerabilities + +The PCRE2 project prioritises security. We appreciate third-party testing and +security research, and would be grateful if you could responsibly disclose your +findings to us. We will make every effort to acknowledge your contributions. + +To report a security issue, please use the GitHub Security Advisory +["Report a Vulnerability"](https://github.com/PCRE2Project/pcre2/security/advisories/new) +tab. (Alternatively, if you prefer you may send a GPG-encrypted email to one of +the maintainers.) + +### Timeline + +As a very small volunteer team, we cannot guarantee rapid response, but would +aim to respond within 1 week, or perhaps 2 during holidays. + +### Response procedure + +PCRE2 has never previously made a rapid or embargoed release in response to a +security incident. We would work with security managers from trusted downstream +distributors, such as major Linux distributions, before disclosing the +vulnerability publicly. 
diff --git a/libpcre/WORKSPACE.bazel b/libpcre/WORKSPACE.bazel new file mode 100644 index 000000000..4ce2c8ced --- /dev/null +++ b/libpcre/WORKSPACE.bazel @@ -0,0 +1 @@ +# See MODULE.bazel diff --git a/libpcre/aclocal.m4 b/libpcre/aclocal.m4 index 5fca658cb..d7c332c20 100644 --- a/libpcre/aclocal.m4 +++ b/libpcre/aclocal.m4 @@ -20,6 +20,350 @@ You have another version of autoconf. It may work, but is not guaranteed to. If you have problems, you may need to regenerate the build system entirely. To do so, use the procedure documented by the package, typically 'autoreconf'.])]) +# pkg.m4 - Macros to locate and use pkg-config. -*- Autoconf -*- +# serial 12 (pkg-config-0.29.2) + +dnl Copyright © 2004 Scott James Remnant . +dnl Copyright © 2012-2015 Dan Nicholson +dnl +dnl This program is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation; either version 2 of the License, or +dnl (at your option) any later version. +dnl +dnl This program is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +dnl 02111-1307, USA. +dnl +dnl As a special exception to the GNU General Public License, if you +dnl distribute this file as part of a program that contains a +dnl configuration script generated by Autoconf, you may include it under +dnl the same distribution terms that you use for the rest of that +dnl program. + +dnl PKG_PREREQ(MIN-VERSION) +dnl ----------------------- +dnl Since: 0.29 +dnl +dnl Verify that the version of the pkg-config macros are at least +dnl MIN-VERSION. 
Unlike PKG_PROG_PKG_CONFIG, which checks the user's +dnl installed version of pkg-config, this checks the developer's version +dnl of pkg.m4 when generating configure. +dnl +dnl To ensure that this macro is defined, also add: +dnl m4_ifndef([PKG_PREREQ], +dnl [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])]) +dnl +dnl See the "Since" comment for each macro you use to see what version +dnl of the macros you require. +m4_defun([PKG_PREREQ], +[m4_define([PKG_MACROS_VERSION], [0.29.2]) +m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1, + [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])]) +])dnl PKG_PREREQ + +dnl PKG_PROG_PKG_CONFIG([MIN-VERSION]) +dnl ---------------------------------- +dnl Since: 0.16 +dnl +dnl Search for the pkg-config tool and set the PKG_CONFIG variable to +dnl first found in the path. Checks that the version of pkg-config found +dnl is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is +dnl used since that's the first version where most current features of +dnl pkg-config existed. 
+AC_DEFUN([PKG_PROG_PKG_CONFIG], +[m4_pattern_forbid([^_?PKG_[A-Z_]+$]) +m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$]) +m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$]) +AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility]) +AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path]) +AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path]) + +if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then + AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) +fi +if test -n "$PKG_CONFIG"; then + _pkg_min_version=m4_default([$1], [0.9.0]) + AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version]) + if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + PKG_CONFIG="" + fi +fi[]dnl +])dnl PKG_PROG_PKG_CONFIG + +dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ------------------------------------------------------------------- +dnl Since: 0.18 +dnl +dnl Check to see whether a particular set of modules exists. Similar to +dnl PKG_CHECK_MODULES(), but does not set variables or print errors. +dnl +dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +dnl only at the first occurrence in configure.ac, so if the first place +dnl it's called might be skipped (such as if it is within an "if", you +dnl have to call PKG_CHECK_EXISTS manually +AC_DEFUN([PKG_CHECK_EXISTS], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +if test -n "$PKG_CONFIG" && \ + AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then + m4_default([$2], [:]) +m4_ifvaln([$3], [else + $3])dnl +fi]) + +dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) +dnl --------------------------------------------- +dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting +dnl pkg_failed based on the result. 
+m4_define([_PKG_CONFIG], +[if test -n "$$1"; then + pkg_cv_[]$1="$$1" + elif test -n "$PKG_CONFIG"; then + PKG_CHECK_EXISTS([$3], + [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes ], + [pkg_failed=yes]) + else + pkg_failed=untried +fi[]dnl +])dnl _PKG_CONFIG + +dnl _PKG_SHORT_ERRORS_SUPPORTED +dnl --------------------------- +dnl Internal check to see if pkg-config supports short errors. +AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi[]dnl +])dnl _PKG_SHORT_ERRORS_SUPPORTED + + +dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +dnl [ACTION-IF-NOT-FOUND]) +dnl -------------------------------------------------------------- +dnl Since: 0.4.0 +dnl +dnl Note that if there is a possibility the first call to +dnl PKG_CHECK_MODULES might not happen, you should be sure to include an +dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac +AC_DEFUN([PKG_CHECK_MODULES], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl +AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl + +pkg_failed=no +AC_MSG_CHECKING([for $2]) + +_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) +_PKG_CONFIG([$1][_LIBS], [libs], [$2]) + +m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS +and $1[]_LIBS to avoid the need to call pkg-config. 
+See the pkg-config man page for more details.]) + +if test $pkg_failed = yes; then + AC_MSG_RESULT([no]) + _PKG_SHORT_ERRORS_SUPPORTED + if test $_pkg_short_errors_supported = yes; then + $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` + else + $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD + + m4_default([$4], [AC_MSG_ERROR( +[Package requirements ($2) were not met: + +$$1_PKG_ERRORS + +Consider adjusting the PKG_CONFIG_PATH environment variable if you +installed software in a non-standard prefix. + +_PKG_TEXT])[]dnl + ]) +elif test $pkg_failed = untried; then + AC_MSG_RESULT([no]) + m4_default([$4], [AC_MSG_FAILURE( +[The pkg-config script could not be found or is too old. Make sure it +is in your PATH or set the PKG_CONFIG environment variable to the full +path to pkg-config. + +_PKG_TEXT + +To get pkg-config, see <http://pkg-config.freedesktop.org/>.])[]dnl + ]) +else + $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS + $1[]_LIBS=$pkg_cv_[]$1[]_LIBS + AC_MSG_RESULT([yes]) + $3 +fi[]dnl +])dnl PKG_CHECK_MODULES + + +dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +dnl [ACTION-IF-NOT-FOUND]) +dnl --------------------------------------------------------------------- +dnl Since: 0.29 +dnl +dnl Checks for existence of MODULES and gathers its build flags with +dnl static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags +dnl and VARIABLE-PREFIX_LIBS from --libs. +dnl +dnl Note that if there is a possibility the first call to +dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to +dnl include an explicit call to PKG_PROG_PKG_CONFIG in your +dnl configure.ac.
+AC_DEFUN([PKG_CHECK_MODULES_STATIC], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +_save_PKG_CONFIG=$PKG_CONFIG +PKG_CONFIG="$PKG_CONFIG --static" +PKG_CHECK_MODULES($@) +PKG_CONFIG=$_save_PKG_CONFIG[]dnl +])dnl PKG_CHECK_MODULES_STATIC + + +dnl PKG_INSTALLDIR([DIRECTORY]) +dnl ------------------------- +dnl Since: 0.27 +dnl +dnl Substitutes the variable pkgconfigdir as the location where a module +dnl should install pkg-config .pc files. By default the directory is +dnl $libdir/pkgconfig, but the default can be changed by passing +dnl DIRECTORY. The user can override through the --with-pkgconfigdir +dnl parameter. +AC_DEFUN([PKG_INSTALLDIR], +[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])]) +m4_pushdef([pkg_description], + [pkg-config installation directory @<:@]pkg_default[@:>@]) +AC_ARG_WITH([pkgconfigdir], + [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],, + [with_pkgconfigdir=]pkg_default) +AC_SUBST([pkgconfigdir], [$with_pkgconfigdir]) +m4_popdef([pkg_default]) +m4_popdef([pkg_description]) +])dnl PKG_INSTALLDIR + + +dnl PKG_NOARCH_INSTALLDIR([DIRECTORY]) +dnl -------------------------------- +dnl Since: 0.27 +dnl +dnl Substitutes the variable noarch_pkgconfigdir as the location where a +dnl module should install arch-independent pkg-config .pc files. By +dnl default the directory is $datadir/pkgconfig, but the default can be +dnl changed by passing DIRECTORY. The user can override through the +dnl --with-noarch-pkgconfigdir parameter. 
+AC_DEFUN([PKG_NOARCH_INSTALLDIR], +[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])]) +m4_pushdef([pkg_description], + [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@]) +AC_ARG_WITH([noarch-pkgconfigdir], + [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],, + [with_noarch_pkgconfigdir=]pkg_default) +AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir]) +m4_popdef([pkg_default]) +m4_popdef([pkg_description]) +])dnl PKG_NOARCH_INSTALLDIR + + +dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, +dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ------------------------------------------- +dnl Since: 0.28 +dnl +dnl Retrieves the value of the pkg-config variable for the given module. +AC_DEFUN([PKG_CHECK_VAR], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl + +_PKG_CONFIG([$1], [variable="][$3]["], [$2]) +AS_VAR_COPY([$1], [pkg_cv_][$1]) + +AS_VAR_IF([$1], [""], [$5], [$4])dnl +])dnl PKG_CHECK_VAR + +dnl PKG_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND], +dnl [DESCRIPTION], [DEFAULT]) +dnl ------------------------------------------ +dnl +dnl Prepare a "--with-" configure option using the lowercase +dnl [VARIABLE-PREFIX] name, merging the behaviour of AC_ARG_WITH and +dnl PKG_CHECK_MODULES in a single macro. 
+AC_DEFUN([PKG_WITH_MODULES], +[ +m4_pushdef([with_arg], m4_tolower([$1])) + +m4_pushdef([description], + [m4_default([$5], [build with ]with_arg[ support])]) + +m4_pushdef([def_arg], [m4_default([$6], [auto])]) +m4_pushdef([def_action_if_found], [AS_TR_SH([with_]with_arg)=yes]) +m4_pushdef([def_action_if_not_found], [AS_TR_SH([with_]with_arg)=no]) + +m4_case(def_arg, + [yes],[m4_pushdef([with_without], [--without-]with_arg)], + [m4_pushdef([with_without],[--with-]with_arg)]) + +AC_ARG_WITH(with_arg, + AS_HELP_STRING(with_without, description[ @<:@default=]def_arg[@:>@]),, + [AS_TR_SH([with_]with_arg)=def_arg]) + +AS_CASE([$AS_TR_SH([with_]with_arg)], + [yes],[PKG_CHECK_MODULES([$1],[$2],$3,$4)], + [auto],[PKG_CHECK_MODULES([$1],[$2], + [m4_n([def_action_if_found]) $3], + [m4_n([def_action_if_not_found]) $4])]) + +m4_popdef([with_arg]) +m4_popdef([description]) +m4_popdef([def_arg]) + +])dnl PKG_WITH_MODULES + +dnl PKG_HAVE_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [DESCRIPTION], [DEFAULT]) +dnl ----------------------------------------------- +dnl +dnl Convenience macro to trigger AM_CONDITIONAL after PKG_WITH_MODULES +dnl check. HAVE_[VARIABLE-PREFIX] is exported as make variable. +AC_DEFUN([PKG_HAVE_WITH_MODULES], +[ +PKG_WITH_MODULES([$1],[$2],,,[$3],[$4]) + +AM_CONDITIONAL([HAVE_][$1], + [test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"]) +])dnl PKG_HAVE_WITH_MODULES + +dnl PKG_HAVE_DEFINE_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [DESCRIPTION], [DEFAULT]) +dnl ------------------------------------------------------ +dnl +dnl Convenience macro to run AM_CONDITIONAL and AC_DEFINE after +dnl PKG_WITH_MODULES check. HAVE_[VARIABLE-PREFIX] is exported as make +dnl and preprocessor variable.
+AC_DEFUN([PKG_HAVE_DEFINE_WITH_MODULES], +[ +PKG_HAVE_WITH_MODULES([$1],[$2],[$3],[$4]) + +AS_IF([test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"], + [AC_DEFINE([HAVE_][$1], 1, [Enable ]m4_tolower([$1])[ support])]) +])dnl PKG_HAVE_DEFINE_WITH_MODULES + # Copyright (C) 2002-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation @@ -715,6 +1059,42 @@ fi rmdir .tst 2>/dev/null AC_SUBST([am__leading_dot])]) +# Add --enable-maintainer-mode option to configure. -*- Autoconf -*- +# From Jim Meyering + +# Copyright (C) 1996-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MAINTAINER_MODE([DEFAULT-MODE]) +# ---------------------------------- +# Control maintainer-specific portions of Makefiles. +# Default is to disable them, unless 'enable' is passed literally. +# For symmetry, 'disable' may be passed as well. Anyway, the user +# can override the default with the --enable/--disable switch. 
+AC_DEFUN([AM_MAINTAINER_MODE], +[m4_case(m4_default([$1], [disable]), + [enable], [m4_define([am_maintainer_other], [disable])], + [disable], [m4_define([am_maintainer_other], [enable])], + [m4_define([am_maintainer_other], [enable]) + m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])]) +AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles]) + dnl maintainer-mode's default is 'disable' unless 'enable' is passed + AC_ARG_ENABLE([maintainer-mode], + [AS_HELP_STRING([--]am_maintainer_other[-maintainer-mode], + am_maintainer_other[ make rules and dependencies not useful + (and sometimes confusing) to the casual installer])], + [USE_MAINTAINER_MODE=$enableval], + [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes])) + AC_MSG_RESULT([$USE_MAINTAINER_MODE]) + AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes]) + MAINT=$MAINTAINER_MODE_TRUE + AC_SUBST([MAINT])dnl +] +) + # Check to see how 'make' treats includes. -*- Autoconf -*- # Copyright (C) 2001-2021 Free Software Foundation, Inc. 
@@ -1208,6 +1588,7 @@ AC_SUBST([am__tar]) AC_SUBST([am__untar]) ]) # _AM_PROG_TAR +m4_include([m4/ax_pthread.m4]) m4_include([m4/libtool.m4]) m4_include([m4/ltoptions.m4]) m4_include([m4/ltsugar.m4]) diff --git a/libpcre/build.zig b/libpcre/build.zig new file mode 100644 index 000000000..4cb6d99d6 --- /dev/null +++ b/libpcre/build.zig @@ -0,0 +1,173 @@ +const std = @import("std"); + +pub const CodeUnitWidth = enum { + @"8", + @"16", + @"32", +}; + +pub fn build(b: *std.Build) !void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + const linkage = b.option(std.builtin.LinkMode, "linkage", "whether to statically or dynamically link the library") orelse @as(std.builtin.LinkMode, if (target.result.isGnuLibC()) .dynamic else .static); + const codeUnitWidth = b.option(CodeUnitWidth, "code-unit-width", "Sets the code unit width") orelse .@"8"; + + const pcre2_header_dir = b.addWriteFiles(); + const pcre2_header = pcre2_header_dir.addCopyFile(b.path("src/pcre2.h.generic"), "pcre2.h"); + + const config_header = b.addConfigHeader( + .{ + .style = .{ .cmake = b.path("config-cmake.h.in") }, + .include_path = "config.h", + }, + .{ + .HAVE_ASSERT_H = true, + .HAVE_UNISTD_H = (target.result.os.tag != .windows), + .HAVE_WINDOWS_H = (target.result.os.tag == .windows), + + .HAVE_MEMMOVE = true, + .HAVE_STRERROR = true, + + .SUPPORT_PCRE2_8 = codeUnitWidth == CodeUnitWidth.@"8", + .SUPPORT_PCRE2_16 = codeUnitWidth == CodeUnitWidth.@"16", + .SUPPORT_PCRE2_32 = codeUnitWidth == CodeUnitWidth.@"32", + .SUPPORT_UNICODE = true, + + .PCRE2_EXPORT = null, + .PCRE2_LINK_SIZE = 2, + .PCRE2_HEAP_LIMIT = 20000000, + .PCRE2_MATCH_LIMIT = 10000000, + .PCRE2_MATCH_LIMIT_DEPTH = "MATCH_LIMIT", + .PCRE2_MAX_VARLOOKBEHIND = 255, + .NEWLINE_DEFAULT = 2, + .PCRE2_PARENS_NEST_LIMIT = 250, + }, + ); + + // pcre2-8/16/32.so + + const lib = std.Build.Step.Compile.create(b, .{ + .name = b.fmt("pcre2-{s}", .{@tagName(codeUnitWidth)}), + .root_module = .{ + 
.target = target, + .optimize = optimize, + .link_libc = true, + }, + .kind = .lib, + .linkage = linkage, + }); + + lib.defineCMacro("HAVE_CONFIG_H", null); + lib.defineCMacro("PCRE2_CODE_UNIT_WIDTH", @tagName(codeUnitWidth)); + if (linkage == .static) { + lib.defineCMacro("PCRE2_STATIC", null); + } + + lib.addConfigHeader(config_header); + lib.addIncludePath(pcre2_header_dir.getDirectory()); + lib.addIncludePath(b.path("src")); + + lib.addCSourceFile(.{ + .file = b.addWriteFiles().addCopyFile(b.path("src/pcre2_chartables.c.dist"), "pcre2_chartables.c"), + }); + + lib.addCSourceFiles(.{ + .files = &.{ + "src/pcre2_auto_possess.c", + "src/pcre2_chkdint.c", + "src/pcre2_compile.c", + "src/pcre2_compile_class.c", + "src/pcre2_config.c", + "src/pcre2_context.c", + "src/pcre2_convert.c", + "src/pcre2_dfa_match.c", + "src/pcre2_error.c", + "src/pcre2_extuni.c", + "src/pcre2_find_bracket.c", + "src/pcre2_jit_compile.c", + "src/pcre2_maketables.c", + "src/pcre2_match.c", + "src/pcre2_match_data.c", + "src/pcre2_newline.c", + "src/pcre2_ord2utf.c", + "src/pcre2_pattern_info.c", + "src/pcre2_script_run.c", + "src/pcre2_serialize.c", + "src/pcre2_string_utils.c", + "src/pcre2_study.c", + "src/pcre2_substitute.c", + "src/pcre2_substring.c", + "src/pcre2_tables.c", + "src/pcre2_ucd.c", + "src/pcre2_valid_utf.c", + "src/pcre2_xclass.c", + }, + }); + + lib.installHeader(pcre2_header, "pcre2.h"); + b.installArtifact(lib); + + + // pcre2test + + const pcre2test = b.addExecutable(.{ + .name = "pcre2test", + .target = target, + .optimize = optimize, + }); + + + // pcre2-posix.so + + if (codeUnitWidth == CodeUnitWidth.@"8") { + const posixLib = std.Build.Step.Compile.create(b, .{ + .name = "pcre2-posix", + .root_module = .{ + .target = target, + .optimize = optimize, + .link_libc = true, + }, + .kind = .lib, + .linkage = linkage, + }); + + posixLib.defineCMacro("HAVE_CONFIG_H", null); + posixLib.defineCMacro("PCRE2_CODE_UNIT_WIDTH", @tagName(codeUnitWidth)); + if (linkage == .static) 
{ + posixLib.defineCMacro("PCRE2_STATIC", null); + } + + posixLib.addConfigHeader(config_header); + posixLib.addIncludePath(pcre2_header_dir.getDirectory()); + posixLib.addIncludePath(b.path("src")); + + posixLib.addCSourceFiles(.{ + .files = &.{ + "src/pcre2posix.c", + }, + }); + + posixLib.installHeader(b.path("src/pcre2posix.h"), "pcre2posix.h"); + b.installArtifact(posixLib); + + pcre2test.linkLibrary(posixLib); + } + + + // pcre2test (again) + + pcre2test.defineCMacro("HAVE_CONFIG_H", null); + + pcre2test.addConfigHeader(config_header); + pcre2test.addIncludePath(pcre2_header_dir.getDirectory()); + pcre2test.addIncludePath(b.path("src")); + + pcre2test.addCSourceFile(.{ + .file = b.path("src/pcre2test.c"), + }); + + pcre2test.linkLibC(); + pcre2test.linkLibrary(lib); + + b.installArtifact(pcre2test); +} diff --git a/libpcre/cmake/COPYING-CMAKE-SCRIPTS b/libpcre/cmake/COPYING-CMAKE-SCRIPTS index 4b417765f..53b6b71eb 100644 --- a/libpcre/cmake/COPYING-CMAKE-SCRIPTS +++ b/libpcre/cmake/COPYING-CMAKE-SCRIPTS @@ -7,7 +7,7 @@ are met: 2. Redistributions in binary form must reproduce the copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -3. The name of the author may not be used to endorse or promote products +3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR diff --git a/libpcre/cmake/FindPackageHandleStandardArgs.cmake b/libpcre/cmake/FindPackageHandleStandardArgs.cmake deleted file mode 100644 index 151d81250..000000000 --- a/libpcre/cmake/FindPackageHandleStandardArgs.cmake +++ /dev/null @@ -1,58 +0,0 @@ -# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... ) -# This macro is intended to be used in FindXXX.cmake modules files. 
-# It handles the REQUIRED and QUIET argument to FIND_PACKAGE() and -# it also sets the _FOUND variable. -# The package is found if all variables listed are TRUE. -# Example: -# -# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR) -# -# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and -# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE. -# If it is not found and REQUIRED was used, it fails with FATAL_ERROR, -# independent whether QUIET was used or not. -# If it is found, the location is reported using the VAR1 argument, so -# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out. -# If the second argument is DEFAULT_MSG, the message in the failure case will -# be "Could NOT find LibXml2", if you don't like this message you can specify -# your own custom failure message there. - -MACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1 ) - - IF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") - IF (${_NAME}_FIND_REQUIRED) - SET(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}") - ELSE (${_NAME}_FIND_REQUIRED) - SET(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}") - ENDIF (${_NAME}_FIND_REQUIRED) - ELSE("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") - SET(_FAIL_MESSAGE "${_FAIL_MSG}") - ENDIF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") - - STRING(TOUPPER ${_NAME} _NAME_UPPER) - - SET(${_NAME_UPPER}_FOUND TRUE) - IF(NOT ${_VAR1}) - SET(${_NAME_UPPER}_FOUND FALSE) - ENDIF(NOT ${_VAR1}) - - FOREACH(_CURRENT_VAR ${ARGN}) - IF(NOT ${_CURRENT_VAR}) - SET(${_NAME_UPPER}_FOUND FALSE) - ENDIF(NOT ${_CURRENT_VAR}) - ENDFOREACH(_CURRENT_VAR) - - IF (${_NAME_UPPER}_FOUND) - IF (NOT ${_NAME}_FIND_QUIETLY) - MESSAGE(STATUS "Found ${_NAME}: ${${_VAR1}}") - ENDIF (NOT ${_NAME}_FIND_QUIETLY) - ELSE (${_NAME_UPPER}_FOUND) - IF (${_NAME}_FIND_REQUIRED) - MESSAGE(FATAL_ERROR "${_FAIL_MESSAGE}") - ELSE (${_NAME}_FIND_REQUIRED) - IF (NOT ${_NAME}_FIND_QUIETLY) - MESSAGE(STATUS 
"${_FAIL_MESSAGE}") - ENDIF (NOT ${_NAME}_FIND_QUIETLY) - ENDIF (${_NAME}_FIND_REQUIRED) - ENDIF (${_NAME_UPPER}_FOUND) -ENDMACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS) diff --git a/libpcre/cmake/pcre2-config-version.cmake.in b/libpcre/cmake/pcre2-config-version.cmake.in index dac149eb9..db0060633 100644 --- a/libpcre/cmake/pcre2-config-version.cmake.in +++ b/libpcre/cmake/pcre2-config-version.cmake.in @@ -4,8 +4,7 @@ set(PACKAGE_VERSION_PATCH 0) set(PACKAGE_VERSION @PCRE2_MAJOR@.@PCRE2_MINOR@.0) # Check whether the requested PACKAGE_FIND_VERSION is compatible -if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR - PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR) +if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR) set(PACKAGE_VERSION_COMPATIBLE FALSE) else() set(PACKAGE_VERSION_COMPATIBLE TRUE) diff --git a/libpcre/cmake/pcre2-config.cmake.in b/libpcre/cmake/pcre2-config.cmake.in index 12f3a35ad..082dc1983 100644 --- a/libpcre/cmake/pcre2-config.cmake.in +++ b/libpcre/cmake/pcre2-config.cmake.in @@ -30,31 +30,49 @@ set(PCRE2_16BIT_NAME pcre2-16) set(PCRE2_32BIT_NAME pcre2-32) set(PCRE2_POSIX_NAME pcre2-posix) find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h DOC "PCRE2 include directory") -if (PCRE2_USE_STATIC_LIBS) - if (MSVC) +if(PCRE2_USE_STATIC_LIBS) + if(MSVC) set(PCRE2_8BIT_NAME pcre2-8-static) set(PCRE2_16BIT_NAME pcre2-16-static) set(PCRE2_32BIT_NAME pcre2-32-static) set(PCRE2_POSIX_NAME pcre2-posix-static) - endif () + endif() set(PCRE2_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX}) set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) -else () +else() set(PCRE2_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX}) - if (MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX) + if(MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX) set(PCRE2_PREFIX "") - endif () + endif() set(PCRE2_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) - if (MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX) + if(MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX) set(PCRE2_SUFFIX "-0.dll") - endif 
() -endif () -find_library(PCRE2_8BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "8 bit PCRE2 library") -find_library(PCRE2_16BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX} DOC "16 bit PCRE2 library") -find_library(PCRE2_32BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX} DOC "32 bit PCRE2 library") -find_library(PCRE2_POSIX_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX} DOC "8 bit POSIX PCRE2 library") + elseif(MSVC) + set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) + endif() +endif() +find_library( + PCRE2_8BIT_LIBRARY + NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} + DOC "8 bit PCRE2 library" +) +find_library( + PCRE2_16BIT_LIBRARY + NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX} + DOC "16 bit PCRE2 library" +) +find_library( + PCRE2_32BIT_LIBRARY + NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX} + DOC "32 bit PCRE2 library" +) +find_library( + PCRE2_POSIX_LIBRARY + NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX} + DOC "8 bit POSIX PCRE2 library" +) unset(PCRE2_NON_STANDARD_LIB_PREFIX) unset(PCRE2_NON_STANDARD_LIB_SUFFIX) unset(PCRE2_8BIT_NAME) @@ -63,51 +81,55 @@ unset(PCRE2_32BIT_NAME) unset(PCRE2_POSIX_NAME) # Set version -if (PCRE2_INCLUDE_DIR) +if(PCRE2_INCLUDE_DIR) set(PCRE2_VERSION "@PCRE2_MAJOR@.@PCRE2_MINOR@.0") -endif () +endif() # Which components have been found. 
-if (PCRE2_8BIT_LIBRARY) +if(PCRE2_8BIT_LIBRARY) set(PCRE2_8BIT_FOUND TRUE) -endif () -if (PCRE2_16BIT_LIBRARY) +endif() +if(PCRE2_16BIT_LIBRARY) set(PCRE2_16BIT_FOUND TRUE) -endif () -if (PCRE2_32BIT_LIBRARY) +endif() +if(PCRE2_32BIT_LIBRARY) set(PCRE2_32BIT_FOUND TRUE) -endif () -if (PCRE2_POSIX_LIBRARY) +endif() +if(PCRE2_POSIX_LIBRARY) set(PCRE2_POSIX_FOUND TRUE) -endif () +endif() # Check if at least one component has been specified. list(LENGTH PCRE2_FIND_COMPONENTS PCRE2_NCOMPONENTS) -if (PCRE2_NCOMPONENTS LESS 1) +if(PCRE2_NCOMPONENTS LESS 1) message(FATAL_ERROR "No components have been specified. This is not allowed. Please, specify at least one component.") -endif () +endif() unset(PCRE2_NCOMPONENTS) # When POSIX component has been specified make sure that also 8BIT component is specified. set(PCRE2_8BIT_COMPONENT FALSE) set(PCRE2_POSIX_COMPONENT FALSE) foreach(component ${PCRE2_FIND_COMPONENTS}) - if (component STREQUAL "8BIT") + if(component STREQUAL "8BIT") set(PCRE2_8BIT_COMPONENT TRUE) - elseif (component STREQUAL "POSIX") + elseif(component STREQUAL "POSIX") set(PCRE2_POSIX_COMPONENT TRUE) - endif () + endif() endforeach() -if (PCRE2_POSIX_COMPONENT AND NOT PCRE2_8BIT_COMPONENT) - message(FATAL_ERROR "The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component.") +if(PCRE2_POSIX_COMPONENT AND NOT PCRE2_8BIT_COMPONENT) + message( + FATAL_ERROR + "The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component." 
+ ) endif() unset(PCRE2_8BIT_COMPONENT) unset(PCRE2_POSIX_COMPONENT) include(FindPackageHandleStandardArgs) set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}") -find_package_handle_standard_args(PCRE2 +find_package_handle_standard_args( + PCRE2 FOUND_VAR PCRE2_FOUND REQUIRED_VARS PCRE2_INCLUDE_DIR HANDLE_COMPONENTS @@ -116,31 +138,31 @@ find_package_handle_standard_args(PCRE2 ) set(PCRE2_LIBRARIES) -if (PCRE2_FOUND) +if(PCRE2_FOUND) foreach(component ${PCRE2_FIND_COMPONENTS}) - if (PCRE2_USE_STATIC_LIBS) + if(PCRE2_USE_STATIC_LIBS) add_library(PCRE2::${component} STATIC IMPORTED) target_compile_definitions(PCRE2::${component} INTERFACE PCRE2_STATIC) - else () + else() add_library(PCRE2::${component} SHARED IMPORTED) - endif () - set_target_properties(PCRE2::${component} PROPERTIES - IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}" - IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}" + endif() + set_target_properties( + PCRE2::${component} + PROPERTIES + IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}" + IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}" ) - if (component STREQUAL "POSIX") - set_target_properties(PCRE2::${component} PROPERTIES - INTERFACE_LINK_LIBRARIES "PCRE2::8BIT" - LINK_LIBRARIES "PCRE2::8BIT" + if(component STREQUAL "POSIX") + set_target_properties( + PCRE2::${component} + PROPERTIES INTERFACE_LINK_LIBRARIES "PCRE2::8BIT" LINK_LIBRARIES "PCRE2::8BIT" ) - endif () + endif() set(PCRE2_LIBRARIES ${PCRE2_LIBRARIES} ${PCRE2_${component}_LIBRARY}) mark_as_advanced(PCRE2_${component}_LIBRARY) endforeach() -endif () +endif() -mark_as_advanced( - PCRE2_INCLUDE_DIR -) +mark_as_advanced(PCRE2_INCLUDE_DIR) diff --git a/libpcre/config-cmake.h.in b/libpcre/config-cmake.h.in index 48d87d57a..0eff0e0f7 100644 --- a/libpcre/config-cmake.h.in +++ b/libpcre/config-cmake.h.in @@ -1,6 +1,9 @@ /* config.h for CMake builds */ +#cmakedefine 
HAVE_ASSERT_H 1 +#cmakedefine HAVE_BUILTIN_ASSUME 1 #cmakedefine HAVE_BUILTIN_MUL_OVERFLOW 1 +#cmakedefine HAVE_BUILTIN_UNREACHABLE 1 #cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1 #cmakedefine HAVE_DIRENT_H 1 #cmakedefine HAVE_SYS_STAT_H 1 @@ -17,7 +20,6 @@ #cmakedefine SUPPORT_PCRE2_8 1 #cmakedefine SUPPORT_PCRE2_16 1 #cmakedefine SUPPORT_PCRE2_32 1 -#cmakedefine PCRE2_DEBUG 1 #cmakedefine DISABLE_PERCENT_ZT 1 #cmakedefine SUPPORT_LIBBZ2 1 @@ -39,18 +41,18 @@ #cmakedefine HEAP_MATCH_RECURSE 1 #cmakedefine NEVER_BACKSLASH_C 1 -#define PCRE2_EXPORT @PCRE2_EXPORT@ -#define LINK_SIZE @PCRE2_LINK_SIZE@ +#define PCRE2_EXPORT @PCRE2_EXPORT@ +#define LINK_SIZE @PCRE2_LINK_SIZE@ #define HEAP_LIMIT @PCRE2_HEAP_LIMIT@ -#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@ -#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@ +#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@ +#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@ #define MAX_VARLOOKBEHIND @PCRE2_MAX_VARLOOKBEHIND@ #define NEWLINE_DEFAULT @NEWLINE_DEFAULT@ #define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@ #define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@ #define PCRE2GREP_MAX_BUFSIZE @PCRE2GREP_MAX_BUFSIZE@ -#define MAX_NAME_SIZE 32 +#define MAX_NAME_SIZE 128 #define MAX_NAME_COUNT 10000 /* end config.h for CMake builds */ diff --git a/libpcre/config.guess b/libpcre/config.guess index e81d3ae7c..7f76b6228 100755 --- a/libpcre/config.guess +++ b/libpcre/config.guess @@ -1,14 +1,14 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2021 Free Software Foundation, Inc. +# Copyright 1992-2022 Free Software Foundation, Inc. 
# shellcheck disable=SC2006,SC2268 # see below for rationale -timestamp='2021-06-03' +timestamp='2022-01-09' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or +# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but @@ -60,7 +60,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2021 Free Software Foundation, Inc. +Copyright 1992-2022 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -437,7 +437,7 @@ case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in # This test works for both compilers. if test "$CC_FOR_BUILD" != no_compiler_found; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -m64 -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then SUN_ARCH=x86_64 @@ -929,6 +929,9 @@ EOF i*:PW*:*) GUESS=$UNAME_MACHINE-pc-pw32 ;; + *:SerenityOS:*:*) + GUESS=$UNAME_MACHINE-pc-serenity + ;; *:Interix*:*) case $UNAME_MACHINE in x86) @@ -1522,6 +1525,9 @@ EOF i*86:rdos:*:*) GUESS=$UNAME_MACHINE-pc-rdos ;; + i*86:Fiwix:*:*) + GUESS=$UNAME_MACHINE-pc-fiwix + ;; *:AROS:*:*) GUESS=$UNAME_MACHINE-unknown-aros ;; diff --git a/libpcre/config.sub b/libpcre/config.sub index d74fb6dea..dba16e84c 100755 --- a/libpcre/config.sub +++ b/libpcre/config.sub @@ -1,14 +1,14 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2021 Free Software Foundation, Inc. +# Copyright 1992-2022 Free Software Foundation, Inc. 
# shellcheck disable=SC2006,SC2268 # see below for rationale -timestamp='2021-08-14' +timestamp='2022-01-03' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or +# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but @@ -76,7 +76,7 @@ Report bugs and patches to <config-patches@gnu.org>." version="\ GNU config.sub ($timestamp) -Copyright 1992-2021 Free Software Foundation, Inc. +Copyright 1992-2022 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -1020,6 +1020,11 @@ case $cpu-$vendor in ;; # Here we normalize CPU types with a missing or matching vendor + armh-unknown | armh-alt) + cpu=armv7l + vendor=alt + basic_os=${basic_os:-linux-gnueabihf} + ;; dpx20-unknown | dpx20-bull) cpu=rs6000 vendor=bull @@ -1121,7 +1126,7 @@ case $cpu-$vendor in xscale-* | xscalee[bl]-*) cpu=`echo "$cpu" | sed 's/^xscale/arm/'` ;; - arm64-*) + arm64-* | aarch64le-*) cpu=aarch64 ;; @@ -1304,7 +1309,7 @@ esac if test x$basic_os != x then -# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just +# First recognize some ad-hoc cases, or perhaps split kernel-os, or else just # set os.
case $basic_os in gnu/linux*) @@ -1748,7 +1753,8 @@ case $os in | skyos* | haiku* | rdos* | toppers* | drops* | es* \ | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \ | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \ - | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx* | zephyr*) + | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx* | zephyr* \ + | fiwix* ) ;; # This one is extra strict with allowed versions sco3.2v2 | sco3.2v[4-9]* | sco5v6*) diff --git a/libpcre/configure b/libpcre/configure index b906d93a1..4bd105317 100755 --- a/libpcre/configure +++ b/libpcre/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for PCRE2 10.43. +# Generated by GNU Autoconf 2.71 for PCRE2 10.45. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -618,8 +618,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='PCRE2' PACKAGE_TARNAME='pcre2' -PACKAGE_VERSION='10.43' -PACKAGE_STRING='PCRE2 10.43' +PACKAGE_VERSION='10.45' +PACKAGE_STRING='PCRE2 10.45' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -670,21 +670,37 @@ GCOV_CFLAGS GENHTML LCOV SHTOOL +VALGRIND_LIBS +VALGRIND_CFLAGS +PKG_CONFIG_LIBDIR +PKG_CONFIG_PATH +PKG_CONFIG +LIBBZ2 +LIBZ DISTCHECK_CONFIGURE_FLAGS EXTRA_LIBPCRE2_POSIX_LDFLAGS EXTRA_LIBPCRE2_32_LDFLAGS EXTRA_LIBPCRE2_16_LDFLAGS EXTRA_LIBPCRE2_8_LDFLAGS +PTHREAD_CFLAGS +PTHREAD_LIBS +PTHREAD_CC +ax_pthread_config PCRE2POSIX_CFLAG PCRE2_STATIC_CFLAG +LIBREADLINE +WITH_DIFF_FUZZ_SUPPORT_FALSE +WITH_DIFF_FUZZ_SUPPORT_TRUE +WITH_FUZZ_SUPPORT_FALSE +WITH_FUZZ_SUPPORT_TRUE WITH_VALGRIND_FALSE WITH_VALGRIND_TRUE WITH_UNICODE_FALSE WITH_UNICODE_TRUE +WITH_JIT_FALSE +WITH_JIT_TRUE WITH_REBUILD_CHARTABLES_FALSE WITH_REBUILD_CHARTABLES_TRUE -WITH_DEBUG_FALSE -WITH_DEBUG_TRUE WITH_PCRE2_32_FALSE WITH_PCRE2_32_TRUE WITH_PCRE2_16_FALSE @@ -699,7 +715,6 @@ PCRE2_PRERELEASE PCRE2_MINOR PCRE2_MAJOR HAVE_VISIBILITY -VISIBILITY_CXXFLAGS 
VISIBILITY_CFLAGS LT_SYS_LIBRARY_PATH OTOOL64 @@ -709,6 +724,7 @@ NMEDIT DSYMUTIL MANIFEST_TOOL RANLIB +FILECMD LN_S NM ac_ct_DUMPBIN @@ -748,6 +764,9 @@ CPPFLAGS LDFLAGS CFLAGS CC +MAINT +MAINTAINER_MODE_FALSE +MAINTAINER_MODE_TRUE AM_BACKSLASH AM_DEFAULT_VERBOSITY AM_DEFAULT_V @@ -822,6 +841,7 @@ ac_subst_files='' ac_user_opts=' enable_option_checking enable_silent_rules +enable_maintainer_mode enable_dependency_tracking enable_shared enable_static @@ -839,6 +859,11 @@ enable_pcre2_8 enable_pcre2_16 enable_pcre2_32 enable_debug +enable_jit +enable_jit_sealloc +enable_pcre2grep_jit +enable_pcre2grep_callout +enable_pcre2grep_callout_fork enable_rebuild_chartables enable_unicode enable_newline_is_cr @@ -851,6 +876,12 @@ enable_bsr_anycrlf enable_never_backslash_C enable_ebcdic enable_ebcdic_nl25 +enable_pcre2grep_libz +enable_pcre2grep_libbz2 +with_pcre2grep_bufsize +with_pcre2grep_max_bufsize +enable_pcre2test_libedit +enable_pcre2test_libreadline with_link_size with_max_varlookbehind with_parens_nest_limit @@ -860,6 +891,8 @@ with_match_limit_depth with_match_limit_recursion enable_valgrind enable_coverage +enable_fuzz_support +enable_diff_fuzz_support enable_stack_for_recursion enable_percent_zt ' @@ -872,6 +905,11 @@ LDFLAGS LIBS CPPFLAGS LT_SYS_LIBRARY_PATH +PKG_CONFIG +PKG_CONFIG_PATH +PKG_CONFIG_LIBDIR +VALGRIND_CFLAGS +VALGRIND_LIBS LCOV GENHTML' @@ -1422,7 +1460,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures PCRE2 10.43 to adapt to many kinds of systems. +\`configure' configures PCRE2 10.45 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1493,7 +1531,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of PCRE2 10.43:";; + short | recursive ) echo "Configuration of PCRE2 10.45:";; esac cat <<\_ACEOF @@ -1503,6 +1541,9 @@ Optional Features: --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --enable-silent-rules less verbose build output (undo: "make V=1") --disable-silent-rules verbose build output (undo: "make V=0") + --enable-maintainer-mode + enable make rules and dependencies not useful (and + sometimes confusing) to the casual installer --enable-dependency-tracking do not reject slow dependency extractors --disable-dependency-tracking @@ -1518,6 +1559,14 @@ Optional Features: --enable-pcre2-16 enable 16 bit character support --enable-pcre2-32 enable 32 bit character support --enable-debug enable debugging code + --enable-jit enable Just-In-Time compiling support + --enable-jit-sealloc enable SELinux compatible execmem allocator in JIT + (experimental) + --disable-pcre2grep-jit disable JIT support in pcre2grep + --disable-pcre2grep-callout + disable callout script support in pcre2grep + --disable-pcre2grep-callout-fork + disable callout script fork support in pcre2grep --enable-rebuild-chartables rebuild character tables in current locale --disable-unicode disable Unicode support @@ -1533,12 +1582,22 @@ Optional Features: --enable-never-backslash-C use of \C causes an error --enable-ebcdic assume EBCDIC coding rather than ASCII; incompatible - with --enable-utf; use only in (uncommon) EBCDIC + with --enable-unicode; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables --enable-ebcdic-nl25 set EBCDIC code for NL to 0x25 instead of 0x15; it implies --enable-ebcdic + --enable-pcre2grep-libz link pcre2grep with libz to handle .gz files + --enable-pcre2grep-libbz2 + link pcre2grep with libbz2 to handle .bz2 files + --enable-pcre2test-libedit + link pcre2test with libedit + --enable-pcre2test-libreadline + link 
pcre2test with libreadline --enable-valgrind enable valgrind support --enable-coverage enable code coverage reports using gcov + --enable-fuzz-support enable fuzzer support + --enable-diff-fuzz-support + enable differential fuzzer support --disable-percent-zt disable the use of z and t formatting modifiers Optional Packages: @@ -1552,6 +1611,12 @@ Optional Packages: --with-gnu-ld assume the C compiler uses GNU ld [default=no] --with-sysroot[=DIR] Search for dependent libraries within DIR (or the compiler's sysroot if not specified). + --with-pcre2grep-bufsize=N + pcre2grep initial buffer size (default=20480, + minimum=8192) + --with-pcre2grep-max-bufsize=N + pcre2grep maximum buffer size (default=1048576, + minimum=8192) --with-link-size=N internal link size (2, 3, or 4 allowed; default=2) --with-max-varlookbehind=N maximum length of variable lookbehind (default=255) @@ -1575,6 +1640,15 @@ Some influential environment variables: you have headers in a nonstandard directory LT_SYS_LIBRARY_PATH User-defined run-time library search path. + PKG_CONFIG path to pkg-config utility + PKG_CONFIG_PATH + directories to add to pkg-config's search path + PKG_CONFIG_LIBDIR + path overriding pkg-config's built-in search path + VALGRIND_CFLAGS + C compiler flags for VALGRIND, overriding pkg-config + VALGRIND_LIBS + linker flags for VALGRIND, overriding pkg-config LCOV the ltp lcov program GENHTML the ltp genhtml program @@ -1645,7 +1719,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -PCRE2 configure 10.43 +PCRE2 configure 10.45 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -2000,7 +2074,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by PCRE2 $as_me 10.43, which was +It was created by PCRE2 $as_me 10.45, which was generated by GNU Autoconf 2.71. 
Invocation command line was $ $0$ac_configure_args_raw @@ -3272,7 +3346,7 @@ fi # Define the identity of the package. PACKAGE='pcre2' - VERSION='10.43' + VERSION='10.45' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -3375,6 +3449,30 @@ END fi fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5 +printf %s "checking whether to enable maintainer-specific portions of Makefiles... " >&6; } + # Check whether --enable-maintainer-mode was given. +if test ${enable_maintainer_mode+y} +then : + enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval +else $as_nop + USE_MAINTAINER_MODE=no +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5 +printf "%s\n" "$USE_MAINTAINER_MODE" >&6; } + if test $USE_MAINTAINER_MODE = yes; then + MAINTAINER_MODE_TRUE= + MAINTAINER_MODE_FALSE='#' +else + MAINTAINER_MODE_TRUE='#' + MAINTAINER_MODE_FALSE= +fi + + MAINT=$MAINTAINER_MODE_TRUE + + # Check whether --enable-silent-rules was given. 
if test ${enable_silent_rules+y} then : @@ -5071,8 +5169,8 @@ esac -macro_version='2.4.6' -macro_revision='2.4.6' +macro_version='2.4.7' +macro_revision='2.4.7' @@ -5700,13 +5798,13 @@ else mingw*) lt_bad_file=conftest.nm/nofile ;; *) lt_bad_file=/dev/null ;; esac - case `"$tmp_nm" -B $lt_bad_file 2>&1 | sed '1q'` in + case `"$tmp_nm" -B $lt_bad_file 2>&1 | $SED '1q'` in *$lt_bad_file* | *'Invalid file or object type'*) lt_cv_path_NM="$tmp_nm -B" break 2 ;; *) - case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in + case `"$tmp_nm" -p /dev/null 2>&1 | $SED '1q'` in */dev/null*) lt_cv_path_NM="$tmp_nm -p" break 2 @@ -5844,7 +5942,7 @@ esac fi fi - case `$DUMPBIN -symbols -headers /dev/null 2>&1 | sed '1q'` in + case `$DUMPBIN -symbols -headers /dev/null 2>&1 | $SED '1q'` in *COFF*) DUMPBIN="$DUMPBIN -symbols -headers" ;; @@ -5948,7 +6046,7 @@ else $as_nop lt_cv_sys_max_cmd_len=8192; ;; - bitrig* | darwin* | dragonfly* | freebsd* | netbsd* | openbsd*) + bitrig* | darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) # This has been around since 386BSD, at least. Likely further. if test -x /sbin/sysctl; then lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` @@ -5991,7 +6089,7 @@ else $as_nop sysv5* | sco5v6* | sysv4.2uw2*) kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` if test -n "$kargmax"; then - lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[ ]//'` + lt_cv_sys_max_cmd_len=`echo $kargmax | $SED 's/.*[ ]//'` else lt_cv_sys_max_cmd_len=32768 fi @@ -6196,6 +6294,114 @@ esac +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}file", so it can be a program name with args. +set dummy ${ac_tool_prefix}file; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_FILECMD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$FILECMD"; then + ac_cv_prog_FILECMD="$FILECMD" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_FILECMD="${ac_tool_prefix}file" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +FILECMD=$ac_cv_prog_FILECMD +if test -n "$FILECMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $FILECMD" >&5 +printf "%s\n" "$FILECMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_FILECMD"; then + ac_ct_FILECMD=$FILECMD + # Extract the first word of "file", so it can be a program name with args. +set dummy file; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_FILECMD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_FILECMD"; then + ac_cv_prog_ac_ct_FILECMD="$ac_ct_FILECMD" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_FILECMD="file" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_FILECMD=$ac_cv_prog_ac_ct_FILECMD +if test -n "$ac_ct_FILECMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_FILECMD" >&5 +printf "%s\n" "$ac_ct_FILECMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_FILECMD" = x; then + FILECMD=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + FILECMD=$ac_ct_FILECMD + fi +else + FILECMD="$ac_cv_prog_FILECMD" +fi + + + + + + + if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. set dummy ${ac_tool_prefix}objdump; ac_word=$2 @@ -6336,7 +6542,7 @@ beos*) bsdi[45]*) lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' - lt_cv_file_magic_cmd='/usr/bin/file -L' + lt_cv_file_magic_cmd='$FILECMD -L' lt_cv_file_magic_test_file=/shlib/libc.so ;; @@ -6370,14 +6576,14 @@ darwin* | rhapsody*) lt_cv_deplibs_check_method=pass_all ;; -freebsd* | dragonfly*) +freebsd* | dragonfly* | midnightbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then case $host_cpu in i*86 ) # Not sure whether the presence of OpenBSD here was a mistake. # Let's accept both of them until this is cleared up. 
lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library' - lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_cmd=$FILECMD lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` ;; esac @@ -6391,7 +6597,7 @@ haiku*) ;; hpux10.20* | hpux11*) - lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_cmd=$FILECMD case $host_cpu in ia64*) lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64' @@ -6438,7 +6644,7 @@ netbsd* | netbsdelf*-gnu) newos6*) lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)' - lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_cmd=$FILECMD lt_cv_file_magic_test_file=/usr/lib/libnls.so ;; @@ -6808,13 +7014,29 @@ esac fi : ${AR=ar} -: ${AR_FLAGS=cru} +# Use ARFLAGS variable as AR's operation code to sync the variable naming with +# Automake. If both AR_FLAGS and ARFLAGS are specified, AR_FLAGS should have +# higher priority because thats what people were doing historically (setting +# ARFLAGS for automake and AR_FLAGS for libtool). FIXME: Make the AR_FLAGS +# variable obsoleted/removed. + +test ${AR_FLAGS+y} || AR_FLAGS=${ARFLAGS-cr} +lt_ar_flags=$AR_FLAGS + + + + + + +# Make AR_FLAGS overridable by 'make ARFLAGS='. Don't try to run-time override +# by AR_FLAGS because that was never working and AR_FLAGS is about to die. + @@ -7231,7 +7453,7 @@ esac if test "$lt_cv_nm_interface" = "MS dumpbin"; then # Gets list of data symbols to import. - lt_cv_sys_global_symbol_to_import="sed -n -e 's/^I .* \(.*\)$/\1/p'" + lt_cv_sys_global_symbol_to_import="$SED -n -e 's/^I .* \(.*\)$/\1/p'" # Adjust the below global symbol transforms to fixup imported variables. lt_cdecl_hook=" -e 's/^I .* \(.*\)$/extern __declspec(dllimport) char \1;/p'" lt_c_name_hook=" -e 's/^I .* \(.*\)$/ {\"\1\", (void *) 0},/p'" @@ -7249,20 +7471,20 @@ fi # Transform an extracted symbol line into a proper C declaration. # Some systems (esp. 
on ia64) link data and code symbols differently, # so use this general approach. -lt_cv_sys_global_symbol_to_cdecl="sed -n"\ +lt_cv_sys_global_symbol_to_cdecl="$SED -n"\ $lt_cdecl_hook\ " -e 's/^T .* \(.*\)$/extern int \1();/p'"\ " -e 's/^$symcode$symcode* .* \(.*\)$/extern char \1;/p'" # Transform an extracted symbol line into symbol name and symbol address -lt_cv_sys_global_symbol_to_c_name_address="sed -n"\ +lt_cv_sys_global_symbol_to_c_name_address="$SED -n"\ $lt_c_name_hook\ " -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ " -e 's/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/p'" # Transform an extracted symbol line into symbol name with lib prefix and # symbol address. -lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n"\ +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="$SED -n"\ $lt_c_name_lib_hook\ " -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ " -e 's/^$symcode$symcode* .* \(lib.*\)$/ {\"\1\", (void *) \&\1},/p'"\ @@ -7286,7 +7508,7 @@ for ac_symprfx in "" "_"; do if test "$lt_cv_nm_interface" = "MS dumpbin"; then # Fake it for dumpbin and say T for any non-static function, # D for any global variable and I for any imported variable. - # Also find C++ and __fastcall symbols from MSVC++, + # Also find C++ and __fastcall symbols from MSVC++ or ICC, # which start with @ or ?. 
lt_cv_sys_global_symbol_pipe="$AWK '"\ " {last_section=section; section=\$ 3};"\ @@ -7304,9 +7526,9 @@ for ac_symprfx in "" "_"; do " s[1]~prfx {split(s[1],t,\"@\"); print f,t[1],substr(t[1],length(prfx))}"\ " ' prfx=^$ac_symprfx" else - lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + lt_cv_sys_global_symbol_pipe="$SED -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" fi - lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | $SED '/ __gnu_lto/d'" # Check to see that the pipe works correctly. pipe_works=no @@ -7332,11 +7554,8 @@ _LT_EOF test $ac_status = 0; }; then # Now try to grab the symbols. nlist=conftest.nm - if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist\""; } >&5 - (eval $NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && test -s "$nlist"; then + $ECHO "$as_me:$LINENO: $NM conftest.$ac_objext | $lt_cv_sys_global_symbol_pipe > $nlist" >&5 + if eval "$NM" conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist 2>&5 && test -s "$nlist"; then # Try sorting and uniquifying the output. if sort "$nlist" | uniq > "$nlist"T; then mv -f "$nlist"T "$nlist" @@ -7509,7 +7728,7 @@ case $with_sysroot in #( fi ;; #( /*) - lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` + lt_sysroot=`echo "$with_sysroot" | $SED -e "$sed_quote_subst"` ;; #( no|'') ;; #( @@ -7634,7 +7853,7 @@ ia64-*-hpux*) ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; }; then - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *ELF-32*) HPUX_IA64_MODE=32 ;; @@ -7655,7 +7874,7 @@ ia64-*-hpux*) printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then if test yes = "$lt_cv_prog_gnu_ld"; then - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -melf32bsmip" ;; @@ -7667,7 +7886,7 @@ ia64-*-hpux*) ;; esac else - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -32" ;; @@ -7693,7 +7912,7 @@ mips64*-*linux*) printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then emul=elf - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *32-bit*) emul="${emul}32" ;; @@ -7701,7 +7920,7 @@ mips64*-*linux*) emul="${emul}64" ;; esac - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *MSB*) emul="${emul}btsmip" ;; @@ -7709,7 +7928,7 @@ mips64*-*linux*) emul="${emul}ltsmip" ;; esac - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *N32*) emul="${emul}n32" ;; @@ -7733,14 +7952,14 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*) ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then - case `/usr/bin/file conftest.o` in + case `$FILECMD conftest.o` in *32-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_i386_fbsd" ;; x86_64-*linux*) - case `/usr/bin/file conftest.o` in + case `$FILECMD conftest.o` in *x86-64*) LD="${LD-ld} -m elf32_x86_64" ;; @@ -7848,7 +8067,7 @@ printf "%s\n" "$lt_cv_cc_needs_belf" >&6; } ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; }; then - case `/usr/bin/file conftest.o` in + case `$FILECMD conftest.o` in *64-bit*) case $lt_cv_prog_gnu_ld in yes*) @@ -8631,8 +8850,8 @@ int forced_loaded() { return 2;} _LT_EOF echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5 $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5 - echo "$AR cru libconftest.a conftest.o" >&5 - $AR cru libconftest.a conftest.o 2>&5 + echo "$AR $AR_FLAGS libconftest.a conftest.o" >&5 + $AR $AR_FLAGS libconftest.a conftest.o 2>&5 echo "$RANLIB libconftest.a" >&5 $RANLIB libconftest.a 2>&5 cat > conftest.c << _LT_EOF @@ -8659,17 +8878,12 @@ printf "%s\n" "$lt_cv_ld_force_load" >&6; } _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;; darwin1.*) _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; - darwin*) # darwin 5.x on - # if running on 10.5 or later, the deployment target defaults - # to the OS version, if on x86, and 10.4, the deployment - # target defaults to 10.4. Don't you love it? - case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in - 10.0,*86*-darwin8*|10.0,*-darwin[91]*) - _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; - 10.[012][,.]*) - _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; - 10.*) - _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; + darwin*) + case $MACOSX_DEPLOYMENT_TARGET,$host in + 10.[012],*|,*powerpc*-darwin[5-8]*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + *) + _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; esac ;; esac @@ -9356,8 +9570,8 @@ esac ofile=libtool can_build_shared=yes -# All known linkers require a '.a' archive for static linking (except MSVC, -# which needs '.lib'). +# All known linkers require a '.a' archive for static linking (except MSVC and +# ICC, which need '.lib'). 
libext=a with_gnu_ld=$lt_cv_prog_gnu_ld @@ -9825,6 +10039,12 @@ lt_prog_compiler_static= lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-static' ;; + # flang / f18. f95 an alias for gfortran or flang on Debian + flang* | f18* | f95*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; # icc used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. icc* | ifort*) @@ -9869,7 +10089,7 @@ lt_prog_compiler_static= lt_prog_compiler_static='-qstaticlink' ;; *) - case `$CC -V 2>&1 | sed 5q` in + case `$CC -V 2>&1 | $SED 5q` in *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) # Sun Fortran 8.3 passes all unrecognized flags to the linker lt_prog_compiler_pic='-KPIC' @@ -10292,15 +10512,15 @@ printf %s "checking whether the $compiler linker ($LD) supports shared libraries case $host_os in cygwin* | mingw* | pw32* | cegcc*) - # FIXME: the MSVC++ port hasn't been tested in a loooong time + # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using - # Microsoft Visual C++. + # Microsoft Visual C++ or Intel C++ Compiler. if test yes != "$GCC"; then with_gnu_ld=no fi ;; interix*) - # we just hope/assume this is gcc and not c89 (= MSVC++) + # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) with_gnu_ld=yes ;; openbsd* | bitrig*) @@ -10355,7 +10575,7 @@ printf %s "checking whether the $compiler linker ($LD) supports shared libraries whole_archive_flag_spec= fi supports_anon_versioning=no - case `$LD -v | $SED -e 's/(^)\+)\s\+//' 2>&1` in + case `$LD -v | $SED -e 's/([^)]\+)\s\+//' 2>&1` in *GNU\ gold*) supports_anon_versioning=yes ;; *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... 
@@ -10467,6 +10687,7 @@ _LT_EOF emximp -o $lib $output_objdir/$libname.def' old_archive_From_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' enable_shared_with_static_runtimes=yes + file_list_spec='@' ;; interix[3-9]*) @@ -10481,7 +10702,7 @@ _LT_EOF # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' - archive_expsym_cmds='sed "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + archive_expsym_cmds='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) @@ -10524,7 +10745,7 @@ _LT_EOF compiler_needs_object=yes ;; esac - case `$CC -V 2>&1 | sed 5q` in + case `$CC -V 2>&1 | $SED 5q` in *Sun\ C*) # Sun C 5.9 whole_archive_flag_spec='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' compiler_needs_object=yes @@ -10536,13 +10757,14 @@ _LT_EOF if test yes = "$supports_anon_versioning"; then archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ - cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC 
'"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' fi case $cc_basename in tcc*) + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' export_dynamic_flag_spec='-rdynamic' ;; xlf* | bgf* | bgxlf* | mpixlf*) @@ -10552,7 +10774,7 @@ _LT_EOF archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' if test yes = "$supports_anon_versioning"; then archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ - cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' fi @@ -10684,7 +10906,7 @@ _LT_EOF if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' else - export_symbols_cmds='`func_echo_all $NM | $SED -e '\''s/B\([^B]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && (substr(\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + export_symbols_cmds='`func_echo_all $NM | $SED -e '\''s/B\([^B]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && (substr(\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > 
$export_symbols' fi aix_use_runtimelinking=no @@ -10955,12 +11177,12 @@ fi cygwin* | mingw* | pw32* | cegcc*) # When not using gcc, we currently assume that we are using - # Microsoft Visual C++. + # Microsoft Visual C++ or Intel C++ Compiler. # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. case $cc_basename in - cl*) - # Native MSVC + cl* | icl*) + # Native MSVC or ICC hardcode_libdir_flag_spec=' ' allow_undefined_flag=unsupported always_export_symbols=yes @@ -11001,7 +11223,7 @@ fi fi' ;; *) - # Assume MSVC wrapper + # Assume MSVC and ICC wrapper hardcode_libdir_flag_spec=' ' allow_undefined_flag=unsupported # Tell ltmain to make .lib files, not .a files. @@ -11042,8 +11264,8 @@ fi output_verbose_link_cmd=func_echo_all archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" - archive_expsym_cmds="sed 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" - module_expsym_cmds="sed -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + archive_expsym_cmds="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + module_expsym_cmds="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs 
\$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" else ld_shlibs=no @@ -11077,7 +11299,7 @@ fi ;; # FreeBSD 3 and greater uses gcc -shared to do shared libraries. - freebsd* | dragonfly*) + freebsd* | dragonfly* | midnightbsd*) archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec='-R$libdir' hardcode_direct=yes @@ -11258,6 +11480,7 @@ printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; } # Fabrice Bellard et al's Tiny C Compiler ld_shlibs=yes archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' ;; esac ;; @@ -11329,6 +11552,7 @@ printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; } emximp -o $lib $output_objdir/$libname.def' old_archive_From_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' enable_shared_with_static_runtimes=yes + file_list_spec='@' ;; osf3*) @@ -12021,7 +12245,7 @@ cygwin* | mingw* | pw32* | cegcc*) case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' - soname_spec='`echo $libname | sed -e 's/^lib/cyg/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api" ;; @@ -12031,14 +12255,14 @@ cygwin* | mingw* | pw32* | cegcc*) ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' - library_names_spec='`echo $libname | sed -e 's/^lib/pw/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' ;; esac dynamic_linker='Win32 ld.exe' ;; - *,cl*) - # Native MSVC + *,cl* | *,icl*) + # Native MSVC or ICC libname_spec='$name' soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' library_names_spec='$libname.dll.lib' @@ -12057,7 +12281,7 @@ 
cygwin* | mingw* | pw32* | cegcc*) done IFS=$lt_save_ifs # Convert to MSYS style. - sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` ;; cygwin*) # Convert to unix form, then to dos form, then back to unix form @@ -12094,7 +12318,7 @@ cygwin* | mingw* | pw32* | cegcc*) ;; *) - # Assume MSVC wrapper + # Assume MSVC and ICC wrapper library_names_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext $libname.lib' dynamic_linker='Win32 ld.exe' ;; @@ -12127,7 +12351,7 @@ dgux*) shlibpath_var=LD_LIBRARY_PATH ;; -freebsd* | dragonfly*) +freebsd* | dragonfly* | midnightbsd*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. if test -x /usr/bin/objformat; then @@ -13292,30 +13516,41 @@ striplib= old_striplib= { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether stripping libraries is possible" >&5 printf %s "checking whether stripping libraries is possible... 
" >&6; } -if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then - test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" - test -z "$striplib" && striplib="$STRIP --strip-unneeded" - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -printf "%s\n" "yes" >&6; } +if test -z "$STRIP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } else -# FIXME - insert some real tests, host_os isn't really good enough - case $host_os in - darwin*) - if test -n "$STRIP"; then + if $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then + old_striplib="$STRIP --strip-debug" + striplib="$STRIP --strip-unneeded" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + case $host_os in + darwin*) + # FIXME - insert some real tests, host_os isn't really good enough striplib="$STRIP -x" old_striplib="$STRIP -S" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } - else + ;; + freebsd*) + if $STRIP -V 2>&1 | $GREP "elftoolchain" >/dev/null; then + old_striplib="$STRIP --strip-debug" + striplib="$STRIP --strip-unneeded" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + fi + ;; + *) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - fi - ;; - *) - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } - ;; - esac + ;; + esac + fi fi @@ -13626,19 +13861,18 @@ fi VISIBILITY_CFLAGS= - VISIBILITY_CXXFLAGS= HAVE_VISIBILITY=0 - if test -n "$GCC"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the -Werror option is usable" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the -Werror option is usable" >&5 printf %s "checking whether the -Werror option is usable... 
" >&6; } - if test ${pcre2_cv_cc_vis_werror+y} + if test ${pcre2_cv_cc_vis_werror+y} then : printf %s "(cached) " >&6 else $as_nop - pcre2_save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -Werror" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext + pcre2_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -Werror" + pcre2_cv_cc_vis_werror=no + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int @@ -13651,33 +13885,53 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - pcre2_cv_cc_vis_werror=yes + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + #warning e + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + else $as_nop - pcre2_cv_cc_vis_werror=no + pcre2_cv_cc_vis_werror=yes + fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS="$pcre2_save_CFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS="$pcre2_save_CFLAGS" fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cv_cc_vis_werror" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cv_cc_vis_werror" >&5 printf "%s\n" "$pcre2_cv_cc_vis_werror" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for simple visibility declarations" >&5 -printf %s "checking for simple visibility declarations... " >&6; } + if test -n "$pcre2_cv_cc_vis_werror" && test $pcre2_cv_cc_vis_werror = yes + then + WORKING_WERROR=1 + else + WORKING_WERROR=0 + fi + if test $pcre2_cv_cc_vis_werror = yes; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for GCC compatible visibility declarations" >&5 +printf %s "checking for GCC compatible visibility declarations... 
" >&6; } if test ${pcre2_cv_cc_visibility+y} then : printf %s "(cached) " >&6 else $as_nop pcre2_save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -fvisibility=hidden" - if test $pcre2_cv_cc_vis_werror = yes; then - CFLAGS="$CFLAGS -Werror" - fi - cat confdefs.h - <<_ACEOF >conftest.$ac_ext + CFLAGS="$CFLAGS -Werror -fvisibility=hidden" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -extern __attribute__((__visibility__("hidden"))) int hiddenvar; - extern __attribute__((__visibility__("default"))) int exportedvar; - extern __attribute__((__visibility__("hidden"))) int hiddenfunc (void); +extern __attribute__((__visibility__("hidden"))) int hiddenfunc (void); extern __attribute__((__visibility__("default"))) int exportedfunc (void); void dummyfunc (void) {} @@ -13699,29 +13953,118 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS="$pcre2_save_CFLAGS" fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cv_cc_visibility" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cv_cc_visibility" >&5 printf "%s\n" "$pcre2_cv_cc_visibility" >&6; } - if test $pcre2_cv_cc_visibility = yes; then - VISIBILITY_CFLAGS="-fvisibility=hidden" - VISIBILITY_CXXFLAGS="-fvisibility=hidden -fvisibility-inlines-hidden" - HAVE_VISIBILITY=1 + fi + if test -n "$pcre2_cv_cc_visibility" && test $pcre2_cv_cc_visibility = yes + then + VISIBILITY_CFLAGS="-fvisibility=hidden" + HAVE_VISIBILITY=1 printf "%s\n" "#define PCRE2_EXPORT __attribute__ ((visibility (\"default\")))" >>confdefs.h - else + else printf "%s\n" "#define PCRE2_EXPORT /**/" >>confdefs.h - fi fi - printf "%s\n" "#define HAVE_VISIBILITY $HAVE_VISIBILITY" >>confdefs.h +# Check for Clang __attribute__((uninitialized)) feature + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __attribute__((uninitialized))" >&5 +printf %s "checking for __attribute__((uninitialized))... 
" >&6; } +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +tmp_CFLAGS=$CFLAGS +if test $WORKING_WERROR -eq 1; then + CFLAGS="$CFLAGS -Werror" +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +char buf[128] __attribute__((uninitialized));(void)buf + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + pcre2_cc_cv_attribute_uninitialized=yes +else $as_nop + pcre2_cc_cv_attribute_uninitialized=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_attribute_uninitialized" >&5 +printf "%s\n" "$pcre2_cc_cv_attribute_uninitialized" >&6; } +if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then + +printf "%s\n" "#define HAVE_ATTRIBUTE_UNINITIALIZED 1" >>confdefs.h + +fi +CFLAGS=$tmp_CFLAGS +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Check for the assume() builtin + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __assume()" >&5 +printf %s "checking for __assume()... " >&6; } +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +__assume(1) + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pcre2_cc_cv_builtin_assume=yes +else $as_nop + pcre2_cc_cv_builtin_assume=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_builtin_assume" >&5 +printf "%s\n" "$pcre2_cc_cv_builtin_assume" >&6; } +if test "$pcre2_cc_cv_builtin_assume" = yes; then + +printf "%s\n" "#define HAVE_BUILTIN_ASSUME 1" >>confdefs.h + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + # Check for the mul_overflow() builtin { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __builtin_mul_overflow()" >&5 @@ -13773,44 +14116,42 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $ ac_compiler_gnu=$ac_cv_c_compiler_gnu -# Check for Clang __attribute__((uninitialized)) feature +# Check for the unreachable() builtin -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __attribute__((uninitialized))" >&5 -printf %s "checking for __attribute__((uninitialized))... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __builtin_unreachable()" >&5 +printf %s "checking for __builtin_unreachable()... " >&6; } ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu -tmp_CFLAGS=$CFLAGS -CFLAGS="$CFLAGS -Werror" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ - +int r; int main (void) { -char buf[128] __attribute__((uninitialized));(void)buf +if (r) __builtin_unreachable() ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" +if ac_fn_c_try_link "$LINENO" then : - pcre2_cc_cv_attribute_uninitialized=yes + pcre2_cc_cv_builtin_unreachable=yes else $as_nop - pcre2_cc_cv_attribute_uninitialized=no + pcre2_cc_cv_builtin_unreachable=no fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_attribute_uninitialized" >&5 -printf "%s\n" "$pcre2_cc_cv_attribute_uninitialized" >&6; } -if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_builtin_unreachable" >&5 +printf "%s\n" "$pcre2_cc_cv_builtin_unreachable" >&6; } +if test "$pcre2_cc_cv_builtin_unreachable" = yes; then -printf "%s\n" "#define HAVE_ATTRIBUTE_UNINITIALIZED 1" >>confdefs.h +printf "%s\n" "#define HAVE_BUILTIN_UNREACHABLE 1" >>confdefs.h fi -CFLAGS=$tmp_CFLAGS ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -13821,9 +14162,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu # Versioning PCRE2_MAJOR="10" -PCRE2_MINOR="43" +PCRE2_MINOR="45" PCRE2_PRERELEASE="" -PCRE2_DATE="2024-02-16" +PCRE2_DATE="2025-02-05" if test "$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09" then @@ -13920,6 +14261,93 @@ else $as_nop fi +# Handle --enable-jit (disabled by default) +# Check whether --enable-jit was given. +if test ${enable_jit+y} +then : + enableval=$enable_jit; +else $as_nop + enable_jit=no +fi + + +# This code enables JIT if the hardware supports it. 
+if test "$enable_jit" = "auto"; then + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + SAVE_CPPFLAGS=$CPPFLAGS + CPPFLAGS=-I$srcdir + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #define SLJIT_CONFIG_AUTO 1 + #include "deps/sljit/sljit_src/sljitConfigCPU.h" + #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + #error unsupported + #endif +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + enable_jit=yes +else $as_nop + enable_jit=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CPPFLAGS=$SAVE_CPPFLAGS + echo checking for JIT support on this hardware... $enable_jit +fi + +# Handle --enable-jit-sealloc (disabled by default and only experimental) +case $host_os in + linux* | netbsd*) + # Check whether --enable-jit-sealloc was given. +if test ${enable_jit_sealloc+y} +then : + enableval=$enable_jit_sealloc; +else $as_nop + enable_jit_sealloc=no +fi + + ;; + *) + enable_jit_sealloc=unsupported + ;; +esac + +# Handle --disable-pcre2grep-jit (enabled by default) +# Check whether --enable-pcre2grep-jit was given. +if test ${enable_pcre2grep_jit+y} +then : + enableval=$enable_pcre2grep_jit; +else $as_nop + enable_pcre2grep_jit=yes +fi + + +# Handle --disable-pcre2grep-callout (enabled by default) +# Check whether --enable-pcre2grep-callout was given. +if test ${enable_pcre2grep_callout+y} +then : + enableval=$enable_pcre2grep_callout; +else $as_nop + enable_pcre2grep_callout=yes +fi + + +# Handle --disable-pcre2grep-callout-fork (enabled by default) +# Check whether --enable-pcre2grep-callout-fork was given. 
+if test ${enable_pcre2grep_callout_fork+y} +then : + enableval=$enable_pcre2grep_callout_fork; +else $as_nop + enable_pcre2grep_callout_fork=yes +fi + + # Handle --enable-rebuild-chartables # Check whether --enable-rebuild-chartables was given. if test ${enable_rebuild_chartables+y} @@ -13936,7 +14364,7 @@ if test ${enable_unicode+y} then : enableval=$enable_unicode; else $as_nop - enable_unicode=unset + enable_unicode=no fi @@ -14020,6 +14448,68 @@ else $as_nop fi +# Handle --enable-pcre2grep-libz +# Check whether --enable-pcre2grep-libz was given. +if test ${enable_pcre2grep_libz+y} +then : + enableval=$enable_pcre2grep_libz; +else $as_nop + enable_pcre2grep_libz=no +fi + + +# Handle --enable-pcre2grep-libbz2 +# Check whether --enable-pcre2grep-libbz2 was given. +if test ${enable_pcre2grep_libbz2+y} +then : + enableval=$enable_pcre2grep_libbz2; +else $as_nop + enable_pcre2grep_libbz2=no +fi + + +# Handle --with-pcre2grep-bufsize=N + +# Check whether --with-pcre2grep-bufsize was given. +if test ${with_pcre2grep_bufsize+y} +then : + withval=$with_pcre2grep_bufsize; +else $as_nop + with_pcre2grep_bufsize=20480 +fi + + +# Handle --with-pcre2grep-max-bufsize=N + +# Check whether --with-pcre2grep-max-bufsize was given. +if test ${with_pcre2grep_max_bufsize+y} +then : + withval=$with_pcre2grep_max_bufsize; +else $as_nop + with_pcre2grep_max_bufsize=1048576 +fi + + +# Handle --enable-pcre2test-libedit +# Check whether --enable-pcre2test-libedit was given. +if test ${enable_pcre2test_libedit+y} +then : + enableval=$enable_pcre2test_libedit; +else $as_nop + enable_pcre2test_libedit=no +fi + + +# Handle --enable-pcre2test-libreadline +# Check whether --enable-pcre2test-libreadline was given. +if test ${enable_pcre2test_libreadline+y} +then : + enableval=$enable_pcre2test_libreadline; +else $as_nop + enable_pcre2test_libreadline=no +fi + + # Handle --with-link-size=N # Check whether --with-link-size was given. 
@@ -14124,6 +14614,26 @@ else $as_nop fi +# Handle --enable-fuzz-support +# Check whether --enable-fuzz_support was given. +if test ${enable_fuzz_support+y} +then : + enableval=$enable_fuzz_support; +else $as_nop + enable_fuzz_support=no +fi + + +# Handle --enable-diff-fuzz-support +# Check whether --enable-diff_fuzz_support was given. +if test ${enable_diff_fuzz_support+y} +then : + enableval=$enable_diff_fuzz_support; +else $as_nop + enable_diff_fuzz_support=no +fi + + # Handle --disable-stack-for-recursion # This option became obsolete at release 10.30. # Check whether --enable-stack-for-recursion was given. @@ -14228,6 +14738,12 @@ esac # Checks for header files. +ac_fn_c_check_header_compile "$LINENO" "assert.h" "ac_cv_header_assert_h" "$ac_includes_default" +if test "x$ac_cv_header_assert_h" = xyes +then : + printf "%s\n" "#define HAVE_ASSERT_H 1" >>confdefs.h + +fi ac_fn_c_check_header_compile "$LINENO" "limits.h" "ac_cv_header_limits_h" "$ac_includes_default" if test "x$ac_cv_header_limits_h" = xyes then : @@ -14299,14 +14815,6 @@ else WITH_PCRE2_32_FALSE= fi - if test "x$enable_debug" = "xyes"; then - WITH_DEBUG_TRUE= - WITH_DEBUG_FALSE='#' -else - WITH_DEBUG_TRUE='#' - WITH_DEBUG_FALSE= -fi - if test "x$enable_rebuild_chartables" = "xyes"; then WITH_REBUILD_CHARTABLES_TRUE= WITH_REBUILD_CHARTABLES_FALSE='#' @@ -14315,6 +14823,14 @@ else WITH_REBUILD_CHARTABLES_FALSE= fi + if test "x$enable_jit" = "xyes"; then + WITH_JIT_TRUE= + WITH_JIT_FALSE='#' +else + WITH_JIT_TRUE='#' + WITH_JIT_FALSE= +fi + if test "x$enable_unicode" = "xyes"; then WITH_UNICODE_TRUE= WITH_UNICODE_FALSE='#' @@ -14331,6 +14847,41 @@ else WITH_VALGRIND_FALSE= fi + if test "x$enable_fuzz_support" = "xyes"; then + WITH_FUZZ_SUPPORT_TRUE= + WITH_FUZZ_SUPPORT_FALSE='#' +else + WITH_FUZZ_SUPPORT_TRUE='#' + WITH_FUZZ_SUPPORT_FALSE= +fi + + if test "x$enable_diff_fuzz_support" = "xyes"; then + WITH_DIFF_FUZZ_SUPPORT_TRUE= + WITH_DIFF_FUZZ_SUPPORT_FALSE='#' +else + 
WITH_DIFF_FUZZ_SUPPORT_TRUE='#' + WITH_DIFF_FUZZ_SUPPORT_FALSE= +fi + + +if test "$enable_fuzz_support" = "yes" -a "$enable_pcre2_8" = "no"; then + echo "** ERROR: Fuzzer support requires the 8-bit library" + exit 1 +fi + +if test "$enable_diff_fuzz_support" = "yes"; then + if test "$enable_fuzz_support" = "no"; then + echo "** ERROR: Differential fuzzing support requires fuzzing support" + exit 1 + fi + if test "$enable_jit" = "no"; then + echo "** ERROR: Differential fuzzing support requires Just-in-Time compilation support" + exit 1 + fi + +printf "%s\n" "#define SUPPORT_DIFF_FUZZ /**/" >>confdefs.h + +fi # Checks for typedefs, structures, and compiler characteristics. @@ -14501,6 +15052,466 @@ fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext +# Check for the availability of libz (aka zlib) + + for ac_header in zlib.h +do : + ac_fn_c_check_header_compile "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "$ac_includes_default" +if test "x$ac_cv_header_zlib_h" = xyes +then : + printf "%s\n" "#define HAVE_ZLIB_H 1" >>confdefs.h + HAVE_ZLIB_H=1 +fi + +done +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for gzopen in -lz" >&5 +printf %s "checking for gzopen in -lz... " >&6; } +if test ${ac_cv_lib_z_gzopen+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lz $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char gzopen (); +int +main (void) +{ +return gzopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_z_gzopen=yes +else $as_nop + ac_cv_lib_z_gzopen=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_gzopen" >&5 +printf "%s\n" "$ac_cv_lib_z_gzopen" >&6; } +if test "x$ac_cv_lib_z_gzopen" = xyes +then : + HAVE_LIBZ=1 +fi + + +# Check for the availability of libbz2. Originally we just used AC_CHECK_LIB, +# as for libz. However, this had the following problem, diagnosed and fixed by +# a user: +# +# - libbz2 uses the Pascal calling convention (WINAPI) for the functions +# under Win32. +# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h", +# therefore missing the function definition. +# - The compiler thus generates a "C" signature for the test function. +# - The linker fails to find the "C" function. +# - PCRE2 fails to configure if asked to do so against libbz2. +# +# Solution: +# +# - Replace the AC_CHECK_LIB test with a custom test. + + for ac_header in bzlib.h +do : + ac_fn_c_check_header_compile "$LINENO" "bzlib.h" "ac_cv_header_bzlib_h" "$ac_includes_default" +if test "x$ac_cv_header_bzlib_h" = xyes +then : + printf "%s\n" "#define HAVE_BZLIB_H 1" >>confdefs.h + HAVE_BZLIB_H=1 +fi + +done +# Original test +# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1]) +# +# Custom test follows + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libbz2" >&5 +printf %s "checking for libbz2... " >&6; } +OLD_LIBS="$LIBS" +LIBS="$LIBS -lbz2" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#ifdef HAVE_BZLIB_H +#include +#endif +int +main (void) +{ +return (int)BZ2_bzopen("conftest", "rb"); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; };HAVE_LIBBZ2=1; break; +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS="$OLD_LIBS" + +# Check for the availabiity of libreadline + +if test "$enable_pcre2test_libreadline" = "yes"; then + for ac_header in readline/readline.h +do : + ac_fn_c_check_header_compile "$LINENO" "readline/readline.h" "ac_cv_header_readline_readline_h" "$ac_includes_default" +if test "x$ac_cv_header_readline_readline_h" = xyes +then : + printf "%s\n" "#define HAVE_READLINE_READLINE_H 1" >>confdefs.h + HAVE_READLINE_H=1 +fi + +done + for ac_header in readline/history.h +do : + ac_fn_c_check_header_compile "$LINENO" "readline/history.h" "ac_cv_header_readline_history_h" "$ac_includes_default" +if test "x$ac_cv_header_readline_history_h" = xyes +then : + printf "%s\n" "#define HAVE_READLINE_HISTORY_H 1" >>confdefs.h + HAVE_HISTORY_H=1 +fi + +done + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... " >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char readline (); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else $as_nop + ac_cv_lib_readline_readline=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-lreadline" +else $as_nop + unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... " >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -ltinfo $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char readline (); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else $as_nop + ac_cv_lib_readline_readline=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-ltinfo" +else $as_nop + unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... 
" >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -lcurses $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char readline (); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else $as_nop + ac_cv_lib_readline_readline=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-lcurses" +else $as_nop + unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... " >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -lncurses $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char readline (); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else $as_nop + ac_cv_lib_readline_readline=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-lncurses" +else $as_nop + unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... " >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -lncursesw $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char readline (); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else $as_nop + ac_cv_lib_readline_readline=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-lncursesw" +else $as_nop + unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... 
" >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -ltermcap $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char readline (); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else $as_nop + ac_cv_lib_readline_readline=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-ltermcap" +else $as_nop + LIBREADLINE="" +fi + +fi + +fi + +fi + +fi + +fi + + + if test -n "$LIBREADLINE"; then + if test "$LIBREADLINE" != "-lreadline"; then + echo "-lreadline needs $LIBREADLINE" + LIBREADLINE="-lreadline $LIBREADLINE" + fi + fi +fi + +# Check for the availability of libedit. Different distributions put its +# headers in different places. Try to cover the most common ones. + +if test "$enable_pcre2test_libedit" = "yes"; then + for ac_header in editline/readline.h edit/readline/readline.h readline.h +do : + as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes" +then : + cat >>confdefs.h <<_ACEOF +#define `printf "%s\n" "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + + HAVE_LIBEDIT_HEADER=1 + break + +fi + +done + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -ledit" >&5 +printf %s "checking for readline in -ledit... 
" >&6; } +if test ${ac_cv_lib_edit_readline+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-ledit $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char readline (); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_edit_readline=yes +else $as_nop + ac_cv_lib_edit_readline=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_edit_readline" >&5 +printf "%s\n" "$ac_cv_lib_edit_readline" >&6; } +if test "x$ac_cv_lib_edit_readline" = xyes +then : + LIBEDIT="-ledit" +fi + +fi + PCRE2_STATIC_CFLAG="" if test "x$enable_shared" = "xno" ; then @@ -14550,12 +15561,513 @@ else enable_percent_zt=auto fi +# Unless running under Windows, JIT support requires pthreads. + +if test "$enable_jit" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ax_pthread_ok=no + +# We used to check for pthread.h first, but this fails if pthread.h +# requires special compiler flags (e.g. on True64 or Sequent). +# It gets checked for in the link test anyway. 
+ +# First of all, check if the user has set any of the PTHREAD_LIBS, +# etcetera environment variables, and if threads linking works using +# them: +if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS" >&5 +printf %s "checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char pthread_join (); +int +main (void) +{ +return pthread_join (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ax_pthread_ok=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_ok" >&5 +printf "%s\n" "$ax_pthread_ok" >&6; } + if test x"$ax_pthread_ok" = xno; then + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" + fi + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" +fi + +# We must check for the threads library under a number of different +# names; the ordering is very important because some systems +# (e.g. DEC) have both -lpthread and -lpthreads, where one of the +# libraries is broken (non-POSIX). + +# Create a list of thread flags to try. Items starting with a "-" are +# C compiler flags, and other items are library names, except for "none" +# which indicates that we try without any flags at all, and "pthread-config" +# which is a program returning the flags for the Pth emulation library. 
+ +ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" + +# The ordering *is* (sometimes) important. Some notes on the +# individual items follow: + +# pthreads: AIX (must check this before -lpthread) +# none: in case threads are in libc; should be tried before -Kthread and +# other compiler flags to prevent continual compiler warnings +# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) +# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) +# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) +# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) +# -pthreads: Solaris/gcc +# -mthreads: Mingw32/gcc, Lynx/gcc +# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it +# doesn't hurt to check since this sometimes defines pthreads too; +# also defines -D_REENTRANT) +# ... -mt is also the pthreads flag for HP/aCC +# pthread: Linux, etcetera +# --thread-safe: KAI C++ +# pthread-config: use pthread-config program (for GNU Pth library) + +case ${host_os} in + solaris*) + + # On Solaris (at least, for some versions), libc contains stubbed + # (non-functional) versions of the pthreads routines, so link-based + # tests will erroneously succeed. (We need to link with -pthreads/-mt/ + # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather + # a function called by this macro, so we could check for that, but + # who knows whether they'll stub that too in a future libc.) 
So, + # we'll just look for -pthreads and -lpthread first: + + ax_pthread_flags="-pthreads pthread -mt -pthread $ax_pthread_flags" + ;; + + darwin*) + ax_pthread_flags="-pthread $ax_pthread_flags" + ;; +esac + +if test x"$ax_pthread_ok" = xno; then +for flag in $ax_pthread_flags; do + + case $flag in + none) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether pthreads work without any flags" >&5 +printf %s "checking whether pthreads work without any flags... " >&6; } + ;; + + -*) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether pthreads work with $flag" >&5 +printf %s "checking whether pthreads work with $flag... " >&6; } + PTHREAD_CFLAGS="$flag" + ;; + + pthread-config) + # Extract the first word of "pthread-config", so it can be a program name with args. +set dummy pthread-config; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ax_pthread_config+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ax_pthread_config"; then + ac_cv_prog_ax_pthread_config="$ax_pthread_config" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ax_pthread_config="yes" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_prog_ax_pthread_config" && ac_cv_prog_ax_pthread_config="no" +fi +fi +ax_pthread_config=$ac_cv_prog_ax_pthread_config +if test -n "$ax_pthread_config"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_config" >&5 +printf "%s\n" "$ax_pthread_config" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test x"$ax_pthread_config" = xno; then continue; fi + PTHREAD_CFLAGS="`pthread-config --cflags`" + PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" + ;; + + *) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for the pthreads library -l$flag" >&5 +printf %s "checking for the pthreads library -l$flag... " >&6; } + PTHREAD_LIBS="-l$flag" + ;; + esac + + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Check for various functions. We must include pthread.h, + # since some functions may be macros. (On the Sequent, we + # need a special flag -Kthread to make this header compile.) + # We check for pthread_join because it is in -lpthread on IRIX + # while pthread_create is in libc. We check for pthread_attr_init + # due to DEC craziness with -lpthreads. We check for + # pthread_cleanup_push because it is one of the few pthread + # functions on Solaris that doesn't have a non-functional libc stub. + # We try pthread_create on general principles. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + static void routine(void *a) { a = 0; } + static void *start_routine(void *a) { return a; } +int +main (void) +{ +pthread_t th; pthread_attr_t attr; + pthread_create(&th, 0, start_routine, 0); + pthread_join(th, 0); + pthread_attr_init(&attr); + pthread_cleanup_push(routine, 0); + pthread_cleanup_pop(0) /* ; */ + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ax_pthread_ok=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_ok" >&5 +printf "%s\n" "$ax_pthread_ok" >&6; } + if test "x$ax_pthread_ok" = xyes; then + break; + fi + + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" +done +fi + +# Various other checks: +if test "x$ax_pthread_ok" = xyes; then + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for joinable pthread attribute" >&5 +printf %s "checking for joinable pthread attribute... " >&6; } + attr_name=unknown + for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main (void) +{ +int attr = $attr; return attr /* ; */ + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + attr_name=$attr; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + done + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $attr_name" >&5 +printf "%s\n" "$attr_name" >&6; } + if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then + +printf "%s\n" "#define PTHREAD_CREATE_JOINABLE $attr_name" >>confdefs.h + + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if more special flags are required for pthreads" >&5 +printf %s "checking if more special flags are required for pthreads... " >&6; } + flag=no + case ${host_os} in + aix* | freebsd* | darwin*) flag="-D_THREAD_SAFE";; + osf* | hpux*) flag="-D_REENTRANT";; + solaris*) + if test "$GCC" = "yes"; then + flag="-D_REENTRANT" + else + flag="-mt -D_REENTRANT" + fi + ;; + esac + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${flag}" >&5 +printf "%s\n" "${flag}" >&6; } + if test "x$flag" != xno; then + PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for PTHREAD_PRIO_INHERIT" >&5 +printf %s "checking for PTHREAD_PRIO_INHERIT... " >&6; } +if test ${ax_cv_PTHREAD_PRIO_INHERIT+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #include +int +main (void) +{ +int i = PTHREAD_PRIO_INHERIT; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ax_cv_PTHREAD_PRIO_INHERIT=yes +else $as_nop + ax_cv_PTHREAD_PRIO_INHERIT=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_PTHREAD_PRIO_INHERIT" >&5 +printf "%s\n" "$ax_cv_PTHREAD_PRIO_INHERIT" >&6; } + if test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes" +then : + +printf "%s\n" "#define HAVE_PTHREAD_PRIO_INHERIT 1" >>confdefs.h + +fi + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + # More AIX lossage: must compile with xlc_r or cc_r + if test x"$GCC" != xyes; then + for ac_prog in xlc_r cc_r +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_PTHREAD_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$PTHREAD_CC"; then + ac_cv_prog_PTHREAD_CC="$PTHREAD_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_PTHREAD_CC="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +PTHREAD_CC=$ac_cv_prog_PTHREAD_CC +if test -n "$PTHREAD_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PTHREAD_CC" >&5 +printf "%s\n" "$PTHREAD_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$PTHREAD_CC" && break +done +test -n "$PTHREAD_CC" || PTHREAD_CC="${CC}" + + else + PTHREAD_CC=$CC + fi +else + PTHREAD_CC="$CC" +fi + + + + + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$ax_pthread_ok" = xyes; then + +printf "%s\n" "#define HAVE_PTHREAD 1" >>confdefs.h + + : +else + ax_pthread_ok=no + as_fn_error $? 
"JIT support requires pthreads" "$LINENO" 5 +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + CC="$PTHREAD_CC" + CFLAGS="$PTHREAD_CFLAGS $CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + fi + +printf "%s\n" "#define SUPPORT_JIT /**/" >>confdefs.h + +else + enable_pcre2grep_jit="no" +fi + +if test "$enable_jit_sealloc" = "yes"; then + +printf "%s\n" "#define SLJIT_PROT_EXECUTABLE_ALLOCATOR 1" >>confdefs.h + +fi + +if test "$enable_pcre2grep_jit" = "yes"; then + +printf "%s\n" "#define SUPPORT_PCRE2GREP_JIT /**/" >>confdefs.h + +fi + +if test "$enable_pcre2grep_callout" = "yes"; then + if test "$enable_pcre2grep_callout_fork" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + if test "$HAVE_SYS_WAIT_H" != "1"; then + as_fn_error $? "Callout script support needs sys/wait.h." "$LINENO" 5 + fi + fi + +printf "%s\n" "#define SUPPORT_PCRE2GREP_CALLOUT_FORK /**/" >>confdefs.h + + fi + +printf "%s\n" "#define SUPPORT_PCRE2GREP_CALLOUT /**/" >>confdefs.h + +else + enable_pcre2grep_callout_fork="no" +fi + if test "$enable_unicode" = "yes"; then printf "%s\n" "#define SUPPORT_UNICODE /**/" >>confdefs.h fi +if test "$enable_pcre2grep_libz" = "yes"; then + +printf "%s\n" "#define SUPPORT_LIBZ /**/" >>confdefs.h + +fi + +if test "$enable_pcre2grep_libbz2" = "yes"; then + +printf "%s\n" "#define SUPPORT_LIBBZ2 /**/" >>confdefs.h + +fi + +if test $with_pcre2grep_bufsize -lt 8192 ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192" >&5 +printf "%s\n" "$as_me: WARNING: $with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192" >&2;} + with_pcre2grep_bufsize="8192" +else + if test $? -gt 1 ; then + as_fn_error $? 
"Bad value for --with-pcre2grep-bufsize" "$LINENO" 5 + fi +fi + +if test $with_pcre2grep_max_bufsize -lt $with_pcre2grep_bufsize ; then + with_pcre2grep_max_bufsize="$with_pcre2grep_bufsize" +else + if test $? -gt 1 ; then + as_fn_error $? "Bad value for --with-pcre2grep-max-bufsize" "$LINENO" 5 + fi +fi + + +printf "%s\n" "#define PCRE2GREP_BUFSIZE $with_pcre2grep_bufsize" >>confdefs.h + + + +printf "%s\n" "#define PCRE2GREP_MAX_BUFSIZE $with_pcre2grep_max_bufsize" >>confdefs.h + + +if test "$enable_pcre2test_libedit" = "yes"; then + +printf "%s\n" "#define SUPPORT_LIBEDIT /**/" >>confdefs.h + + LIBREADLINE="$LIBEDIT" +elif test "$enable_pcre2test_libreadline" = "yes"; then + +printf "%s\n" "#define SUPPORT_LIBREADLINE /**/" >>confdefs.h + +fi + printf "%s\n" "#define NEWLINE_DEFAULT $ac_pcre2_newline_value" >>confdefs.h @@ -14612,7 +16124,7 @@ printf "%s\n" "#define HEAP_LIMIT $with_heap_limit" >>confdefs.h -printf "%s\n" "#define MAX_NAME_SIZE 32" >>confdefs.h +printf "%s\n" "#define MAX_NAME_SIZE 128" >>confdefs.h @@ -14655,16 +16167,16 @@ esac # are m4 variables, assigned above. EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \ - $NO_UNDEFINED -version-info 12:0:12" + $NO_UNDEFINED -version-info 14:0:14" EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \ - $NO_UNDEFINED -version-info 12:0:12" + $NO_UNDEFINED -version-info 14:0:14" EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \ - $NO_UNDEFINED -version-info 12:0:12" + $NO_UNDEFINED -version-info 14:0:14" EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \ - $NO_UNDEFINED -version-info 3:5:0" + $NO_UNDEFINED -version-info 3:6:0" @@ -14676,10 +16188,292 @@ EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \ DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit" +# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is +# specified, the relevant library is available. 
+ +if test "$enable_pcre2grep_libz" = "yes"; then + if test "$HAVE_ZLIB_H" != "1"; then + echo "** Cannot --enable-pcre2grep-libz because zlib.h was not found" + exit 1 + fi + if test "$HAVE_LIBZ" != "1"; then + echo "** Cannot --enable-pcre2grep-libz because libz was not found" + exit 1 + fi + LIBZ="-lz" +fi + + +if test "$enable_pcre2grep_libbz2" = "yes"; then + if test "$HAVE_BZLIB_H" != "1"; then + echo "** Cannot --enable-pcre2grep-libbz2 because bzlib.h was not found" + exit 1 + fi + if test "$HAVE_LIBBZ2" != "1"; then + echo "** Cannot --enable-pcre2grep-libbz2 because libbz2 was not found" + exit 1 + fi + LIBBZ2="-lbz2" +fi + + +# Similarly for --enable-pcre2test-readline + +if test "$enable_pcre2test_libedit" = "yes"; then + if test "$enable_pcre2test_libreadline" = "yes"; then + echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-readline" + exit 1 + fi + if test -z "$HAVE_LIBEDIT_HEADER"; then + echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h," + echo "** edit/readline/readline.h nor a compatible header was found." + exit 1 + fi + if test -z "$LIBEDIT"; then + echo "** Cannot --enable-pcre2test-libedit because libedit library was not found." + exit 1 + fi +fi + +if test "$enable_pcre2test_libreadline" = "yes"; then + if test "$HAVE_READLINE_H" != "1"; then + echo "** Cannot --enable-pcre2test-readline because readline/readline.h was not found." + exit 1 + fi + if test "$HAVE_HISTORY_H" != "1"; then + echo "** Cannot --enable-pcre2test-readline because readline/history.h was not found." + exit 1 + fi + if test -z "$LIBREADLINE"; then + echo "** Cannot --enable-pcre2test-readline because readline library was not found." + exit 1 + fi +fi + # Handle valgrind support if test "$enable_valgrind" = "yes"; then - as_fn_error $? 
"pkg-config not supported" "$LINENO" 5 + + + + + + + +if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args. +set dummy ${ac_tool_prefix}pkg-config; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_PKG_CONFIG+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_PKG_CONFIG="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +PKG_CONFIG=$ac_cv_path_PKG_CONFIG +if test -n "$PKG_CONFIG"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5 +printf "%s\n" "$PKG_CONFIG" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_PKG_CONFIG"; then + ac_pt_PKG_CONFIG=$PKG_CONFIG + # Extract the first word of "pkg-config", so it can be a program name with args. +set dummy pkg-config; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_ac_pt_PKG_CONFIG+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $ac_pt_PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_PKG_CONFIG="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG +if test -n "$ac_pt_PKG_CONFIG"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5 +printf "%s\n" "$ac_pt_PKG_CONFIG" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_pt_PKG_CONFIG" = x; then + PKG_CONFIG="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + PKG_CONFIG=$ac_pt_PKG_CONFIG + fi +else + PKG_CONFIG="$ac_cv_path_PKG_CONFIG" +fi + +fi +if test -n "$PKG_CONFIG"; then + _pkg_min_version=0.9.0 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5 +printf %s "checking pkg-config is at least version $_pkg_min_version... " >&6; } + if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + PKG_CONFIG="" + fi +fi + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for valgrind" >&5 +printf %s "checking for valgrind... 
" >&6; } + +if test -n "$VALGRIND_CFLAGS"; then + pkg_cv_VALGRIND_CFLAGS="$VALGRIND_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"valgrind\""; } >&5 + ($PKG_CONFIG --exists --print-errors "valgrind") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_VALGRIND_CFLAGS=`$PKG_CONFIG --cflags "valgrind" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$VALGRIND_LIBS"; then + pkg_cv_VALGRIND_LIBS="$VALGRIND_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"valgrind\""; } >&5 + ($PKG_CONFIG --exists --print-errors "valgrind") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_VALGRIND_LIBS=`$PKG_CONFIG --libs "valgrind" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + VALGRIND_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "valgrind" 2>&1` + else + VALGRIND_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "valgrind" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$VALGRIND_PKG_ERRORS" >&5 + + as_fn_error $? 
"Package requirements (valgrind) were not met: + +$VALGRIND_PKG_ERRORS + +Consider adjusting the PKG_CONFIG_PATH environment variable if you +installed software in a non-standard prefix. + +Alternatively, you may set the environment variables VALGRIND_CFLAGS +and VALGRIND_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details." "$LINENO" 5 +elif test $pkg_failed = untried; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it +is in your PATH or set the PKG_CONFIG environment variable to the full +path to pkg-config. + +Alternatively, you may set the environment variables VALGRIND_CFLAGS +and VALGRIND_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details. + +To get pkg-config, see . +See \`config.log' for more details" "$LINENO" 5; } +else + VALGRIND_CFLAGS=$pkg_cv_VALGRIND_CFLAGS + VALGRIND_LIBS=$pkg_cv_VALGRIND_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +fi fi # Handle code coverage reporting support @@ -15050,6 +16844,10 @@ else am__EXEEXT_FALSE= fi +if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then + as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then as_fn_error $? "conditional \"AMDEP\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 @@ -15070,14 +16868,14 @@ if test -z "${WITH_PCRE2_32_TRUE}" && test -z "${WITH_PCRE2_32_FALSE}"; then as_fn_error $? "conditional \"WITH_PCRE2_32\" was never defined. 
Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi -if test -z "${WITH_DEBUG_TRUE}" && test -z "${WITH_DEBUG_FALSE}"; then - as_fn_error $? "conditional \"WITH_DEBUG\" was never defined. -Usually this means the macro was only invoked conditionally." "$LINENO" 5 -fi if test -z "${WITH_REBUILD_CHARTABLES_TRUE}" && test -z "${WITH_REBUILD_CHARTABLES_FALSE}"; then as_fn_error $? "conditional \"WITH_REBUILD_CHARTABLES\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${WITH_JIT_TRUE}" && test -z "${WITH_JIT_FALSE}"; then + as_fn_error $? "conditional \"WITH_JIT\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${WITH_UNICODE_TRUE}" && test -z "${WITH_UNICODE_FALSE}"; then as_fn_error $? "conditional \"WITH_UNICODE\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 @@ -15086,6 +16884,14 @@ if test -z "${WITH_VALGRIND_TRUE}" && test -z "${WITH_VALGRIND_FALSE}"; then as_fn_error $? "conditional \"WITH_VALGRIND\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${WITH_FUZZ_SUPPORT_TRUE}" && test -z "${WITH_FUZZ_SUPPORT_FALSE}"; then + as_fn_error $? "conditional \"WITH_FUZZ_SUPPORT\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_DIFF_FUZZ_SUPPORT_TRUE}" && test -z "${WITH_DIFF_FUZZ_SUPPORT_FALSE}"; then + as_fn_error $? "conditional \"WITH_DIFF_FUZZ_SUPPORT\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${WITH_GCOV_TRUE}" && test -z "${WITH_GCOV_FALSE}"; then as_fn_error $? "conditional \"WITH_GCOV\" was never defined. Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 @@ -15480,7 +17286,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by PCRE2 $as_me 10.43, which was +This file was extended by PCRE2 $as_me 10.45, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -15548,7 +17354,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -PCRE2 config.status 10.43 +PCRE2 config.status 10.45 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" @@ -15714,12 +17520,14 @@ lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED "$delay_single_q lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED "$delay_single_quote_subst"`' reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`' reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`' +FILECMD='`$ECHO "$FILECMD" | $SED "$delay_single_quote_subst"`' deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED "$delay_single_quote_subst"`' file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`' file_magic_glob='`$ECHO "$file_magic_glob" | $SED "$delay_single_quote_subst"`' want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`' sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED "$delay_single_quote_subst"`' AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`' +lt_ar_flags='`$ECHO "$lt_ar_flags" | $SED "$delay_single_quote_subst"`' AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`' archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED "$delay_single_quote_subst"`' STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`' @@ -15843,13 +17651,13 @@ LN_S \ lt_SP2NL \ lt_NL2SP \ reload_flag \ 
+FILECMD \ deplibs_check_method \ file_magic_cmd \ file_magic_glob \ want_nocaseglob \ sharedlib_from_linklib_cmd \ AR \ -AR_FLAGS \ archiver_list_spec \ STRIP \ RANLIB \ @@ -16676,7 +18484,6 @@ See \`config.log' for more details" "$LINENO" 5; } cat <<_LT_EOF >> "$cfgfile" #! $SHELL # Generated automatically by $as_me ($PACKAGE) $VERSION -# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: # NOTE: Changes made to this file will be lost: look at ltmain.sh. # Provide generalized library-building support services. @@ -16805,6 +18612,9 @@ to_host_file_cmd=$lt_cv_to_host_file_cmd # convert \$build files to toolchain format. to_tool_file_cmd=$lt_cv_to_tool_file_cmd +# A file(cmd) program that detects file types. +FILECMD=$lt_FILECMD + # Method to check whether dependent libraries are shared objects. deplibs_check_method=$lt_deplibs_check_method @@ -16823,8 +18633,11 @@ sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd # The archiver. AR=$lt_AR +# Flags to create an archive (by configure). +lt_ar_flags=$lt_ar_flags + # Flags to create an archive. -AR_FLAGS=$lt_AR_FLAGS +AR_FLAGS=\${ARFLAGS-"\$lt_ar_flags"} # How to feed a file listing to the archiver. archiver_list_spec=$lt_archiver_list_spec @@ -17200,7 +19013,7 @@ ltmain=$ac_aux_dir/ltmain.sh # if finds mixed CR/LF and LF-only lines. Since sed operates in # text mode, it properly converts lines to CR/LF. This bash problem # is reportedly fixed, but why not run on old versions too? - sed '$q' "$ltmain" >> "$cfgfile" \ + $SED '$q' "$ltmain" >> "$cfgfile" \ || (rm -f "$cfgfile"; exit 1) mv -f "$cfgfile" "$ofile" || @@ -17285,6 +19098,8 @@ $PACKAGE-$VERSION configuration summary: Build 16-bit pcre2 library ......... : ${enable_pcre2_16} Build 32-bit pcre2 library ......... : ${enable_pcre2_32} Include debugging code ............. : ${enable_debug} + Enable JIT compiling support ....... : ${enable_jit} + Use SELinux allocator in JIT ....... 
: ${enable_jit_sealloc} Enable Unicode support ............. : ${enable_unicode} Newline char/sequence .............. : ${enable_newline} \R matches only ANYCRLF ............ : ${enable_bsr_anycrlf} @@ -17300,8 +19115,19 @@ $PACKAGE-$VERSION configuration summary: Match depth limit .................. : ${with_match_limit_depth} Build shared libs .................. : ${enable_shared} Build static libs .................. : ${enable_static} + Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit} + Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout} + Enable fork in pcre2grep callouts .. : ${enable_pcre2grep_callout_fork} + Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize} + Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize} + Link pcre2grep with libz ........... : ${enable_pcre2grep_libz} + Link pcre2grep with libbz2 ......... : ${enable_pcre2grep_libbz2} + Link pcre2test with libedit ........ : ${enable_pcre2test_libedit} + Link pcre2test with libreadline .... : ${enable_pcre2test_libreadline} Valgrind support ................... : ${enable_valgrind} Code coverage ...................... : ${enable_coverage} + Fuzzer support ..................... : ${enable_fuzz_support} + Differential fuzzer support ........ : ${enable_diff_fuzz_support} Use %zu and %td .................... : ${enable_percent_zt} EOF diff --git a/libpcre/configure.ac b/libpcre/configure.ac index 3c5493714..2b0d7198c 100644 --- a/libpcre/configure.ac +++ b/libpcre/configure.ac @@ -9,23 +9,32 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might dnl be defined as -RC2, for example. For real releases, it should be empty. 
m4_define(pcre2_major, [10]) -m4_define(pcre2_minor, [43]) +m4_define(pcre2_minor, [45]) m4_define(pcre2_prerelease, []) -m4_define(pcre2_date, [2024-02-16]) +m4_define(pcre2_date, [2025-02-05]) # Libtool shared library interface versions (current:revision:age) -m4_define(libpcre2_8_version, [12:0:12]) -m4_define(libpcre2_16_version, [12:0:12]) -m4_define(libpcre2_32_version, [12:0:12]) -m4_define(libpcre2_posix_version, [3:5:0]) +m4_define(libpcre2_8_version, [14:0:14]) +m4_define(libpcre2_16_version, [14:0:14]) +m4_define(libpcre2_32_version, [14:0:14]) +m4_define(libpcre2_posix_version, [3:6:0]) # NOTE: The CMakeLists.txt file searches for the above variables in the first # 50 lines of this file. Please update that if the variables above are moved. -AC_PREREQ([2.62]) +AC_PREREQ([2.60]) AC_INIT([PCRE2],pcre2_major.pcre2_minor[]pcre2_prerelease,[],[pcre2]) AC_CONFIG_SRCDIR([src/pcre2.h.in]) -AM_INIT_AUTOMAKE([foreign]) +AM_INIT_AUTOMAKE([dist-bzip2 dist-zip foreign]) +ifelse(pcre2_prerelease, [-DEV], + [dnl For development builds, ./configure is not checked in to Git, so we are + dnl happy to have it regenerated as needed. + AM_MAINTAINER_MODE([enable])], + [dnl For a release build (or RC), the ./configure script we ship in the + dnl tarball (and check in to the Git tag) should not be regenerated + dnl implicitly. This is important if users want to check out a release tag + dnl using Git. 
+ AM_MAINTAINER_MODE]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) AC_CONFIG_HEADERS(src/config.h) @@ -73,6 +82,40 @@ AC_SYS_LARGEFILE PCRE2_VISIBILITY +# Check for Clang __attribute__((uninitialized)) feature + +AC_MSG_CHECKING([for __attribute__((uninitialized))]) +AC_LANG_PUSH([C]) +tmp_CFLAGS=$CFLAGS +if test $WORKING_WERROR -eq 1; then + CFLAGS="$CFLAGS -Werror" +fi +AC_COMPILE_IFELSE([AC_LANG_PROGRAM(, + [[char buf[128] __attribute__((uninitialized));(void)buf]])], + [pcre2_cc_cv_attribute_uninitialized=yes], + [pcre2_cc_cv_attribute_uninitialized=no]) +AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized]) +if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then + AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler + supports __attribute__((uninitialized))]) +fi +CFLAGS=$tmp_CFLAGS +AC_LANG_POP([C]) + +# Check for the assume() builtin + +AC_MSG_CHECKING([for __assume()]) +AC_LANG_PUSH([C]) +AC_LINK_IFELSE([AC_LANG_PROGRAM([[]], [[__assume(1)]])], + [pcre2_cc_cv_builtin_assume=yes], + [pcre2_cc_cv_builtin_assume=no]) +AC_MSG_RESULT([$pcre2_cc_cv_builtin_assume]) +if test "$pcre2_cc_cv_builtin_assume" = yes; then + AC_DEFINE([HAVE_BUILTIN_ASSUME], 1, + [Define this if your compiler provides __assume()]) +fi +AC_LANG_POP([C]) + # Check for the mul_overflow() builtin AC_MSG_CHECKING([for __builtin_mul_overflow()]) @@ -95,22 +138,18 @@ if test "$pcre2_cc_cv_builtin_mul_overflow" = yes; then fi AC_LANG_POP([C]) -# Check for Clang __attribute__((uninitialized)) feature +# Check for the unreachable() builtin -AC_MSG_CHECKING([for __attribute__((uninitialized))]) +AC_MSG_CHECKING([for __builtin_unreachable()]) AC_LANG_PUSH([C]) -tmp_CFLAGS=$CFLAGS -CFLAGS="$CFLAGS -Werror" -AC_COMPILE_IFELSE([AC_LANG_PROGRAM(, - [[char buf[128] __attribute__((uninitialized));(void)buf]])], - [pcre2_cc_cv_attribute_uninitialized=yes], - [pcre2_cc_cv_attribute_uninitialized=no]) -AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized]) -if test 
"$pcre2_cc_cv_attribute_uninitialized" = yes; then - AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler - supports __attribute__((uninitialized))]) +AC_LINK_IFELSE([AC_LANG_PROGRAM([[int r;]], [[if (r) __builtin_unreachable()]])], + [pcre2_cc_cv_builtin_unreachable=yes], + [pcre2_cc_cv_builtin_unreachable=no]) +AC_MSG_RESULT([$pcre2_cc_cv_builtin_unreachable]) +if test "$pcre2_cc_cv_builtin_unreachable" = yes; then + AC_DEFINE([HAVE_BUILTIN_UNREACHABLE], 1, + [Define this if your compiler provides __builtin_unreachable()]) fi -CFLAGS=$tmp_CFLAGS AC_LANG_POP([C]) # Versioning @@ -178,6 +217,58 @@ AC_ARG_ENABLE(debug, [enable debugging code]), , enable_debug=no) +# Handle --enable-jit (disabled by default) +AC_ARG_ENABLE(jit, + AS_HELP_STRING([--enable-jit], + [enable Just-In-Time compiling support]), + , enable_jit=no) + +# This code enables JIT if the hardware supports it. +if test "$enable_jit" = "auto"; then + AC_LANG(C) + SAVE_CPPFLAGS=$CPPFLAGS + CPPFLAGS=-I$srcdir + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + #define SLJIT_CONFIG_AUTO 1 + #include "deps/sljit/sljit_src/sljitConfigCPU.h" + #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + #error unsupported + #endif]])], enable_jit=yes, enable_jit=no) + CPPFLAGS=$SAVE_CPPFLAGS + echo checking for JIT support on this hardware... 
$enable_jit +fi + +# Handle --enable-jit-sealloc (disabled by default and only experimental) +case $host_os in + linux* | netbsd*) + AC_ARG_ENABLE(jit-sealloc, + AS_HELP_STRING([--enable-jit-sealloc], + [enable SELinux compatible execmem allocator in JIT (experimental)]), + ,enable_jit_sealloc=no) + ;; + *) + enable_jit_sealloc=unsupported + ;; +esac + +# Handle --disable-pcre2grep-jit (enabled by default) +AC_ARG_ENABLE(pcre2grep-jit, + AS_HELP_STRING([--disable-pcre2grep-jit], + [disable JIT support in pcre2grep]), + , enable_pcre2grep_jit=yes) + +# Handle --disable-pcre2grep-callout (enabled by default) +AC_ARG_ENABLE(pcre2grep-callout, + AS_HELP_STRING([--disable-pcre2grep-callout], + [disable callout script support in pcre2grep]), + , enable_pcre2grep_callout=yes) + +# Handle --disable-pcre2grep-callout-fork (enabled by default) +AC_ARG_ENABLE(pcre2grep-callout-fork, + AS_HELP_STRING([--disable-pcre2grep-callout-fork], + [disable callout script fork support in pcre2grep]), + , enable_pcre2grep_callout_fork=yes) + # Handle --enable-rebuild-chartables AC_ARG_ENABLE(rebuild-chartables, AS_HELP_STRING([--enable-rebuild-chartables], @@ -188,7 +279,7 @@ AC_ARG_ENABLE(rebuild-chartables, AC_ARG_ENABLE(unicode, AS_HELP_STRING([--disable-unicode], [disable Unicode support]), - , enable_unicode=unset) + , enable_unicode=no) # Handle newline options ac_pcre2_newline=lf @@ -233,7 +324,7 @@ AC_ARG_ENABLE(never-backslash-C, # Handle --enable-ebcdic AC_ARG_ENABLE(ebcdic, AS_HELP_STRING([--enable-ebcdic], - [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]), + [assume EBCDIC coding rather than ASCII; incompatible with --enable-unicode; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]), , enable_ebcdic=no) # Handle --enable-ebcdic-nl25 @@ -242,6 +333,42 @@ AC_ARG_ENABLE(ebcdic-nl25, [set EBCDIC code for NL to 0x25 instead of 0x15; it 
implies --enable-ebcdic]), , enable_ebcdic_nl25=no) +# Handle --enable-pcre2grep-libz +AC_ARG_ENABLE(pcre2grep-libz, + AS_HELP_STRING([--enable-pcre2grep-libz], + [link pcre2grep with libz to handle .gz files]), + , enable_pcre2grep_libz=no) + +# Handle --enable-pcre2grep-libbz2 +AC_ARG_ENABLE(pcre2grep-libbz2, + AS_HELP_STRING([--enable-pcre2grep-libbz2], + [link pcre2grep with libbz2 to handle .bz2 files]), + , enable_pcre2grep_libbz2=no) + +# Handle --with-pcre2grep-bufsize=N +AC_ARG_WITH(pcre2grep-bufsize, + AS_HELP_STRING([--with-pcre2grep-bufsize=N], + [pcre2grep initial buffer size (default=20480, minimum=8192)]), + , with_pcre2grep_bufsize=20480) + +# Handle --with-pcre2grep-max-bufsize=N +AC_ARG_WITH(pcre2grep-max-bufsize, + AS_HELP_STRING([--with-pcre2grep-max-bufsize=N], + [pcre2grep maximum buffer size (default=1048576, minimum=8192)]), + , with_pcre2grep_max_bufsize=1048576) + +# Handle --enable-pcre2test-libedit +AC_ARG_ENABLE(pcre2test-libedit, + AS_HELP_STRING([--enable-pcre2test-libedit], + [link pcre2test with libedit]), + , enable_pcre2test_libedit=no) + +# Handle --enable-pcre2test-libreadline +AC_ARG_ENABLE(pcre2test-libreadline, + AS_HELP_STRING([--enable-pcre2test-libreadline], + [link pcre2test with libreadline]), + , enable_pcre2test_libreadline=no) + # Handle --with-link-size=N AC_ARG_WITH(link-size, AS_HELP_STRING([--with-link-size=N], @@ -301,6 +428,18 @@ AC_ARG_ENABLE(coverage, [enable code coverage reports using gcov]), , enable_coverage=no) +# Handle --enable-fuzz-support +AC_ARG_ENABLE(fuzz_support, + AS_HELP_STRING([--enable-fuzz-support], + [enable fuzzer support]), + , enable_fuzz_support=no) + +# Handle --enable-diff-fuzz-support +AC_ARG_ENABLE(diff_fuzz_support, + AS_HELP_STRING([--enable-diff-fuzz-support], + [enable differential fuzzer support]), + , enable_diff_fuzz_support=no) + # Handle --disable-stack-for-recursion # This option became obsolete at release 10.30. 
AC_ARG_ENABLE(stack-for-recursion,, @@ -421,7 +560,7 @@ HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make sure both macros are undefined; an emulation function will then be used. */]) # Checks for header files. -AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h) +AC_CHECK_HEADERS(assert.h limits.h sys/types.h sys/stat.h dirent.h) AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1]) AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1]) @@ -429,10 +568,30 @@ AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1]) AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes") AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes") AM_CONDITIONAL(WITH_PCRE2_32, test "x$enable_pcre2_32" = "xyes") -AM_CONDITIONAL(WITH_DEBUG, test "x$enable_debug" = "xyes") AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes") +AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes") AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes") AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes") +AM_CONDITIONAL(WITH_FUZZ_SUPPORT, test "x$enable_fuzz_support" = "xyes") +AM_CONDITIONAL(WITH_DIFF_FUZZ_SUPPORT, test "x$enable_diff_fuzz_support" = "xyes") + +if test "$enable_fuzz_support" = "yes" -a "$enable_pcre2_8" = "no"; then + echo "** ERROR: Fuzzer support requires the 8-bit library" + exit 1 +fi + +if test "$enable_diff_fuzz_support" = "yes"; then + if test "$enable_fuzz_support" = "no"; then + echo "** ERROR: Differential fuzzing support requires fuzzing support" + exit 1 + fi + if test "$enable_jit" = "no"; then + echo "** ERROR: Differential fuzzing support requires Just-in-Time compilation support" + exit 1 + fi + AC_DEFINE([SUPPORT_DIFF_FUZZ], [], [ + Define to any value to enable differential fuzzing support.]) +fi # Checks for typedefs, structures, and compiler characteristics. 
@@ -456,6 +615,87 @@ realpath(".", buffer); ], AC_MSG_RESULT([no])) +# Check for the availability of libz (aka zlib) + +AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1]) +AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1]) + +# Check for the availability of libbz2. Originally we just used AC_CHECK_LIB, +# as for libz. However, this had the following problem, diagnosed and fixed by +# a user: +# +# - libbz2 uses the Pascal calling convention (WINAPI) for the functions +# under Win32. +# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h", +# therefore missing the function definition. +# - The compiler thus generates a "C" signature for the test function. +# - The linker fails to find the "C" function. +# - PCRE2 fails to configure if asked to do so against libbz2. +# +# Solution: +# +# - Replace the AC_CHECK_LIB test with a custom test. + +AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1]) +# Original test +# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1]) +# +# Custom test follows + +AC_MSG_CHECKING([for libbz2]) +OLD_LIBS="$LIBS" +LIBS="$LIBS -lbz2" +AC_LINK_IFELSE([AC_LANG_PROGRAM([[ +#ifdef HAVE_BZLIB_H +#include <bzlib.h> +#endif]], +[[return (int)BZ2_bzopen("conftest", "rb");]])], +[AC_MSG_RESULT([yes]);HAVE_LIBBZ2=1; break;], +AC_MSG_RESULT([no])) +LIBS="$OLD_LIBS" + +# Check for the availability of libreadline + +if test "$enable_pcre2test_libreadline" = "yes"; then + AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1]) + AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1]) + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lreadline"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltinfo"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lcurses"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncurses"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncursesw"], + [unset
ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltermcap"], + [LIBREADLINE=""], + [-ltermcap])], + [-lncursesw])], + [-lncurses])], + [-lcurses])], + [-ltinfo])]) + AC_SUBST(LIBREADLINE) + if test -n "$LIBREADLINE"; then + if test "$LIBREADLINE" != "-lreadline"; then + echo "-lreadline needs $LIBREADLINE" + LIBREADLINE="-lreadline $LIBREADLINE" + fi + fi +fi + +# Check for the availability of libedit. Different distributions put its +# headers in different places. Try to cover the most common ones. + +if test "$enable_pcre2test_libedit" = "yes"; then + AC_CHECK_HEADERS([editline/readline.h edit/readline/readline.h readline.h], [ + HAVE_LIBEDIT_HEADER=1 + break + ]) + AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"]) +fi + PCRE2_STATIC_CFLAG="" if test "x$enable_shared" = "xno" ; then AC_DEFINE([PCRE2_STATIC], [1], [ @@ -500,6 +740,52 @@ else enable_percent_zt=auto fi +# Unless running under Windows, JIT support requires pthreads. + +if test "$enable_jit" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])]) + CC="$PTHREAD_CC" + CFLAGS="$PTHREAD_CFLAGS $CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + fi + AC_DEFINE([SUPPORT_JIT], [], [ + Define to any value to enable support for Just-In-Time compiling.]) +else + enable_pcre2grep_jit="no" +fi + +if test "$enable_jit_sealloc" = "yes"; then + AC_DEFINE([SLJIT_PROT_EXECUTABLE_ALLOCATOR], [1], [ + Define to any non-zero number to enable support for SELinux + compatible executable memory allocator in JIT. Note that this + will have no effect unless SUPPORT_JIT is also defined.]) +fi + +if test "$enable_pcre2grep_jit" = "yes"; then + AC_DEFINE([SUPPORT_PCRE2GREP_JIT], [], [ + Define to any value to enable JIT support in pcre2grep. 
Note that this will + have no effect unless SUPPORT_JIT is also defined.]) +fi + +if test "$enable_pcre2grep_callout" = "yes"; then + if test "$enable_pcre2grep_callout_fork" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + if test "$HAVE_SYS_WAIT_H" != "1"; then + AC_MSG_ERROR([Callout script support needs sys/wait.h.]) + fi + fi + AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT_FORK], [], [ + Define to any value to enable fork support in pcre2grep callout scripts. + This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also + defined.]) + fi + AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [ + Define to any value to enable callout script support in pcre2grep.]) +else + enable_pcre2grep_callout_fork="no" +fi + if test "$enable_unicode" = "yes"; then AC_DEFINE([SUPPORT_UNICODE], [], [ Define to any value to enable support for Unicode and UTF encoding. @@ -508,6 +794,57 @@ if test "$enable_unicode" = "yes"; then code *or* ASCII/Unicode, but not both at once.]) fi +if test "$enable_pcre2grep_libz" = "yes"; then + AC_DEFINE([SUPPORT_LIBZ], [], [ + Define to any value to allow pcre2grep to be linked with libz, so that it is + able to handle .gz files.]) +fi + +if test "$enable_pcre2grep_libbz2" = "yes"; then + AC_DEFINE([SUPPORT_LIBBZ2], [], [ + Define to any value to allow pcre2grep to be linked with libbz2, so that it + is able to handle .bz2 files.]) +fi + +if test $with_pcre2grep_bufsize -lt 8192 ; then + AC_MSG_WARN([$with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192]) + with_pcre2grep_bufsize="8192" +else + if test $? -gt 1 ; then + AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize]) + fi +fi + +if test $with_pcre2grep_max_bufsize -lt $with_pcre2grep_bufsize ; then + with_pcre2grep_max_bufsize="$with_pcre2grep_bufsize" +else + if test $? 
-gt 1 ; then + AC_MSG_ERROR([Bad value for --with-pcre2grep-max-bufsize]) + fi +fi + +AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [ + The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by + pcre2grep to hold parts of the file it is searching. The buffer will be + expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing very + long lines. The actual amount of memory used by pcre2grep is three times this + number, because it allows for the buffering of "before" and "after" lines.]) + +AC_DEFINE_UNQUOTED([PCRE2GREP_MAX_BUFSIZE], [$with_pcre2grep_max_bufsize], [ + The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer + used by pcre2grep to hold parts of the file it is searching. The actual + amount of memory used by pcre2grep is three times this number, because it + allows for the buffering of "before" and "after" lines.]) + +if test "$enable_pcre2test_libedit" = "yes"; then + AC_DEFINE([SUPPORT_LIBEDIT], [], [ + Define to any value to allow pcre2test to be linked with libedit.]) + LIBREADLINE="$LIBEDIT" +elif test "$enable_pcre2test_libreadline" = "yes"; then + AC_DEFINE([SUPPORT_LIBREADLINE], [], [ + Define to any value to allow pcre2test to be linked with libreadline.]) +fi + AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [ The value of NEWLINE_DEFAULT determines the default newline character sequence. PCRE2 client programs can override this by selecting other values @@ -584,7 +921,7 @@ AC_DEFINE_UNQUOTED([HEAP_LIMIT], [$with_heap_limit], [ a pattern. It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply to JIT matching. The value is in kibibytes (units of 1024 bytes).]) -AC_DEFINE([MAX_NAME_SIZE], [32], [ +AC_DEFINE([MAX_NAME_SIZE], [128], [ This limit is parameterized just in case anybody ever wants to change it. 
Care must be taken if it is increased, because it guards against integer overflow caused by enormously large patterns.]) @@ -666,6 +1003,68 @@ AC_SUBST(EXTRA_LIBPCRE2_POSIX_LDFLAGS) DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit" AC_SUBST(DISTCHECK_CONFIGURE_FLAGS) +# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is +# specified, the relevant library is available. + +if test "$enable_pcre2grep_libz" = "yes"; then + if test "$HAVE_ZLIB_H" != "1"; then + echo "** Cannot --enable-pcre2grep-libz because zlib.h was not found" + exit 1 + fi + if test "$HAVE_LIBZ" != "1"; then + echo "** Cannot --enable-pcre2grep-libz because libz was not found" + exit 1 + fi + LIBZ="-lz" +fi +AC_SUBST(LIBZ) + +if test "$enable_pcre2grep_libbz2" = "yes"; then + if test "$HAVE_BZLIB_H" != "1"; then + echo "** Cannot --enable-pcre2grep-libbz2 because bzlib.h was not found" + exit 1 + fi + if test "$HAVE_LIBBZ2" != "1"; then + echo "** Cannot --enable-pcre2grep-libbz2 because libbz2 was not found" + exit 1 + fi + LIBBZ2="-lbz2" +fi +AC_SUBST(LIBBZ2) + +# Similarly for --enable-pcre2test-readline + +if test "$enable_pcre2test_libedit" = "yes"; then + if test "$enable_pcre2test_libreadline" = "yes"; then + echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-readline" + exit 1 + fi + if test -z "$HAVE_LIBEDIT_HEADER"; then + echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h," + echo "** edit/readline/readline.h nor a compatible header was found." + exit 1 + fi + if test -z "$LIBEDIT"; then + echo "** Cannot --enable-pcre2test-libedit because libedit library was not found." + exit 1 + fi +fi + +if test "$enable_pcre2test_libreadline" = "yes"; then + if test "$HAVE_READLINE_H" != "1"; then + echo "** Cannot --enable-pcre2test-readline because readline/readline.h was not found." 
+ exit 1 + fi + if test "$HAVE_HISTORY_H" != "1"; then + echo "** Cannot --enable-pcre2test-readline because readline/history.h was not found." + exit 1 + fi + if test -z "$LIBREADLINE"; then + echo "** Cannot --enable-pcre2test-readline because readline library was not found." + exit 1 + fi +fi + # Handle valgrind support if test "$enable_valgrind" = "yes"; then @@ -788,6 +1187,8 @@ $PACKAGE-$VERSION configuration summary: Build 16-bit pcre2 library ......... : ${enable_pcre2_16} Build 32-bit pcre2 library ......... : ${enable_pcre2_32} Include debugging code ............. : ${enable_debug} + Enable JIT compiling support ....... : ${enable_jit} + Use SELinux allocator in JIT ....... : ${enable_jit_sealloc} Enable Unicode support ............. : ${enable_unicode} Newline char/sequence .............. : ${enable_newline} \R matches only ANYCRLF ............ : ${enable_bsr_anycrlf} @@ -803,8 +1204,19 @@ $PACKAGE-$VERSION configuration summary: Match depth limit .................. : ${with_match_limit_depth} Build shared libs .................. : ${enable_shared} Build static libs .................. : ${enable_static} + Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit} + Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout} + Enable fork in pcre2grep callouts .. : ${enable_pcre2grep_callout_fork} + Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize} + Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize} + Link pcre2grep with libz ........... : ${enable_pcre2grep_libz} + Link pcre2grep with libbz2 ......... : ${enable_pcre2grep_libbz2} + Link pcre2test with libedit ........ : ${enable_pcre2test_libedit} + Link pcre2test with libreadline .... : ${enable_pcre2test_libreadline} Valgrind support ................... : ${enable_valgrind} Code coverage ...................... : ${enable_coverage} + Fuzzer support ..................... : ${enable_fuzz_support} + Differential fuzzer support ........ 
: ${enable_diff_fuzz_support} Use %zu and %td .................... : ${enable_percent_zt} EOF diff --git a/libpcre/ltmain.sh b/libpcre/ltmain.sh old mode 100644 new mode 100755 index 147d758ab..977e5237b --- a/libpcre/ltmain.sh +++ b/libpcre/ltmain.sh @@ -1,12 +1,12 @@ -#! /bin/sh +#! /usr/bin/env sh ## DO NOT EDIT - This file generated from ./build-aux/ltmain.in -## by inline-source v2014-01-03.01 +## by inline-source v2019-02-19.15 -# libtool (GNU libtool) 2.4.6 +# libtool (GNU libtool) 2.4.7 # Provide generalized library-building support services. # Written by Gordon Matzigkeit , 1996 -# Copyright (C) 1996-2015 Free Software Foundation, Inc. +# Copyright (C) 1996-2019, 2021-2022 Free Software Foundation, Inc. # This is free software; see the source for copying conditions. There is NO # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. @@ -31,8 +31,8 @@ PROGRAM=libtool PACKAGE=libtool -VERSION="2.4.6 Debian-2.4.6-0.1" -package_revision=2.4.6 +VERSION="2.4.7 Debian-2.4.7-7build1" +package_revision=2.4.7 ## ------ ## @@ -64,34 +64,25 @@ package_revision=2.4.6 # libraries, which are installed to $pkgauxdir. # Set a version string for this script. -scriptversion=2015-01-20.17; # UTC +scriptversion=2019-02-19.15; # UTC # General shell script boiler plate, and helper functions. # Written by Gary V. Vaughan, 2004 -# Copyright (C) 2004-2015 Free Software Foundation, Inc. -# This is free software; see the source for copying conditions. There is NO -# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# This is free software. There is NO warranty; not even for +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Copyright (C) 2004-2019, 2021 Bootstrap Authors +# +# This file is dual licensed under the terms of the MIT license +# , and GPL version 2 or later +# . You must apply one of +# these licenses when using or redistributing this software or any of +# the files within it. 
See the URLs above, or the file `LICENSE` +# included in the Bootstrap distribution for the full license texts. -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. - -# As a special exception to the GNU General Public License, if you distribute -# this file as part of a program or library that is built using GNU Libtool, -# you may include this file under the same distribution terms that you use -# for the rest of that program. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNES FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -# Please report bugs or propose patches to gary@gnu.org. +# Please report bugs or propose patches to: +# ## ------ ## @@ -139,9 +130,12 @@ do _G_safe_locale=\"$_G_var=C; \$_G_safe_locale\" fi" done - -# CDPATH. -(unset CDPATH) >/dev/null 2>&1 && unset CDPATH +# These NLS vars are set unconditionally (bootstrap issue #24). Unset those +# in case the environment reset is needed later and the $save_* variant is not +# defined (see the code above). +LC_ALL=C +LANGUAGE=C +export LANGUAGE LC_ALL # Make sure IFS has a sensible default sp=' ' @@ -159,6 +153,26 @@ if test "${PATH_SEPARATOR+set}" != set; then fi +# func_unset VAR +# -------------- +# Portably unset VAR. +# In some shells, an 'unset VAR' statement leaves a non-zero return +# status if VAR is already unset, which might be problematic if the +# statement is used at the end of a function (thus poisoning its return +# value) or when 'set -e' is active (causing even a spurious abort of +# the script in this case). 
+func_unset () +{ + { eval $1=; (eval unset $1) >/dev/null 2>&1 && eval unset $1 || : ; } +} + + +# Make sure CDPATH doesn't cause `cd` commands to output the target dir. +func_unset CDPATH + +# Make sure ${,E,F}GREP behave sanely. +func_unset GREP_OPTIONS + ## ------------------------- ## ## Locate command utilities. ## @@ -259,7 +273,7 @@ test -z "$SED" && { rm -f conftest.in conftest.tmp conftest.nl conftest.out } - func_path_progs "sed gsed" func_check_prog_sed $PATH:/usr/xpg4/bin + func_path_progs "sed gsed" func_check_prog_sed "$PATH:/usr/xpg4/bin" rm -f conftest.sed SED=$func_path_progs_result } @@ -295,7 +309,7 @@ test -z "$GREP" && { rm -f conftest.in conftest.tmp conftest.nl conftest.out } - func_path_progs "grep ggrep" func_check_prog_grep $PATH:/usr/xpg4/bin + func_path_progs "grep ggrep" func_check_prog_grep "$PATH:/usr/xpg4/bin" GREP=$func_path_progs_result } @@ -360,6 +374,35 @@ sed_double_backslash="\ s/\\([^$_G_bs]\\)$_G_bs2$_G_dollar/\\1$_G_bs2$_G_bs$_G_dollar/g s/\n//g" +# require_check_ifs_backslash +# --------------------------- +# Check if we can use backslash as IFS='\' separator, and set +# $check_ifs_backshlash_broken to ':' or 'false'. +require_check_ifs_backslash=func_require_check_ifs_backslash +func_require_check_ifs_backslash () +{ + _G_save_IFS=$IFS + IFS='\' + _G_check_ifs_backshlash='a\\b' + for _G_i in $_G_check_ifs_backshlash + do + case $_G_i in + a) + check_ifs_backshlash_broken=false + ;; + '') + break + ;; + *) + check_ifs_backshlash_broken=: + break + ;; + esac + done + IFS=$_G_save_IFS + require_check_ifs_backslash=: +} + ## ----------------- ## ## Global variables. ## @@ -387,7 +430,7 @@ EXIT_SKIP=77 # $? = 77 is used to indicate a skipped test to automake. 
# putting '$debug_cmd' at the start of all your functions, you can get # bash to show function call trace with: # -# debug_cmd='eval echo "${FUNCNAME[0]} $*" >&2' bash your-script-name +# debug_cmd='echo "${FUNCNAME[0]} $*" >&2' bash your-script-name debug_cmd=${debug_cmd-":"} exit_cmd=: @@ -529,27 +572,15 @@ func_require_term_colors () # --------------------- # Append VALUE onto the existing contents of VAR. - # We should try to minimise forks, especially on Windows where they are - # unreasonably slow, so skip the feature probes when bash or zsh are - # being used: - if test set = "${BASH_VERSION+set}${ZSH_VERSION+set}"; then - : ${_G_HAVE_ARITH_OP="yes"} - : ${_G_HAVE_XSI_OPS="yes"} - # The += operator was introduced in bash 3.1 - case $BASH_VERSION in - [12].* | 3.0 | 3.0*) ;; - *) - : ${_G_HAVE_PLUSEQ_OP="yes"} - ;; - esac - fi - # _G_HAVE_PLUSEQ_OP # Can be empty, in which case the shell is probed, "yes" if += is # useable or anything else if it does not work. - test -z "$_G_HAVE_PLUSEQ_OP" \ - && (eval 'x=a; x+=" b"; test "a b" = "$x"') 2>/dev/null \ - && _G_HAVE_PLUSEQ_OP=yes + if test -z "$_G_HAVE_PLUSEQ_OP" && \ + __PLUSEQ_TEST="a" && \ + __PLUSEQ_TEST+=" b" 2>/dev/null && \ + test "a b" = "$__PLUSEQ_TEST"; then + _G_HAVE_PLUSEQ_OP=yes + fi if test yes = "$_G_HAVE_PLUSEQ_OP" then @@ -580,16 +611,16 @@ if test yes = "$_G_HAVE_PLUSEQ_OP"; then { $debug_cmd - func_quote_for_eval "$2" - eval "$1+=\\ \$func_quote_for_eval_result" + func_quote_arg pretty "$2" + eval "$1+=\\ \$func_quote_arg_result" }' else func_append_quoted () { $debug_cmd - func_quote_for_eval "$2" - eval "$1=\$$1\\ \$func_quote_for_eval_result" + func_quote_arg pretty "$2" + eval "$1=\$$1\\ \$func_quote_arg_result" } fi @@ -1091,85 +1122,203 @@ func_relative_path () } -# func_quote_for_eval ARG... -# -------------------------- -# Aesthetically quote ARGs to be evaled later. 
-# This function returns two values: -# i) func_quote_for_eval_result -# double-quoted, suitable for a subsequent eval -# ii) func_quote_for_eval_unquoted_result -# has all characters that are still active within double -# quotes backslashified. -func_quote_for_eval () +# func_quote_portable EVAL ARG +# ---------------------------- +# Internal function to portably implement func_quote_arg. Note that we still +# keep attention to performance here so we as much as possible try to avoid +# calling sed binary (so far O(N) complexity as long as func_append is O(1)). +func_quote_portable () { $debug_cmd - func_quote_for_eval_unquoted_result= - func_quote_for_eval_result= - while test 0 -lt $#; do - case $1 in + $require_check_ifs_backslash + + func_quote_portable_result=$2 + + # one-time-loop (easy break) + while true + do + if $1; then + func_quote_portable_result=`$ECHO "$2" | $SED \ + -e "$sed_double_quote_subst" -e "$sed_double_backslash"` + break + fi + + # Quote for eval. + case $func_quote_portable_result in *[\\\`\"\$]*) - _G_unquoted_arg=`printf '%s\n' "$1" |$SED "$sed_quote_subst"` ;; - *) - _G_unquoted_arg=$1 ;; - esac - if test -n "$func_quote_for_eval_unquoted_result"; then - func_append func_quote_for_eval_unquoted_result " $_G_unquoted_arg" - else - func_append func_quote_for_eval_unquoted_result "$_G_unquoted_arg" - fi + # Fallback to sed for $func_check_bs_ifs_broken=:, or when the string + # contains the shell wildcard characters. + case $check_ifs_backshlash_broken$func_quote_portable_result in + :*|*[\[\*\?]*) + func_quote_portable_result=`$ECHO "$func_quote_portable_result" \ + | $SED "$sed_quote_subst"` + break + ;; + esac - case $_G_unquoted_arg in - # Double-quote args containing shell metacharacters to delay - # word splitting, command substitution and variable expansion - # for a subsequent eval. - # Many Bourne shells cannot handle close brackets correctly - # in scan sets, so we specify it separately. 
- *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") - _G_quoted_arg=\"$_G_unquoted_arg\" + func_quote_portable_old_IFS=$IFS + for _G_char in '\' '`' '"' '$' + do + # STATE($1) PREV($2) SEPARATOR($3) + set start "" "" + func_quote_portable_result=dummy"$_G_char$func_quote_portable_result$_G_char"dummy + IFS=$_G_char + for _G_part in $func_quote_portable_result + do + case $1 in + quote) + func_append func_quote_portable_result "$3$2" + set quote "$_G_part" "\\$_G_char" + ;; + start) + set first "" "" + func_quote_portable_result= + ;; + first) + set quote "$_G_part" "" + ;; + esac + done + done + IFS=$func_quote_portable_old_IFS ;; - *) - _G_quoted_arg=$_G_unquoted_arg - ;; + *) ;; esac - - if test -n "$func_quote_for_eval_result"; then - func_append func_quote_for_eval_result " $_G_quoted_arg" - else - func_append func_quote_for_eval_result "$_G_quoted_arg" - fi - shift + break done + + func_quote_portable_unquoted_result=$func_quote_portable_result + case $func_quote_portable_result in + # double-quote args containing shell metacharacters to delay + # word splitting, command substitution and variable expansion + # for a subsequent eval. + # many bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + func_quote_portable_result=\"$func_quote_portable_result\" + ;; + esac } -# func_quote_for_expand ARG -# ------------------------- -# Aesthetically quote ARG to be evaled later; same as above, -# but do not quote variable references. -func_quote_for_expand () +# func_quotefast_eval ARG +# ----------------------- +# Quote one ARG (internal). This is equivalent to 'func_quote_arg eval ARG', +# but optimized for speed. Result is stored in $func_quotefast_eval. 
+if test xyes = `(x=; printf -v x %q yes; echo x"$x") 2>/dev/null`; then + printf -v _GL_test_printf_tilde %q '~' + if test '\~' = "$_GL_test_printf_tilde"; then + func_quotefast_eval () + { + printf -v func_quotefast_eval_result %q "$1" + } + else + # Broken older Bash implementations. Make those faster too if possible. + func_quotefast_eval () + { + case $1 in + '~'*) + func_quote_portable false "$1" + func_quotefast_eval_result=$func_quote_portable_result + ;; + *) + printf -v func_quotefast_eval_result %q "$1" + ;; + esac + } + fi +else + func_quotefast_eval () + { + func_quote_portable false "$1" + func_quotefast_eval_result=$func_quote_portable_result + } +fi + + +# func_quote_arg MODEs ARG +# ------------------------ +# Quote one ARG to be evaled later. MODEs argument may contain zero or more +# specifiers listed below separated by ',' character. This function returns two +# values: +# i) func_quote_arg_result +# double-quoted (when needed), suitable for a subsequent eval +# ii) func_quote_arg_unquoted_result +# has all characters that are still active within double +# quotes backslashified. Available only if 'unquoted' is specified. +# +# Available modes: +# ---------------- +# 'eval' (default) +# - escape shell special characters +# 'expand' +# - the same as 'eval'; but do not quote variable references +# 'pretty' +# - request aesthetic output, i.e. '"a b"' instead of 'a\ b'. This might +# be used later in func_quote to get output like: 'echo "a b"' instead +# of 'echo a\ b'. This is slower than default on some shells. +# 'unquoted' +# - produce also $func_quote_arg_unquoted_result which does not contain +# wrapping double-quotes. 
+# +# Examples for 'func_quote_arg pretty,unquoted string': +# +# string | *_result | *_unquoted_result +# ------------+-----------------------+------------------- +# " | \" | \" +# a b | "a b" | a b +# "a b" | "\"a b\"" | \"a b\" +# * | "*" | * +# z="${x-$y}" | "z=\"\${x-\$y}\"" | z=\"\${x-\$y}\" +# +# Examples for 'func_quote_arg pretty,unquoted,expand string': +# +# string | *_result | *_unquoted_result +# --------------+---------------------+-------------------- +# z="${x-$y}" | "z=\"${x-$y}\"" | z=\"${x-$y}\" +func_quote_arg () { - $debug_cmd - - case $1 in - *[\\\`\"]*) - _G_arg=`$ECHO "$1" | $SED \ - -e "$sed_double_quote_subst" -e "$sed_double_backslash"` ;; - *) - _G_arg=$1 ;; - esac - - case $_G_arg in - # Double-quote args containing shell metacharacters to delay - # word splitting and command substitution for a subsequent eval. - # Many Bourne shells cannot handle close brackets correctly - # in scan sets, so we specify it separately. - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") - _G_arg=\"$_G_arg\" + _G_quote_expand=false + case ,$1, in + *,expand,*) + _G_quote_expand=: ;; esac - func_quote_for_expand_result=$_G_arg + case ,$1, in + *,pretty,*|*,expand,*|*,unquoted,*) + func_quote_portable $_G_quote_expand "$2" + func_quote_arg_result=$func_quote_portable_result + func_quote_arg_unquoted_result=$func_quote_portable_unquoted_result + ;; + *) + # Faster quote-for-eval for some shells. + func_quotefast_eval "$2" + func_quote_arg_result=$func_quotefast_eval_result + ;; + esac +} + + +# func_quote MODEs ARGs... +# ------------------------ +# Quote all ARGs to be evaled later and join them into single command. See +# func_quote_arg's description for more info. 
+func_quote () +{ + $debug_cmd + _G_func_quote_mode=$1 ; shift + func_quote_result= + while test 0 -lt $#; do + func_quote_arg "$_G_func_quote_mode" "$1" + if test -n "$func_quote_result"; then + func_append func_quote_result " $func_quote_arg_result" + else + func_append func_quote_result "$func_quote_arg_result" + fi + shift + done } @@ -1215,8 +1364,8 @@ func_show_eval () _G_cmd=$1 _G_fail_exp=${2-':'} - func_quote_for_expand "$_G_cmd" - eval "func_notquiet $func_quote_for_expand_result" + func_quote_arg pretty,expand "$_G_cmd" + eval "func_notquiet $func_quote_arg_result" $opt_dry_run || { eval "$_G_cmd" @@ -1241,8 +1390,8 @@ func_show_eval_locale () _G_fail_exp=${2-':'} $opt_quiet || { - func_quote_for_expand "$_G_cmd" - eval "func_echo $func_quote_for_expand_result" + func_quote_arg expand,pretty "$_G_cmd" + eval "func_echo $func_quote_arg_result" } $opt_dry_run || { @@ -1369,30 +1518,26 @@ func_lt_ver () # End: #! /bin/sh -# Set a version string for this script. -scriptversion=2014-01-07.03; # UTC - # A portable, pluggable option parser for Bourne shell. # Written by Gary V. Vaughan, 2010 -# Copyright (C) 2010-2015 Free Software Foundation, Inc. -# This is free software; see the source for copying conditions. There is NO -# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# This is free software. There is NO warranty; not even for +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Copyright (C) 2010-2019, 2021 Bootstrap Authors +# +# This file is dual licensed under the terms of the MIT license +# , and GPL version 2 or later +# . You must apply one of +# these licenses when using or redistributing this software or any of +# the files within it. See the URLs above, or the file `LICENSE` +# included in the Bootstrap distribution for the full license texts. 
-# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. +# Please report bugs or propose patches to: +# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -# Please report bugs or propose patches to gary@gnu.org. +# Set a version string for this script. +scriptversion=2019-02-19.15; # UTC ## ------ ## @@ -1415,7 +1560,7 @@ scriptversion=2014-01-07.03; # UTC # # In order for the '--version' option to work, you will need to have a # suitably formatted comment like the one at the top of this file -# starting with '# Written by ' and ending with '# warranty; '. +# starting with '# Written by ' and ending with '# Copyright'. # # For '-h' and '--help' to work, you will also need a one line # description of your script's purpose in a comment directly above the @@ -1427,7 +1572,7 @@ scriptversion=2014-01-07.03; # UTC # to display verbose messages only when your user has specified # '--verbose'. # -# After sourcing this file, you can plug processing for additional +# After sourcing this file, you can plug in processing for additional # options by amending the variables from the 'Configuration' section # below, and following the instructions in the 'Option parsing' # section further down. @@ -1476,8 +1621,8 @@ fatal_help="Try '\$progname --help' for more information." ## ------------------------- ## # This section contains functions for adding, removing, and running hooks -# to the main code. A hook is just a named list of of function, that can -# be run in order later on. 
+# in the main code. A hook is just a list of function names that can be +# run in order later on. # func_hookable FUNC_NAME # ----------------------- @@ -1510,7 +1655,8 @@ func_add_hook () # func_remove_hook FUNC_NAME HOOK_FUNC # ------------------------------------ -# Remove HOOK_FUNC from the list of functions called by FUNC_NAME. +# Remove HOOK_FUNC from the list of hook functions to be called by +# FUNC_NAME. func_remove_hook () { $debug_cmd @@ -1519,10 +1665,28 @@ func_remove_hook () } +# func_propagate_result FUNC_NAME_A FUNC_NAME_B +# --------------------------------------------- +# If the *_result variable of FUNC_NAME_A _is set_, assign its value to +# *_result variable of FUNC_NAME_B. +func_propagate_result () +{ + $debug_cmd + + func_propagate_result_result=: + if eval "test \"\${${1}_result+set}\" = set" + then + eval "${2}_result=\$${1}_result" + else + func_propagate_result_result=false + fi +} + + # func_run_hooks FUNC_NAME [ARG]... # --------------------------------- # Run all hook functions registered to FUNC_NAME. -# It is assumed that the list of hook functions contains nothing more +# It's assumed that the list of hook functions contains nothing more # than a whitespace-delimited list of legal shell function names, and # no effort is wasted trying to catch shell meta-characters or preserve # whitespace. @@ -1530,24 +1694,23 @@ func_run_hooks () { $debug_cmd + _G_rc_run_hooks=false + case " $hookable_fns " in *" $1 "*) ;; - *) func_fatal_error "'$1' does not support hook funcions.n" ;; + *) func_fatal_error "'$1' does not support hook functions." ;; esac eval _G_hook_fns=\$$1_hooks; shift for _G_hook in $_G_hook_fns; do - eval $_G_hook '"$@"' - - # store returned options list back into positional - # parameters for next 'cmd' execution. 
- eval _G_hook_result=\$${_G_hook}_result - eval set dummy "$_G_hook_result"; shift + func_unset "${_G_hook}_result" + eval $_G_hook '${1+"$@"}' + func_propagate_result $_G_hook func_run_hooks + if $func_propagate_result_result; then + eval set dummy "$func_run_hooks_result"; shift + fi done - - func_quote_for_eval ${1+"$@"} - func_run_hooks_result=$func_quote_for_eval_result } @@ -1557,10 +1720,18 @@ func_run_hooks () ## --------------- ## # In order to add your own option parsing hooks, you must accept the -# full positional parameter list in your hook function, remove any -# options that you action, and then pass back the remaining unprocessed -# options in '_result', escaped suitably for -# 'eval'. Like this: +# full positional parameter list from your hook function. You may remove +# or edit any options that you action, and then pass back the remaining +# unprocessed options in '_result', escaped +# suitably for 'eval'. +# +# The '_result' variable is automatically unset +# before your hook gets called; for best performance, only set the +# *_result variable when necessary (i.e. don't call the 'func_quote' +# function unnecessarily because it can be an expensive operation on some +# machines). +# +# Like this: # # my_options_prep () # { @@ -1570,9 +1741,8 @@ func_run_hooks () # usage_message=$usage_message' # -s, --silent don'\''t print informational messages # ' -# -# func_quote_for_eval ${1+"$@"} -# my_options_prep_result=$func_quote_for_eval_result +# # No change in '$@' (ignored completely by this hook). Leave +# # my_options_prep_result variable intact. # } # func_add_hook func_options_prep my_options_prep # @@ -1581,25 +1751,36 @@ func_run_hooks () # { # $debug_cmd # -# # Note that for efficiency, we parse as many options as we can +# args_changed=false +# +# # Note that, for efficiency, we parse as many options as we can # # recognise in a loop before passing the remainder back to the # # caller on the first unrecognised argument we encounter. 
# while test $# -gt 0; do # opt=$1; shift # case $opt in -# --silent|-s) opt_silent=: ;; +# --silent|-s) opt_silent=: +# args_changed=: +# ;; # # Separate non-argument short options: # -s*) func_split_short_opt "$_G_opt" # set dummy "$func_split_short_opt_name" \ # "-$func_split_short_opt_arg" ${1+"$@"} # shift +# args_changed=: # ;; -# *) set dummy "$_G_opt" "$*"; shift; break ;; +# *) # Make sure the first unrecognised option "$_G_opt" +# # is added back to "$@" in case we need it later, +# # if $args_changed was set to 'true'. +# set dummy "$_G_opt" ${1+"$@"}; shift; break ;; # esac # done # -# func_quote_for_eval ${1+"$@"} -# my_silent_option_result=$func_quote_for_eval_result +# # Only call 'func_quote' here if we processed at least one argument. +# if $args_changed; then +# func_quote eval ${1+"$@"} +# my_silent_option_result=$func_quote_result +# fi # } # func_add_hook func_parse_options my_silent_option # @@ -1610,17 +1791,26 @@ func_run_hooks () # # $opt_silent && $opt_verbose && func_fatal_help "\ # '--silent' and '--verbose' options are mutually exclusive." -# -# func_quote_for_eval ${1+"$@"} -# my_option_validation_result=$func_quote_for_eval_result # } # func_add_hook func_validate_options my_option_validation # -# You'll alse need to manually amend $usage_message to reflect the extra +# You'll also need to manually amend $usage_message to reflect the extra # options you parse. It's preferable to append if you can, so that # multiple option parsing hooks can be added safely. +# func_options_finish [ARG]... +# ---------------------------- +# Finishing the option parse loop (call 'func_options' hooks ATM). +func_options_finish () +{ + $debug_cmd + + func_run_hooks func_options ${1+"$@"} + func_propagate_result func_run_hooks func_options_finish +} + + # func_options [ARG]... # --------------------- # All the functions called inside func_options are hookable. 
See the @@ -1630,17 +1820,27 @@ func_options () { $debug_cmd - func_options_prep ${1+"$@"} - eval func_parse_options \ - ${func_options_prep_result+"$func_options_prep_result"} - eval func_validate_options \ - ${func_parse_options_result+"$func_parse_options_result"} + _G_options_quoted=false - eval func_run_hooks func_options \ - ${func_validate_options_result+"$func_validate_options_result"} + for my_func in options_prep parse_options validate_options options_finish + do + func_unset func_${my_func}_result + func_unset func_run_hooks_result + eval func_$my_func '${1+"$@"}' + func_propagate_result func_$my_func func_options + if $func_propagate_result_result; then + eval set dummy "$func_options_result"; shift + _G_options_quoted=: + fi + done - # save modified positional parameters for caller - func_options_result=$func_run_hooks_result + $_G_options_quoted || { + # As we (func_options) are top-level options-parser function and + # nobody quoted "$@" for us yet, we need to do it explicitly for + # caller. + func_quote eval ${1+"$@"} + func_options_result=$func_quote_result + } } @@ -1649,9 +1849,8 @@ func_options () # All initialisations required before starting the option parse loop. # Note that when calling hook functions, we pass through the list of # positional parameters. If a hook function modifies that list, and -# needs to propogate that back to rest of this script, then the complete -# modified list must be put in 'func_run_hooks_result' before -# returning. +# needs to propagate that back to rest of this script, then the complete +# modified list must be put in 'func_run_hooks_result' before returning. 
func_hookable func_options_prep func_options_prep () { @@ -1662,9 +1861,7 @@ func_options_prep () opt_warning_types= func_run_hooks func_options_prep ${1+"$@"} - - # save modified positional parameters for caller - func_options_prep_result=$func_run_hooks_result + func_propagate_result func_run_hooks func_options_prep } @@ -1676,25 +1873,32 @@ func_parse_options () { $debug_cmd - func_parse_options_result= - + _G_parse_options_requote=false # this just eases exit handling while test $# -gt 0; do # Defer to hook functions for initial option parsing, so they # get priority in the event of reusing an option name. func_run_hooks func_parse_options ${1+"$@"} - - # Adjust func_parse_options positional parameters to match - eval set dummy "$func_run_hooks_result"; shift + func_propagate_result func_run_hooks func_parse_options + if $func_propagate_result_result; then + eval set dummy "$func_parse_options_result"; shift + # Even though we may have changed "$@", we passed the "$@" array + # down into the hook and it quoted it for us (because we are in + # this if-branch). No need to quote it again. + _G_parse_options_requote=false + fi # Break out of the loop if we already parsed every option. test $# -gt 0 || break + # We expect that one of the options parsed in this function matches + # and thus we remove _G_opt from "$@" and need to re-quote. 
+ _G_match_parse_options=: _G_opt=$1 shift case $_G_opt in --debug|-x) debug_cmd='set -x' - func_echo "enabling shell trace mode" + func_echo "enabling shell trace mode" >&2 $debug_cmd ;; @@ -1704,7 +1908,10 @@ func_parse_options () ;; --warnings|--warning|-W) - test $# = 0 && func_missing_arg $_G_opt && break + if test $# = 0 && func_missing_arg $_G_opt; then + _G_parse_options_requote=: + break + fi case " $warning_categories $1" in *" $1 "*) # trailing space prevents matching last $1 above @@ -1757,15 +1964,24 @@ func_parse_options () shift ;; - --) break ;; + --) _G_parse_options_requote=: ; break ;; -*) func_fatal_help "unrecognised option: '$_G_opt'" ;; - *) set dummy "$_G_opt" ${1+"$@"}; shift; break ;; + *) set dummy "$_G_opt" ${1+"$@"}; shift + _G_match_parse_options=false + break + ;; esac + + if $_G_match_parse_options; then + _G_parse_options_requote=: + fi done - # save modified positional parameters for caller - func_quote_for_eval ${1+"$@"} - func_parse_options_result=$func_quote_for_eval_result + if $_G_parse_options_requote; then + # save modified positional parameters for caller + func_quote eval ${1+"$@"} + func_parse_options_result=$func_quote_result + fi } @@ -1782,12 +1998,10 @@ func_validate_options () test -n "$opt_warning_types" || opt_warning_types=" $warning_categories" func_run_hooks func_validate_options ${1+"$@"} + func_propagate_result func_run_hooks func_validate_options # Bail if the options were screwed! $exit_cmd $EXIT_FAILURE - - # save modified positional parameters for caller - func_validate_options_result=$func_run_hooks_result } @@ -1843,8 +2057,8 @@ func_missing_arg () # func_split_equals STRING # ------------------------ -# Set func_split_equals_lhs and func_split_equals_rhs shell variables after -# splitting STRING at the '=' sign. +# Set func_split_equals_lhs and func_split_equals_rhs shell variables +# after splitting STRING at the '=' sign. 
test -z "$_G_HAVE_XSI_OPS" \ && (eval 'x=a/b/c; test 5aa/bb/cc = "${#x}${x%%/*}${x%/*}${x#*/}${x##*/}"') 2>/dev/null \ @@ -1859,8 +2073,9 @@ then func_split_equals_lhs=${1%%=*} func_split_equals_rhs=${1#*=} - test "x$func_split_equals_lhs" = "x$1" \ - && func_split_equals_rhs= + if test "x$func_split_equals_lhs" = "x$1"; then + func_split_equals_rhs= + fi }' else # ...otherwise fall back to using expr, which is often a shell builtin. @@ -1870,7 +2085,7 @@ else func_split_equals_lhs=`expr "x$1" : 'x\([^=]*\)'` func_split_equals_rhs= - test "x$func_split_equals_lhs" = "x$1" \ + test "x$func_split_equals_lhs=" = "x$1" \ || func_split_equals_rhs=`expr "x$1" : 'x[^=]*=\(.*\)$'` } fi #func_split_equals @@ -1896,7 +2111,7 @@ else { $debug_cmd - func_split_short_opt_name=`expr "x$1" : 'x-\(.\)'` + func_split_short_opt_name=`expr "x$1" : 'x\(-.\)'` func_split_short_opt_arg=`expr "x$1" : 'x-.\(.*\)$'` } fi #func_split_short_opt @@ -1938,31 +2153,44 @@ func_usage_message () # func_version # ------------ # Echo version message to standard output and exit. +# The version message is extracted from the calling file's header +# comments, with leading '# ' stripped: +# 1. First display the progname and version +# 2. Followed by the header comment line matching /^# Written by / +# 3. Then a blank line followed by the first following line matching +# /^# Copyright / +# 4. Immediately followed by any lines between the previous matches, +# except lines preceding the intervening completely blank line. +# For example, see the header comments of this file. 
func_version () { $debug_cmd printf '%s\n' "$progname $scriptversion" $SED -n ' - /(C)/!b go - :more - /\./!{ - N - s|\n# | | - b more + /^# Written by /!b + s|^# ||; p; n + + :fwd2blnk + /./ { + n + b fwd2blnk } - :go - /^# Written by /,/# warranty; / { - s|^# || - s|^# *$|| - s|\((C)\)[ 0-9,-]*[ ,-]\([1-9][0-9]* \)|\1 \2| - p + p; n + + :holdwrnt + s|^# || + s|^# *$|| + /^Copyright /!{ + /./H + n + b holdwrnt } - /^# Written by / { - s|^# || - p - } - /^warranty; /q' < "$progpath" + + s|\((C)\)[ 0-9,-]*[ ,-]\([1-9][0-9]* \)|\1 \2| + G + s|\(\n\)\n*|\1|g + p; q' < "$progpath" exit $? } @@ -1972,12 +2200,12 @@ func_version () # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'before-save-hook 'time-stamp) -# time-stamp-pattern: "10/scriptversion=%:y-%02m-%02d.%02H; # UTC" +# time-stamp-pattern: "30/scriptversion=%:y-%02m-%02d.%02H; # UTC" # time-stamp-time-zone: "UTC" # End: # Set a version string. -scriptversion='(GNU libtool) 2.4.6' +scriptversion='(GNU libtool) 2.4.7' # func_echo ARG... @@ -2068,7 +2296,7 @@ include the following information: compiler: $LTCC compiler flags: $LTCFLAGS linker: $LD (gnu? $with_gnu_ld) - version: $progname (GNU libtool) 2.4.6 + version: $progname $scriptversion Debian-2.4.7-7build1 automake: `($AUTOMAKE --version) 2>/dev/null |$SED 1q` autoconf: `($AUTOCONF --version) 2>/dev/null |$SED 1q` @@ -2124,7 +2352,7 @@ fi # a configuration failure hint, and exit. func_fatal_configuration () { - func__fatal_error ${1+"$@"} \ + func_fatal_error ${1+"$@"} \ "See the $PACKAGE documentation for more information." \ "Fatal configuration error." 
} @@ -2270,6 +2498,10 @@ libtool_options_prep () nonopt= preserve_args= + _G_rc_lt_options_prep=: + + _G_rc_lt_options_prep=: + # Shorthand for --mode=foo, only valid as the first argument case $1 in clean|clea|cle|cl) @@ -2293,11 +2525,16 @@ libtool_options_prep () uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u) shift; set dummy --mode uninstall ${1+"$@"}; shift ;; + *) + _G_rc_lt_options_prep=false + ;; esac - # Pass back the list of options. - func_quote_for_eval ${1+"$@"} - libtool_options_prep_result=$func_quote_for_eval_result + if $_G_rc_lt_options_prep; then + # Pass back the list of options. + func_quote eval ${1+"$@"} + libtool_options_prep_result=$func_quote_result + fi } func_add_hook func_options_prep libtool_options_prep @@ -2309,9 +2546,12 @@ libtool_parse_options () { $debug_cmd + _G_rc_lt_parse_options=false + # Perform our own loop to consume as many options as possible in # each iteration. while test $# -gt 0; do + _G_match_lt_parse_options=: _G_opt=$1 shift case $_G_opt in @@ -2386,15 +2626,20 @@ libtool_parse_options () func_append preserve_args " $_G_opt" ;; - # An option not handled by this hook function: - *) set dummy "$_G_opt" ${1+"$@"}; shift; break ;; + # An option not handled by this hook function: + *) set dummy "$_G_opt" ${1+"$@"} ; shift + _G_match_lt_parse_options=false + break + ;; esac + $_G_match_lt_parse_options && _G_rc_lt_parse_options=: done - - # save modified positional parameters for caller - func_quote_for_eval ${1+"$@"} - libtool_parse_options_result=$func_quote_for_eval_result + if $_G_rc_lt_parse_options; then + # save modified positional parameters for caller + func_quote eval ${1+"$@"} + libtool_parse_options_result=$func_quote_result + fi } func_add_hook func_parse_options libtool_parse_options @@ -2451,8 +2696,8 @@ libtool_validate_options () } # Pass back the unparsed argument list - func_quote_for_eval ${1+"$@"} - libtool_validate_options_result=$func_quote_for_eval_result + func_quote eval ${1+"$@"} + 
libtool_validate_options_result=$func_quote_result } func_add_hook func_validate_options libtool_validate_options @@ -3418,8 +3663,8 @@ func_mode_compile () esac done - func_quote_for_eval "$libobj" - test "X$libobj" != "X$func_quote_for_eval_result" \ + func_quote_arg pretty "$libobj" + test "X$libobj" != "X$func_quote_arg_result" \ && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"' &()|`$[]' \ && func_warning "libobj name '$libobj' may not contain shell special characters." func_dirname_and_basename "$obj" "/" "" @@ -3492,8 +3737,8 @@ compiler." func_to_tool_file "$srcfile" func_convert_file_msys_to_w32 srcfile=$func_to_tool_file_result - func_quote_for_eval "$srcfile" - qsrcfile=$func_quote_for_eval_result + func_quote_arg pretty "$srcfile" + qsrcfile=$func_quote_arg_result # Only build a PIC object if we are building libtool libraries. if test yes = "$build_libtool_libs"; then @@ -3648,7 +3893,8 @@ This mode accepts the following additional options: -prefer-non-pic try to build non-PIC objects only -shared do not build a '.o' file suitable for static linking -static only build a '.o' file suitable for static linking - -Wc,FLAG pass FLAG directly to the compiler + -Wc,FLAG + -Xcompiler FLAG pass FLAG directly to the compiler COMPILE-COMMAND is a command to be used in creating a 'standard' object file from the given SOURCEFILE. @@ -3754,6 +4000,8 @@ The following components of LINK-COMMAND are treated specially: -weak LIBNAME declare that the target provides the LIBNAME interface -Wc,FLAG -Xcompiler FLAG pass linker-specific FLAG directly to the compiler + -Wa,FLAG + -Xassembler FLAG pass linker-specific FLAG directly to the assembler -Wl,FLAG -Xlinker FLAG pass linker-specific FLAG directly to the linker -XCClinker FLAG pass link-specific FLAG to the compiler driver (CC) @@ -4096,8 +4344,8 @@ func_mode_install () case $nonopt in *shtool*) :;; *) false;; esac then # Aesthetically quote it. 
- func_quote_for_eval "$nonopt" - install_prog="$func_quote_for_eval_result " + func_quote_arg pretty "$nonopt" + install_prog="$func_quote_arg_result " arg=$1 shift else @@ -4107,8 +4355,8 @@ func_mode_install () # The real first argument should be the name of the installation program. # Aesthetically quote it. - func_quote_for_eval "$arg" - func_append install_prog "$func_quote_for_eval_result" + func_quote_arg pretty "$arg" + func_append install_prog "$func_quote_arg_result" install_shared_prog=$install_prog case " $install_prog " in *[\\\ /]cp\ *) install_cp=: ;; @@ -4165,12 +4413,12 @@ func_mode_install () esac # Aesthetically quote the argument. - func_quote_for_eval "$arg" - func_append install_prog " $func_quote_for_eval_result" + func_quote_arg pretty "$arg" + func_append install_prog " $func_quote_arg_result" if test -n "$arg2"; then - func_quote_for_eval "$arg2" + func_quote_arg pretty "$arg2" fi - func_append install_shared_prog " $func_quote_for_eval_result" + func_append install_shared_prog " $func_quote_arg_result" done test -z "$install_prog" && \ @@ -4181,8 +4429,8 @@ func_mode_install () if test -n "$install_override_mode" && $no_mode; then if $install_cp; then :; else - func_quote_for_eval "$install_override_mode" - func_append install_shared_prog " -m $func_quote_for_eval_result" + func_quote_arg pretty "$install_override_mode" + func_append install_shared_prog " -m $func_quote_arg_result" fi fi @@ -4478,8 +4726,8 @@ func_mode_install () relink_command=`$ECHO "$relink_command" | $SED 's%@OUTPUT@%'"$outputname"'%g'` $opt_quiet || { - func_quote_for_expand "$relink_command" - eval "func_echo $func_quote_for_expand_result" + func_quote_arg expand,pretty "$relink_command" + eval "func_echo $func_quote_arg_result" } if eval "$relink_command"; then : else @@ -5258,7 +5506,8 @@ else if test \"\$libtool_execute_magic\" != \"$magic\"; then file=\"\$0\"" - qECHO=`$ECHO "$ECHO" | $SED "$sed_quote_subst"` + func_quote_arg pretty "$ECHO" + 
qECHO=$func_quote_arg_result $ECHO "\ # A function that is used when there is no print builtin or printf. @@ -5268,7 +5517,7 @@ func_fallback_echo () \$1 _LTECHO_EOF' } - ECHO=\"$qECHO\" + ECHO=$qECHO fi # Very basic option parsing. These options are (a) specific to @@ -6611,9 +6860,9 @@ func_mode_link () while test "$#" -gt 0; do arg=$1 shift - func_quote_for_eval "$arg" - qarg=$func_quote_for_eval_unquoted_result - func_append libtool_args " $func_quote_for_eval_result" + func_quote_arg pretty,unquoted "$arg" + qarg=$func_quote_arg_unquoted_result + func_append libtool_args " $func_quote_arg_result" # If the previous option needs an argument, assign it. if test -n "$prev"; then @@ -6849,6 +7098,13 @@ func_mode_link () prev= continue ;; + xassembler) + func_append compiler_flags " -Xassembler $qarg" + prev= + func_append compile_command " -Xassembler $qarg" + func_append finalize_command " -Xassembler $qarg" + continue + ;; xcclinker) func_append linker_flags " $qarg" func_append compiler_flags " $qarg" @@ -7019,7 +7275,7 @@ func_mode_link () # These systems don't actually have a C library (as such) test X-lc = "X$arg" && continue ;; - *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig*) + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig* | *-*-midnightbsd*) # Do not include libc due to us having libc/libc_r. test X-lc = "X$arg" && continue ;; @@ -7039,7 +7295,7 @@ func_mode_link () esac elif test X-lc_r = "X$arg"; then case $host in - *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig*) + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig* | *-*-midnightbsd*) # Do not include libc_r directly, use -pthread flag. continue ;; @@ -7069,8 +7325,20 @@ func_mode_link () prev=xcompiler continue ;; - - -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ + # Solaris ld rejects as of 11.4. Refer to Oracle bug 22985199. 
+ -pthread) + case $host in + *solaris2*) ;; + *) + case "$new_inherited_linker_flags " in + *" $arg "*) ;; + * ) func_append new_inherited_linker_flags " $arg" ;; + esac + ;; + esac + continue + ;; + -mt|-mthreads|-kthread|-Kthread|-pthreads|--thread-safe \ |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) func_append compiler_flags " $arg" func_append compile_command " $arg" @@ -7211,9 +7479,9 @@ func_mode_link () save_ifs=$IFS; IFS=, for flag in $args; do IFS=$save_ifs - func_quote_for_eval "$flag" - func_append arg " $func_quote_for_eval_result" - func_append compiler_flags " $func_quote_for_eval_result" + func_quote_arg pretty "$flag" + func_append arg " $func_quote_arg_result" + func_append compiler_flags " $func_quote_arg_result" done IFS=$save_ifs func_stripname ' ' '' "$arg" @@ -7227,16 +7495,21 @@ func_mode_link () save_ifs=$IFS; IFS=, for flag in $args; do IFS=$save_ifs - func_quote_for_eval "$flag" - func_append arg " $wl$func_quote_for_eval_result" - func_append compiler_flags " $wl$func_quote_for_eval_result" - func_append linker_flags " $func_quote_for_eval_result" + func_quote_arg pretty "$flag" + func_append arg " $wl$func_quote_arg_result" + func_append compiler_flags " $wl$func_quote_arg_result" + func_append linker_flags " $func_quote_arg_result" done IFS=$save_ifs func_stripname ' ' '' "$arg" arg=$func_stripname_result ;; + -Xassembler) + prev=xassembler + continue + ;; + -Xcompiler) prev=xcompiler continue @@ -7254,8 +7527,8 @@ func_mode_link () # -msg_* for osf cc -msg_*) - func_quote_for_eval "$arg" - arg=$func_quote_for_eval_result + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result ;; # Flags to be passed through unchanged, with rationale: @@ -7275,12 +7548,16 @@ func_mode_link () # -specs=* GCC specs files # -stdlib=* select c++ std lib with clang # -fsanitize=* Clang/GCC memory and address sanitizer + # -fuse-ld=* Linker select flags for GCC + # -static-* direct GCC to link specific libraries statically + # -fcilkplus Cilk 
Plus language extension features for C/C++ + # -Wa,* Pass flags directly to the assembler -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \ -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \ -O*|-g*|-flto*|-fwhopr*|-fuse-linker-plugin|-fstack-protector*|-stdlib=*| \ - -specs=*|-fsanitize=*) - func_quote_for_eval "$arg" - arg=$func_quote_for_eval_result + -specs=*|-fsanitize=*|-fuse-ld=*|-static-*|-fcilkplus|-Wa,*) + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result func_append compile_command " $arg" func_append finalize_command " $arg" func_append compiler_flags " $arg" @@ -7301,15 +7578,15 @@ func_mode_link () continue else # Otherwise treat like 'Some other compiler flag' below - func_quote_for_eval "$arg" - arg=$func_quote_for_eval_result + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result fi ;; # Some other compiler flag. -* | +*) - func_quote_for_eval "$arg" - arg=$func_quote_for_eval_result + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result ;; *.$objext) @@ -7429,8 +7706,8 @@ func_mode_link () *) # Unknown arguments in both finalize_command and compile_command need # to be aesthetically quoted because they are evaled later. 
- func_quote_for_eval "$arg" - arg=$func_quote_for_eval_result + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result ;; esac # arg @@ -8638,7 +8915,7 @@ func_mode_link () test CXX = "$tagname" && { case $host_os in linux*) - case `$CC -V 2>&1 | sed 5q` in + case `$CC -V 2>&1 | $SED 5q` in *Sun\ C*) # Sun C++ 5.9 func_suncc_cstd_abi @@ -8811,7 +9088,7 @@ func_mode_link () # case $version_type in # correct linux to gnu/linux during the next big refactor - darwin|freebsd-elf|linux|osf|windows|none) + darwin|freebsd-elf|linux|midnightbsd-elf|osf|windows|none) func_arith $number_major + $number_minor current=$func_arith_result age=$number_minor @@ -8905,7 +9182,7 @@ func_mode_link () versuffix=.$current.$revision ;; - freebsd-elf) + freebsd-elf | midnightbsd-elf) func_arith $current - $age major=.$func_arith_result versuffix=$major.$age.$revision @@ -9131,7 +9408,7 @@ func_mode_link () *-*-netbsd*) # Don't link with libc until the a.out ld.so is fixed. ;; - *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-midnightbsd*) # Do not include libc due to us having libc/libc_r. ;; *-*-sco3.2v5* | *-*-sco5v6*) @@ -9942,8 +10219,8 @@ EOF for cmd in $concat_cmds; do IFS=$save_ifs $opt_quiet || { - func_quote_for_expand "$cmd" - eval "func_echo $func_quote_for_expand_result" + func_quote_arg expand,pretty "$cmd" + eval "func_echo $func_quote_arg_result" } $opt_dry_run || eval "$cmd" || { lt_exit=$? @@ -10036,8 +10313,8 @@ EOF eval cmd=\"$cmd\" IFS=$save_ifs $opt_quiet || { - func_quote_for_expand "$cmd" - eval "func_echo $func_quote_for_expand_result" + func_quote_arg expand,pretty "$cmd" + eval "func_echo $func_quote_arg_result" } $opt_dry_run || eval "$cmd" || { lt_exit=$? 
@@ -10511,12 +10788,13 @@ EOF elif eval var_value=\$$var; test -z "$var_value"; then relink_command="$var=; export $var; $relink_command" else - func_quote_for_eval "$var_value" - relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command" + func_quote_arg pretty "$var_value" + relink_command="$var=$func_quote_arg_result; export $var; $relink_command" fi done - relink_command="(cd `pwd`; $relink_command)" - relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"` + func_quote eval cd "`pwd`" + func_quote_arg pretty,unquoted "($func_quote_result; $relink_command)" + relink_command=$func_quote_arg_unquoted_result fi # Only actually do things if not in dry run mode. @@ -10756,13 +11034,15 @@ EOF elif eval var_value=\$$var; test -z "$var_value"; then relink_command="$var=; export $var; $relink_command" else - func_quote_for_eval "$var_value" - relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command" + func_quote_arg pretty,unquoted "$var_value" + relink_command="$var=$func_quote_arg_unquoted_result; export $var; $relink_command" fi done # Quote the link command for shipping. - relink_command="(cd `pwd`; $SHELL \"$progpath\" $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)" - relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"` + func_quote eval cd "`pwd`" + relink_command="($func_quote_result; $SHELL \"$progpath\" $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)" + func_quote_arg pretty,unquoted "$relink_command" + relink_command=$func_quote_arg_unquoted_result if test yes = "$hardcode_automatic"; then relink_command= fi diff --git a/libpcre/m4/libtool.m4 b/libpcre/m4/libtool.m4 index 10ab2844c..e7b683345 100644 --- a/libpcre/m4/libtool.m4 +++ b/libpcre/m4/libtool.m4 @@ -1,6 +1,7 @@ # libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- # -# Copyright (C) 1996-2001, 2003-2015 Free Software Foundation, Inc. 
+# Copyright (C) 1996-2001, 2003-2019, 2021-2022 Free Software +# Foundation, Inc. # Written by Gordon Matzigkeit, 1996 # # This file is free software; the Free Software Foundation gives @@ -31,7 +32,7 @@ m4_define([_LT_COPYING], [dnl # along with this program. If not, see . ]) -# serial 58 LT_INIT +# serial 59 LT_INIT # LT_PREREQ(VERSION) @@ -181,6 +182,7 @@ m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_CHECK_SHELL_FEATURES])dnl m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl m4_require([_LT_CMD_RELOAD])dnl +m4_require([_LT_DECL_FILECMD])dnl m4_require([_LT_CHECK_MAGIC_METHOD])dnl m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl m4_require([_LT_CMD_OLD_ARCHIVE])dnl @@ -219,8 +221,8 @@ esac ofile=libtool can_build_shared=yes -# All known linkers require a '.a' archive for static linking (except MSVC, -# which needs '.lib'). +# All known linkers require a '.a' archive for static linking (except MSVC and +# ICC, which need '.lib'). libext=a with_gnu_ld=$lt_cv_prog_gnu_ld @@ -728,7 +730,6 @@ _LT_CONFIG_SAVE_COMMANDS([ cat <<_LT_EOF >> "$cfgfile" #! $SHELL # Generated automatically by $as_me ($PACKAGE) $VERSION -# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: # NOTE: Changes made to this file will be lost: look at ltmain.sh. # Provide generalized library-building support services. @@ -778,7 +779,7 @@ _LT_EOF # if finds mixed CR/LF and LF-only lines. Since sed operates in # text mode, it properly converts lines to CR/LF. This bash problem # is reportedly fixed, but why not run on old versions too? 
- sed '$q' "$ltmain" >> "$cfgfile" \ + $SED '$q' "$ltmain" >> "$cfgfile" \ || (rm -f "$cfgfile"; exit 1) mv -f "$cfgfile" "$ofile" || @@ -1042,8 +1043,8 @@ int forced_loaded() { return 2;} _LT_EOF echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD - echo "$AR cru libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD - $AR cru libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD + echo "$AR $AR_FLAGS libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD + $AR $AR_FLAGS libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD cat > conftest.c << _LT_EOF @@ -1067,17 +1068,12 @@ _LT_EOF _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;; darwin1.*) _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; - darwin*) # darwin 5.x on - # if running on 10.5 or later, the deployment target defaults - # to the OS version, if on x86, and 10.4, the deployment - # target defaults to 10.4. Don't you love it? 
- case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in - 10.0,*86*-darwin8*|10.0,*-darwin[[91]]*) - _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; - 10.[[012]][[,.]]*) - _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; - 10.*) - _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; + darwin*) + case $MACOSX_DEPLOYMENT_TARGET,$host in + 10.[[012]],*|,*powerpc*-darwin[[5-8]]*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + *) + _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; esac ;; esac @@ -1126,12 +1122,12 @@ m4_defun([_LT_DARWIN_LINKER_FEATURES], output_verbose_link_cmd=func_echo_all _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" - _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" - _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + _LT_TAGVAR(archive_expsym_cmds, $1)="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + _LT_TAGVAR(module_expsym_cmds, $1)="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle 
\$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" m4_if([$1], [CXX], [ if test yes != "$lt_cv_apple_cc_single_mod"; then _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dsymutil" - _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dar_export_syms$_lt_dsymutil" + _LT_TAGVAR(archive_expsym_cmds, $1)="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dar_export_syms$_lt_dsymutil" fi ],[]) else @@ -1245,7 +1241,8 @@ _LT_DECL([], [ECHO], [1], [An echo program that protects backslashes]) # _LT_WITH_SYSROOT # ---------------- AC_DEFUN([_LT_WITH_SYSROOT], -[AC_MSG_CHECKING([for sysroot]) +[m4_require([_LT_DECL_SED])dnl +AC_MSG_CHECKING([for sysroot]) AC_ARG_WITH([sysroot], [AS_HELP_STRING([--with-sysroot@<:@=DIR@:>@], [Search for dependent libraries within DIR (or the compiler's sysroot @@ -1262,7 +1259,7 @@ case $with_sysroot in #( fi ;; #( /*) - lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` + lt_sysroot=`echo "$with_sysroot" | $SED -e "$sed_quote_subst"` ;; #( no|'') ;; #( @@ -1292,7 +1289,7 @@ ia64-*-hpux*) # options accordingly. 
echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *ELF-32*) HPUX_IA64_MODE=32 ;; @@ -1309,7 +1306,7 @@ ia64-*-hpux*) echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then if test yes = "$lt_cv_prog_gnu_ld"; then - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -melf32bsmip" ;; @@ -1321,7 +1318,7 @@ ia64-*-hpux*) ;; esac else - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -32" ;; @@ -1343,7 +1340,7 @@ mips64*-*linux*) echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then emul=elf - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *32-bit*) emul="${emul}32" ;; @@ -1351,7 +1348,7 @@ mips64*-*linux*) emul="${emul}64" ;; esac - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *MSB*) emul="${emul}btsmip" ;; @@ -1359,7 +1356,7 @@ mips64*-*linux*) emul="${emul}ltsmip" ;; esac - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *N32*) emul="${emul}n32" ;; @@ -1379,14 +1376,14 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*) # not appear in the list. echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then - case `/usr/bin/file conftest.o` in + case `$FILECMD conftest.o` in *32-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_i386_fbsd" ;; x86_64-*linux*) - case `/usr/bin/file conftest.o` in + case `$FILECMD conftest.o` in *x86-64*) LD="${LD-ld} -m elf32_x86_64" ;; @@ -1454,7 +1451,7 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*) # options accordingly. 
echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then - case `/usr/bin/file conftest.o` in + case `$FILECMD conftest.o` in *64-bit*) case $lt_cv_prog_gnu_ld in yes*) @@ -1493,9 +1490,22 @@ need_locks=$enable_libtool_lock m4_defun([_LT_PROG_AR], [AC_CHECK_TOOLS(AR, [ar], false) : ${AR=ar} -: ${AR_FLAGS=cru} _LT_DECL([], [AR], [1], [The archiver]) -_LT_DECL([], [AR_FLAGS], [1], [Flags to create an archive]) + +# Use ARFLAGS variable as AR's operation code to sync the variable naming with +# Automake. If both AR_FLAGS and ARFLAGS are specified, AR_FLAGS should have +# higher priority because thats what people were doing historically (setting +# ARFLAGS for automake and AR_FLAGS for libtool). FIXME: Make the AR_FLAGS +# variable obsoleted/removed. + +test ${AR_FLAGS+y} || AR_FLAGS=${ARFLAGS-cr} +lt_ar_flags=$AR_FLAGS +_LT_DECL([], [lt_ar_flags], [0], [Flags to create an archive (by configure)]) + +# Make AR_FLAGS overridable by 'make ARFLAGS='. Don't try to run-time override +# by AR_FLAGS because that was never working and AR_FLAGS is about to die. +_LT_DECL([], [AR_FLAGS], [\@S|@{ARFLAGS-"\@S|@lt_ar_flags"}], + [Flags to create an archive]) AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file], [lt_cv_ar_at_file=no @@ -1714,7 +1724,7 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl lt_cv_sys_max_cmd_len=8192; ;; - bitrig* | darwin* | dragonfly* | freebsd* | netbsd* | openbsd*) + bitrig* | darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) # This has been around since 386BSD, at least. Likely further. 
if test -x /sbin/sysctl; then lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` @@ -1757,7 +1767,7 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl sysv5* | sco5v6* | sysv4.2uw2*) kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` if test -n "$kargmax"; then - lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[ ]]//'` + lt_cv_sys_max_cmd_len=`echo $kargmax | $SED 's/.*[[ ]]//'` else lt_cv_sys_max_cmd_len=32768 fi @@ -2207,26 +2217,35 @@ m4_defun([_LT_CMD_STRIPLIB], striplib= old_striplib= AC_MSG_CHECKING([whether stripping libraries is possible]) -if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then - test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" - test -z "$striplib" && striplib="$STRIP --strip-unneeded" - AC_MSG_RESULT([yes]) +if test -z "$STRIP"; then + AC_MSG_RESULT([no]) else -# FIXME - insert some real tests, host_os isn't really good enough - case $host_os in - darwin*) - if test -n "$STRIP"; then + if $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then + old_striplib="$STRIP --strip-debug" + striplib="$STRIP --strip-unneeded" + AC_MSG_RESULT([yes]) + else + case $host_os in + darwin*) + # FIXME - insert some real tests, host_os isn't really good enough striplib="$STRIP -x" old_striplib="$STRIP -S" AC_MSG_RESULT([yes]) - else + ;; + freebsd*) + if $STRIP -V 2>&1 | $GREP "elftoolchain" >/dev/null; then + old_striplib="$STRIP --strip-debug" + striplib="$STRIP --strip-unneeded" + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + ;; + *) AC_MSG_RESULT([no]) - fi - ;; - *) - AC_MSG_RESULT([no]) - ;; - esac + ;; + esac + fi fi _LT_DECL([], [old_striplib], [1], [Commands to strip libraries]) _LT_DECL([], [striplib], [1]) @@ -2549,7 +2568,7 @@ cygwin* | mingw* | pw32* | cegcc*) case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' - soname_spec='`echo $libname | sed -e 's/^lib/cyg/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' + soname_spec='`echo $libname | $SED -e 
's/^lib/cyg/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' m4_if([$1], [],[ sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"]) ;; @@ -2559,14 +2578,14 @@ m4_if([$1], [],[ ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' - library_names_spec='`echo $libname | sed -e 's/^lib/pw/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' + library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' ;; esac dynamic_linker='Win32 ld.exe' ;; - *,cl*) - # Native MSVC + *,cl* | *,icl*) + # Native MSVC or ICC libname_spec='$name' soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' library_names_spec='$libname.dll.lib' @@ -2585,7 +2604,7 @@ m4_if([$1], [],[ done IFS=$lt_save_ifs # Convert to MSYS style. - sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'` + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'` ;; cygwin*) # Convert to unix form, then to dos form, then back to unix form @@ -2622,7 +2641,7 @@ m4_if([$1], [],[ ;; *) - # Assume MSVC wrapper + # Assume MSVC and ICC wrapper library_names_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext $libname.lib' dynamic_linker='Win32 ld.exe' ;; @@ -2655,7 +2674,7 @@ dgux*) shlibpath_var=LD_LIBRARY_PATH ;; -freebsd* | dragonfly*) +freebsd* | dragonfly* | midnightbsd*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. 
if test -x /usr/bin/objformat; then @@ -3466,7 +3485,7 @@ beos*) bsdi[[45]]*) lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)' - lt_cv_file_magic_cmd='/usr/bin/file -L' + lt_cv_file_magic_cmd='$FILECMD -L' lt_cv_file_magic_test_file=/shlib/libc.so ;; @@ -3500,14 +3519,14 @@ darwin* | rhapsody*) lt_cv_deplibs_check_method=pass_all ;; -freebsd* | dragonfly*) +freebsd* | dragonfly* | midnightbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then case $host_cpu in i*86 ) # Not sure whether the presence of OpenBSD here was a mistake. # Let's accept both of them until this is cleared up. lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' - lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_cmd=$FILECMD lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` ;; esac @@ -3521,7 +3540,7 @@ haiku*) ;; hpux10.20* | hpux11*) - lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_cmd=$FILECMD case $host_cpu in ia64*) lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' @@ -3568,7 +3587,7 @@ netbsd* | netbsdelf*-gnu) newos6*) lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' - lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_cmd=$FILECMD lt_cv_file_magic_test_file=/usr/lib/libnls.so ;; @@ -3695,13 +3714,13 @@ else mingw*) lt_bad_file=conftest.nm/nofile ;; *) lt_bad_file=/dev/null ;; esac - case `"$tmp_nm" -B $lt_bad_file 2>&1 | sed '1q'` in + case `"$tmp_nm" -B $lt_bad_file 2>&1 | $SED '1q'` in *$lt_bad_file* | *'Invalid file or object type'*) lt_cv_path_NM="$tmp_nm -B" break 2 ;; *) - case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in + case `"$tmp_nm" -p /dev/null 2>&1 | $SED '1q'` in */dev/null*) lt_cv_path_NM="$tmp_nm -p" break 2 @@ -3727,7 +3746,7 @@ else # Let the user override the test. 
else AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :) - case `$DUMPBIN -symbols -headers /dev/null 2>&1 | sed '1q'` in + case `$DUMPBIN -symbols -headers /dev/null 2>&1 | $SED '1q'` in *COFF*) DUMPBIN="$DUMPBIN -symbols -headers" ;; @@ -3967,7 +3986,7 @@ esac if test "$lt_cv_nm_interface" = "MS dumpbin"; then # Gets list of data symbols to import. - lt_cv_sys_global_symbol_to_import="sed -n -e 's/^I .* \(.*\)$/\1/p'" + lt_cv_sys_global_symbol_to_import="$SED -n -e 's/^I .* \(.*\)$/\1/p'" # Adjust the below global symbol transforms to fixup imported variables. lt_cdecl_hook=" -e 's/^I .* \(.*\)$/extern __declspec(dllimport) char \1;/p'" lt_c_name_hook=" -e 's/^I .* \(.*\)$/ {\"\1\", (void *) 0},/p'" @@ -3985,20 +4004,20 @@ fi # Transform an extracted symbol line into a proper C declaration. # Some systems (esp. on ia64) link data and code symbols differently, # so use this general approach. -lt_cv_sys_global_symbol_to_cdecl="sed -n"\ +lt_cv_sys_global_symbol_to_cdecl="$SED -n"\ $lt_cdecl_hook\ " -e 's/^T .* \(.*\)$/extern int \1();/p'"\ " -e 's/^$symcode$symcode* .* \(.*\)$/extern char \1;/p'" # Transform an extracted symbol line into symbol name and symbol address -lt_cv_sys_global_symbol_to_c_name_address="sed -n"\ +lt_cv_sys_global_symbol_to_c_name_address="$SED -n"\ $lt_c_name_hook\ " -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ " -e 's/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/p'" # Transform an extracted symbol line into symbol name with lib prefix and # symbol address. 
-lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n"\ +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="$SED -n"\ $lt_c_name_lib_hook\ " -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ " -e 's/^$symcode$symcode* .* \(lib.*\)$/ {\"\1\", (void *) \&\1},/p'"\ @@ -4022,7 +4041,7 @@ for ac_symprfx in "" "_"; do if test "$lt_cv_nm_interface" = "MS dumpbin"; then # Fake it for dumpbin and say T for any non-static function, # D for any global variable and I for any imported variable. - # Also find C++ and __fastcall symbols from MSVC++, + # Also find C++ and __fastcall symbols from MSVC++ or ICC, # which start with @ or ?. lt_cv_sys_global_symbol_pipe="$AWK ['"\ " {last_section=section; section=\$ 3};"\ @@ -4040,9 +4059,9 @@ for ac_symprfx in "" "_"; do " s[1]~prfx {split(s[1],t,\"@\"); print f,t[1],substr(t[1],length(prfx))}"\ " ' prfx=^$ac_symprfx]" else - lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + lt_cv_sys_global_symbol_pipe="$SED -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" fi - lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | $SED '/ __gnu_lto/d'" # Check to see that the pipe works correctly. pipe_works=no @@ -4064,7 +4083,8 @@ _LT_EOF if AC_TRY_EVAL(ac_compile); then # Now try to grab the symbols. nlist=conftest.nm - if AC_TRY_EVAL(NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) && test -s "$nlist"; then + $ECHO "$as_me:$LINENO: $NM conftest.$ac_objext | $lt_cv_sys_global_symbol_pipe > $nlist" >&AS_MESSAGE_LOG_FD + if eval "$NM" conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist 2>&AS_MESSAGE_LOG_FD && test -s "$nlist"; then # Try sorting and uniquifying the output. 
if sort "$nlist" | uniq > "$nlist"T; then mv -f "$nlist"T "$nlist" @@ -4329,7 +4349,7 @@ m4_if([$1], [CXX], [ ;; esac ;; - freebsd* | dragonfly*) + freebsd* | dragonfly* | midnightbsd*) # FreeBSD uses GNU C++ ;; hpux9* | hpux10* | hpux11*) @@ -4412,7 +4432,7 @@ m4_if([$1], [CXX], [ _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' ;; *) - case `$CC -V 2>&1 | sed 5q` in + case `$CC -V 2>&1 | $SED 5q` in *Sun\ C*) # Sun C++ 5.9 _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' @@ -4704,6 +4724,12 @@ m4_if([$1], [CXX], [ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; + # flang / f18. f95 an alias for gfortran or flang on Debian + flang* | f18* | f95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; # icc used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. icc* | ifort*) @@ -4748,7 +4774,7 @@ m4_if([$1], [CXX], [ _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' ;; *) - case `$CC -V 2>&1 | sed 5q` in + case `$CC -V 2>&1 | $SED 5q` in *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*) # Sun Fortran 8.3 passes all unrecognized flags to the linker _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' @@ -4931,7 +4957,7 @@ m4_if([$1], [CXX], [ if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' else - _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") 
|| (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' fi ;; pw32*) @@ -4939,7 +4965,7 @@ m4_if([$1], [CXX], [ ;; cygwin* | mingw* | cegcc*) case $cc_basename in - cl*) + cl* | icl*) _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' ;; *) @@ -4999,15 +5025,15 @@ dnl Note also adjust exclude_expsyms for C++ above. case $host_os in cygwin* | mingw* | pw32* | cegcc*) - # FIXME: the MSVC++ port hasn't been tested in a loooong time + # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using - # Microsoft Visual C++. + # Microsoft Visual C++ or Intel C++ Compiler. if test yes != "$GCC"; then with_gnu_ld=no fi ;; interix*) - # we just hope/assume this is gcc and not c89 (= MSVC++) + # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) with_gnu_ld=yes ;; openbsd* | bitrig*) @@ -5062,7 +5088,7 @@ dnl Note also adjust exclude_expsyms for C++ above. _LT_TAGVAR(whole_archive_flag_spec, $1)= fi supports_anon_versioning=no - case `$LD -v | $SED -e 's/([^)]\+)\s\+//' 2>&1` in + case `$LD -v | $SED -e 's/([[^)]]\+)\s\+//' 2>&1` in *GNU\ gold*) supports_anon_versioning=yes ;; *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... 
@@ -5174,6 +5200,7 @@ _LT_EOF emximp -o $lib $output_objdir/$libname.def' _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' ;; interix[[3-9]]*) @@ -5188,7 +5215,7 @@ _LT_EOF # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) @@ -5231,7 +5258,7 @@ _LT_EOF _LT_TAGVAR(compiler_needs_object, $1)=yes ;; esac - case `$CC -V 2>&1 | sed 5q` in + case `$CC -V 2>&1 | $SED 5q` in *Sun\ C*) # Sun C 5.9 _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' _LT_TAGVAR(compiler_needs_object, $1)=yes @@ -5243,13 +5270,14 @@ _LT_EOF if test yes = "$supports_anon_versioning"; then _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ - cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> 
$output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' fi case $cc_basename in tcc*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='-rdynamic' ;; xlf* | bgf* | bgxlf* | mpixlf*) @@ -5259,7 +5287,7 @@ _LT_EOF _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' if test yes = "$supports_anon_versioning"; then _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ - cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' fi @@ -5391,7 +5419,7 @@ _LT_EOF if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' else - _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk 
'\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' fi aix_use_runtimelinking=no @@ -5574,12 +5602,12 @@ _LT_EOF cygwin* | mingw* | pw32* | cegcc*) # When not using gcc, we currently assume that we are using - # Microsoft Visual C++. + # Microsoft Visual C++ or Intel C++ Compiler. # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. case $cc_basename in - cl*) - # Native MSVC + cl* | icl*) + # Native MSVC or ICC _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=yes @@ -5620,7 +5648,7 @@ _LT_EOF fi' ;; *) - # Assume MSVC wrapper + # Assume MSVC and ICC wrapper _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported # Tell ltmain to make .lib files, not .a files. @@ -5668,7 +5696,7 @@ _LT_EOF ;; # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
- freebsd* | dragonfly*) + freebsd* | dragonfly* | midnightbsd*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes @@ -5809,6 +5837,7 @@ _LT_EOF # Fabrice Bellard et al's Tiny C Compiler _LT_TAGVAR(ld_shlibs, $1)=yes _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' ;; esac ;; @@ -5880,6 +5909,7 @@ _LT_EOF emximp -o $lib $output_objdir/$libname.def' _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' ;; osf3*) @@ -6439,7 +6469,7 @@ if test yes != "$_lt_caught_CXX_error"; then # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. - output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' else GXX=no @@ -6650,8 +6680,8 @@ if test yes != "$_lt_caught_CXX_error"; then cygwin* | mingw* | pw32* | cegcc*) case $GXX,$cc_basename in - ,cl* | no,cl*) - # Native MSVC + ,cl* | no,cl* | ,icl* | no,icl*) + # Native MSVC or ICC # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. 
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' @@ -6749,6 +6779,7 @@ if test yes != "$_lt_caught_CXX_error"; then emximp -o $lib $output_objdir/$libname.def' _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' ;; dgux*) @@ -6779,7 +6810,7 @@ if test yes != "$_lt_caught_CXX_error"; then _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; - freebsd* | dragonfly*) + freebsd* | dragonfly* | midnightbsd*) # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF # conventions _LT_TAGVAR(ld_shlibs, $1)=yes @@ -6814,7 +6845,7 @@ if test yes != "$_lt_caught_CXX_error"; then # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. - output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test yes = "$GXX"; then @@ -6879,7 +6910,7 @@ if test yes != "$_lt_caught_CXX_error"; then # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. 
- output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test yes = "$GXX"; then @@ -6916,7 +6947,7 @@ if test yes != "$_lt_caught_CXX_error"; then # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; irix5* | irix6*) case $cc_basename in @@ -7056,13 +7087,13 @@ if test yes != "$_lt_caught_CXX_error"; then _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' if test yes = "$supports_anon_versioning"; then _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ - cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> 
$output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' fi ;; *) - case `$CC -V 2>&1 | sed 5q` in + case `$CC -V 2>&1 | $SED 5q` in *Sun\ C*) # Sun C++ 5.9 _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' @@ -7218,7 +7249,7 @@ if test yes != "$_lt_caught_CXX_error"; then # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. - output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' else # FIXME: insert proper C++ library support @@ -7302,7 +7333,7 @@ if test yes != "$_lt_caught_CXX_error"; then # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. - output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' else # g++ 2.7 appears to require '-G' NOT '-shared' on this # platform. @@ -7313,7 +7344,7 @@ if test yes != "$_lt_caught_CXX_error"; then # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. 
- output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $wl$libdir' @@ -8208,6 +8239,14 @@ _LT_DECL([], [DLLTOOL], [1], [DLL creation program]) AC_SUBST([DLLTOOL]) ]) +# _LT_DECL_FILECMD +# ---------------- +# Check for a file(cmd) program that can be used to detect file type and magic +m4_defun([_LT_DECL_FILECMD], +[AC_CHECK_TOOL([FILECMD], [file], [:]) +_LT_DECL([], [FILECMD], [1], [A file(cmd) program that detects file types]) +])# _LD_DECL_FILECMD + # _LT_DECL_SED # ------------ # Check for a fully-functional sed program, that truncates diff --git a/libpcre/m4/ltoptions.m4 b/libpcre/m4/ltoptions.m4 index 94b082976..b0b5e9c21 100644 --- a/libpcre/m4/ltoptions.m4 +++ b/libpcre/m4/ltoptions.m4 @@ -1,7 +1,7 @@ # Helper functions for option handling. -*- Autoconf -*- # -# Copyright (C) 2004-2005, 2007-2009, 2011-2015 Free Software -# Foundation, Inc. +# Copyright (C) 2004-2005, 2007-2009, 2011-2019, 2021-2022 Free +# Software Foundation, Inc. # Written by Gary V. Vaughan, 2004 # # This file is free software; the Free Software Foundation gives diff --git a/libpcre/m4/ltsugar.m4 b/libpcre/m4/ltsugar.m4 index 48bc9344a..902508bd9 100644 --- a/libpcre/m4/ltsugar.m4 +++ b/libpcre/m4/ltsugar.m4 @@ -1,6 +1,6 @@ # ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*- # -# Copyright (C) 2004-2005, 2007-2008, 2011-2015 Free Software +# Copyright (C) 2004-2005, 2007-2008, 2011-2019, 2021-2022 Free Software # Foundation, Inc. # Written by Gary V. Vaughan, 2004 # diff --git a/libpcre/m4/ltversion.m4 b/libpcre/m4/ltversion.m4 index fa04b52a3..b155d0ace 100644 --- a/libpcre/m4/ltversion.m4 +++ b/libpcre/m4/ltversion.m4 @@ -1,6 +1,7 @@ # ltversion.m4 -- version numbers -*- Autoconf -*- # -# Copyright (C) 2004, 2011-2015 Free Software Foundation, Inc. 
+# Copyright (C) 2004, 2011-2019, 2021-2022 Free Software Foundation, +# Inc. # Written by Scott James Remnant, 2004 # # This file is free software; the Free Software Foundation gives @@ -9,15 +10,15 @@ # @configure_input@ -# serial 4179 ltversion.m4 +# serial 4245 ltversion.m4 # This file is part of GNU Libtool -m4_define([LT_PACKAGE_VERSION], [2.4.6]) -m4_define([LT_PACKAGE_REVISION], [2.4.6]) +m4_define([LT_PACKAGE_VERSION], [2.4.7]) +m4_define([LT_PACKAGE_REVISION], [2.4.7]) AC_DEFUN([LTVERSION_VERSION], -[macro_version='2.4.6' -macro_revision='2.4.6' +[macro_version='2.4.7' +macro_revision='2.4.7' _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) _LT_DECL(, macro_revision, 0) ]) diff --git a/libpcre/m4/lt~obsolete.m4 b/libpcre/m4/lt~obsolete.m4 index c6b26f88f..0f7a8759d 100644 --- a/libpcre/m4/lt~obsolete.m4 +++ b/libpcre/m4/lt~obsolete.m4 @@ -1,7 +1,7 @@ # lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- # -# Copyright (C) 2004-2005, 2007, 2009, 2011-2015 Free Software -# Foundation, Inc. +# Copyright (C) 2004-2005, 2007, 2009, 2011-2019, 2021-2022 Free +# Software Foundation, Inc. # Written by Scott James Remnant, 2004. # # This file is free software; the Free Software Foundation gives diff --git a/libpcre/m4/pcre2_visibility.m4 b/libpcre/m4/pcre2_visibility.m4 index ae00de06e..03f4fba6e 100644 --- a/libpcre/m4/pcre2_visibility.m4 +++ b/libpcre/m4/pcre2_visibility.m4 @@ -4,63 +4,63 @@ dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. -dnl From Bruno Haible. +dnl Originally From Bruno Haible. dnl Tests whether the compiler supports the command-line option -dnl -fvisibility=hidden and the function and variable attributes -dnl __attribute__((__visibility__("hidden"))) and +dnl -fvisibility=hidden and the function attribute dnl __attribute__((__visibility__("default"))). 
-dnl Does *not* test for __visibility__("protected") - which has tricky -dnl semantics (see the 'vismain' test in glibc) and does not exist e.g. on -dnl MacOS X. -dnl Does *not* test for __visibility__("internal") - which has processor -dnl dependent semantics. -dnl Does *not* test for #pragma GCC visibility push(hidden) - which is -dnl "really only recommended for legacy code". -dnl Set the variable CFLAG_VISIBILITY. +dnl +dnl Set the variable VISIBILITY_CFLAGS. dnl Defines and sets the variable HAVE_VISIBILITY. +dnl Defines and sets the variable WORKING_WERROR. dnl Modified to fit with PCRE build environment by Cristian Rodríguez. -dnl Adjusted for PCRE2 by PH +dnl Adjusted for PCRE2 by PH. +dnl Refactored to work with non GCC (but compatible) compilers. AC_DEFUN([PCRE2_VISIBILITY], [ AC_REQUIRE([AC_PROG_CC]) VISIBILITY_CFLAGS= - VISIBILITY_CXXFLAGS= HAVE_VISIBILITY=0 - if test -n "$GCC"; then - dnl First, check whether -Werror can be added to the command line, or - dnl whether it leads to an error because of some other option that the - dnl user has put into $CC $CFLAGS $CPPFLAGS. - AC_MSG_CHECKING([whether the -Werror option is usable]) - AC_CACHE_VAL([pcre2_cv_cc_vis_werror], [ - pcre2_save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -Werror" - AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM([[]], [[]])], - [pcre2_cv_cc_vis_werror=yes], - [pcre2_cv_cc_vis_werror=no]) - CFLAGS="$pcre2_save_CFLAGS"]) - AC_MSG_RESULT([$pcre2_cv_cc_vis_werror]) - dnl Now check whether visibility declarations are supported. - AC_MSG_CHECKING([for simple visibility declarations]) + dnl First, check whether -Werror can be added to the command line, or + dnl whether it leads to an error because of some other option that the + dnl user has put into $CC $CFLAGS $CPPFLAGS. 
+ AC_MSG_CHECKING([whether the -Werror option is usable]) + AC_CACHE_VAL([pcre2_cv_cc_vis_werror], [ + pcre2_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -Werror" + pcre2_cv_cc_vis_werror=no + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[]], [[]])], + [ + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[]], [[ #warning e ]])], + [], [pcre2_cv_cc_vis_werror=yes] + ) + ], []) + CFLAGS="$pcre2_save_CFLAGS"]) + AC_MSG_RESULT([$pcre2_cv_cc_vis_werror]) + if test -n "$pcre2_cv_cc_vis_werror" && test $pcre2_cv_cc_vis_werror = yes + then + WORKING_WERROR=1 + else + WORKING_WERROR=0 + fi + if test $pcre2_cv_cc_vis_werror = yes; then + dnl Now check whether GCC compatible visibility declarations are supported. + AC_MSG_CHECKING([for GCC compatible visibility declarations]) AC_CACHE_VAL([pcre2_cv_cc_visibility], [ pcre2_save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -fvisibility=hidden" + CFLAGS="$CFLAGS -Werror -fvisibility=hidden" dnl We use the option -Werror and a function dummyfunc, because on some dnl platforms (Cygwin 1.7) the use of -fvisibility triggers a warning dnl "visibility attribute not supported in this configuration; ignored" dnl at the first function definition in every compilation unit, and we dnl don't want to use the option in this case. 
- if test $pcre2_cv_cc_vis_werror = yes; then - CFLAGS="$CFLAGS -Werror" - fi AC_COMPILE_IFELSE( [AC_LANG_PROGRAM( - [[extern __attribute__((__visibility__("hidden"))) int hiddenvar; - extern __attribute__((__visibility__("default"))) int exportedvar; - extern __attribute__((__visibility__("hidden"))) int hiddenfunc (void); + [[extern __attribute__((__visibility__("hidden"))) int hiddenfunc (void); extern __attribute__((__visibility__("default"))) int exportedfunc (void); void dummyfunc (void) {} ]], @@ -68,19 +68,18 @@ AC_DEFUN([PCRE2_VISIBILITY], [pcre2_cv_cc_visibility=yes], [pcre2_cv_cc_visibility=no]) CFLAGS="$pcre2_save_CFLAGS"]) - AC_MSG_RESULT([$pcre2_cv_cc_visibility]) - if test $pcre2_cv_cc_visibility = yes; then - VISIBILITY_CFLAGS="-fvisibility=hidden" - VISIBILITY_CXXFLAGS="-fvisibility=hidden -fvisibility-inlines-hidden" - HAVE_VISIBILITY=1 - AC_DEFINE(PCRE2_EXPORT, [__attribute__ ((visibility ("default")))], [to make a symbol visible]) - else - AC_DEFINE(PCRE2_EXPORT, [], [to make a symbol visible]) - fi + AC_MSG_RESULT([$pcre2_cv_cc_visibility]) + fi + if test -n "$pcre2_cv_cc_visibility" && test $pcre2_cv_cc_visibility = yes + then + VISIBILITY_CFLAGS="-fvisibility=hidden" + HAVE_VISIBILITY=1 + AC_DEFINE(PCRE2_EXPORT, [__attribute__ ((visibility ("default")))], [to make a symbol visible]) + else + AC_DEFINE(PCRE2_EXPORT, [], [to make a symbol visible]) fi AC_SUBST([VISIBILITY_CFLAGS]) - AC_SUBST([VISIBILITY_CXXFLAGS]) AC_SUBST([HAVE_VISIBILITY]) AC_DEFINE_UNQUOTED([HAVE_VISIBILITY], [$HAVE_VISIBILITY], - [Define to 1 if the compiler supports simple visibility declarations.]) + [Define to 1 if the compiler supports GCC compatible visibility declarations.]) ]) diff --git a/libpcre/src/config.h.generic b/libpcre/src/config.h.generic index e8779b55c..380a08906 100644 --- a/libpcre/src/config.h.generic +++ b/libpcre/src/config.h.generic @@ -52,15 +52,24 @@ sure both macros are undefined; an emulation function will then be used. 
*/ LF does in an ASCII/Unicode environment. */ /* #undef EBCDIC_NL25 */ +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ASSERT_H */ + /* Define this if your compiler supports __attribute__((uninitialized)) */ /* #undef HAVE_ATTRIBUTE_UNINITIALIZED */ -/* Define to 1 if you have the 'bcopy' function. */ +/* Define to 1 if you have the `bcopy' function. */ /* #undef HAVE_BCOPY */ +/* Define this if your compiler provides __assume() */ +/* #undef HAVE_BUILTIN_ASSUME */ + /* Define this if your compiler provides __builtin_mul_overflow() */ /* #undef HAVE_BUILTIN_MUL_OVERFLOW */ +/* Define this if your compiler provides __builtin_unreachable() */ +/* #undef HAVE_BUILTIN_UNREACHABLE */ + /* Define to 1 if you have the header file. */ /* #undef HAVE_BZLIB_H */ @@ -82,16 +91,16 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ /* #undef HAVE_LIMITS_H */ -/* Define to 1 if you have the 'memfd_create' function. */ +/* Define to 1 if you have the `memfd_create' function. */ /* #undef HAVE_MEMFD_CREATE */ -/* Define to 1 if you have the 'memmove' function. */ +/* Define to 1 if you have the `memmove' function. */ /* #undef HAVE_MEMMOVE */ /* Define to 1 if you have the header file. */ /* #undef HAVE_MINIX_CONFIG_H */ -/* Define to 1 if you have the 'mkostemp' function. */ +/* Define to 1 if you have the `mkostemp' function. */ /* #undef HAVE_MKOSTEMP */ /* Define if you have POSIX threads libraries and header files. */ @@ -112,7 +121,7 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the `realpath' function. */ /* #undef HAVE_REALPATH */ -/* Define to 1 if you have the 'secure_getenv' function. */ +/* Define to 1 if you have the `secure_getenv' function. */ /* #undef HAVE_SECURE_GETENV */ /* Define to 1 if you have the header file. */ @@ -124,7 +133,7 @@ sure both macros are undefined; an emulation function will then be used. 
*/ /* Define to 1 if you have the header file. */ /* #undef HAVE_STDLIB_H */ -/* Define to 1 if you have the 'strerror' function. */ +/* Define to 1 if you have the `strerror' function. */ /* #undef HAVE_STRERROR */ /* Define to 1 if you have the header file. */ @@ -145,7 +154,8 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ /* #undef HAVE_UNISTD_H */ -/* Define to 1 if the compiler supports simple visibility declarations. */ +/* Define to 1 if the compiler supports GCC compatible visibility + declarations. */ /* #undef HAVE_VISIBILITY */ /* Define to 1 if you have the header file. */ @@ -215,7 +225,7 @@ sure both macros are undefined; an emulation function will then be used. */ Care must be taken if it is increased, because it guards against integer overflow caused by enormously large patterns. */ #ifndef MAX_NAME_SIZE -#define MAX_NAME_SIZE 32 +#define MAX_NAME_SIZE 128 #endif /* The value of MAX_VARLOOKBEHIND specifies the default maximum length, in @@ -245,7 +255,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_NAME "PCRE2" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE2 10.43" +#define PACKAGE_STRING "PCRE2 10.45" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "pcre2" @@ -254,7 +264,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "10.43" +#define PACKAGE_VERSION "10.45" /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested parentheses (of any kind) in a pattern. This limits the amount of system @@ -311,7 +321,7 @@ sure both macros are undefined; an emulation function will then be used. */ unless SUPPORT_JIT is also defined. 
*/ /* #undef SLJIT_PROT_EXECUTABLE_ALLOCATOR */ -/* Define to 1 if all of the C89 standard headers exist (not just the ones +/* Define to 1 if all of the C90 standard headers exist (not just the ones required in a freestanding environment). This macro is provided for backward compatibility; new code need not use it. */ /* #undef STDC_HEADERS */ @@ -366,7 +376,7 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to any value for valgrind support to find invalid memory reads. */ /* #undef SUPPORT_VALGRIND */ -/* Enable extensions on AIX, Interix, z/OS. */ +/* Enable extensions on AIX 3, Interix. */ #ifndef _ALL_SOURCE # define _ALL_SOURCE 1 #endif @@ -427,15 +437,11 @@ sure both macros are undefined; an emulation function will then be used. */ #ifndef __STDC_WANT_IEC_60559_DFP_EXT__ # define __STDC_WANT_IEC_60559_DFP_EXT__ 1 #endif -/* Enable extensions specified by C23 Annex F. */ -#ifndef __STDC_WANT_IEC_60559_EXT__ -# define __STDC_WANT_IEC_60559_EXT__ 1 -#endif /* Enable extensions specified by ISO/IEC TS 18661-4:2015. */ #ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__ # define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1 #endif -/* Enable extensions specified by C23 Annex H and ISO/IEC TS 18661-3:2015. */ +/* Enable extensions specified by ISO/IEC TS 18661-3:2015. */ #ifndef __STDC_WANT_IEC_60559_TYPES_EXT__ # define __STDC_WANT_IEC_60559_TYPES_EXT__ 1 #endif @@ -458,26 +464,20 @@ sure both macros are undefined; an emulation function will then be used. */ #endif /* Version number of package */ -#define VERSION "10.43" +#define VERSION "10.45" /* Number of bits in a file offset, on hosts where this is settable. */ /* #undef _FILE_OFFSET_BITS */ -/* Define to 1 on platforms where this makes off_t a 64-bit type. */ +/* Define for large files, on AIX-style hosts. */ /* #undef _LARGE_FILES */ -/* Number of bits in time_t, on hosts where this is settable. 
*/ -/* #undef _TIME_BITS */ - -/* Define to 1 on platforms where this makes time_t a 64-bit type. */ -/* #undef __MINGW_USE_VC2005_COMPAT */ - -/* Define to empty if 'const' does not conform to ANSI C. */ +/* Define to empty if `const' does not conform to ANSI C. */ /* #undef const */ /* Define to the type of a signed integer type of width exactly 64 bits if such a type exists and the standard includes do not define it. */ /* #undef int64_t */ -/* Define as 'unsigned int' if doesn't define. */ +/* Define to `unsigned int' if does not define. */ /* #undef size_t */ diff --git a/libpcre/src/config.h.in b/libpcre/src/config.h.in index 6c0e1ae5d..be29681c1 100644 --- a/libpcre/src/config.h.in +++ b/libpcre/src/config.h.in @@ -52,21 +52,39 @@ sure both macros are undefined; an emulation function will then be used. */ LF does in an ASCII/Unicode environment. */ #undef EBCDIC_NL25 +/* Define to 1 if you have the header file. */ +#undef HAVE_ASSERT_H + /* Define this if your compiler supports __attribute__((uninitialized)) */ #undef HAVE_ATTRIBUTE_UNINITIALIZED /* Define to 1 if you have the `bcopy' function. */ #undef HAVE_BCOPY +/* Define this if your compiler provides __assume() */ +#undef HAVE_BUILTIN_ASSUME + /* Define this if your compiler provides __builtin_mul_overflow() */ #undef HAVE_BUILTIN_MUL_OVERFLOW +/* Define this if your compiler provides __builtin_unreachable() */ +#undef HAVE_BUILTIN_UNREACHABLE + +/* Define to 1 if you have the header file. */ +#undef HAVE_BZLIB_H + /* Define to 1 if you have the header file. */ #undef HAVE_DIRENT_H /* Define to 1 if you have the header file. */ #undef HAVE_DLFCN_H +/* Define to 1 if you have the header file. */ +#undef HAVE_EDITLINE_READLINE_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_EDIT_READLINE_READLINE_H + /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H @@ -85,6 +103,21 @@ sure both macros are undefined; an emulation function will then be used. 
*/ /* Define to 1 if you have the `mkostemp' function. */ #undef HAVE_MKOSTEMP +/* Define if you have POSIX threads libraries and header files. */ +#undef HAVE_PTHREAD + +/* Have PTHREAD_PRIO_INHERIT. */ +#undef HAVE_PTHREAD_PRIO_INHERIT + +/* Define to 1 if you have the header file. */ +#undef HAVE_READLINE_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_READLINE_HISTORY_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_READLINE_READLINE_H + /* Define to 1 if you have the `realpath' function. */ #undef HAVE_REALPATH @@ -121,7 +154,8 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H -/* Define to 1 if the compiler supports simple visibility declarations. */ +/* Define to 1 if the compiler supports GCC compatible visibility + declarations. */ #undef HAVE_VISIBILITY /* Define to 1 if you have the header file. */ @@ -130,6 +164,9 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ #undef HAVE_WINDOWS_H +/* Define to 1 if you have the header file. */ +#undef HAVE_ZLIB_H + /* This limits the amount of memory that may be used while matching a pattern. It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply to JIT matching. The value is in kibibytes (units of 1024 bytes). */ @@ -215,6 +252,20 @@ sure both macros are undefined; an emulation function will then be used. */ stack that is used while compiling a pattern. */ #undef PARENS_NEST_LIMIT +/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by + pcre2grep to hold parts of the file it is searching. The buffer will be + expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing + very long lines. The actual amount of memory used by pcre2grep is three + times this number, because it allows for the buffering of "before" and + "after" lines. 
*/ +#undef PCRE2GREP_BUFSIZE + +/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer + used by pcre2grep to hold parts of the file it is searching. The actual + amount of memory used by pcre2grep is three times this number, because it + allows for the buffering of "before" and "after" lines. */ +#undef PCRE2GREP_MAX_BUFSIZE + /* Define to any value to include debugging code. */ #undef PCRE2_DEBUG @@ -237,11 +288,52 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to any value if linking statically (TODO: make nice with Libtool) */ #undef PCRE2_STATIC +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* Define to any non-zero number to enable support for SELinux compatible + executable memory allocator in JIT. Note that this will have no effect + unless SUPPORT_JIT is also defined. */ +#undef SLJIT_PROT_EXECUTABLE_ALLOCATOR + /* Define to 1 if all of the C90 standard headers exist (not just the ones required in a freestanding environment). This macro is provided for backward compatibility; new code need not use it. */ #undef STDC_HEADERS +/* Define to any value to enable differential fuzzing support. */ +#undef SUPPORT_DIFF_FUZZ + +/* Define to any value to enable support for Just-In-Time compiling. */ +#undef SUPPORT_JIT + +/* Define to any value to allow pcre2grep to be linked with libbz2, so that it + is able to handle .bz2 files. */ +#undef SUPPORT_LIBBZ2 + +/* Define to any value to allow pcre2test to be linked with libedit. */ +#undef SUPPORT_LIBEDIT + +/* Define to any value to allow pcre2test to be linked with libreadline. */ +#undef SUPPORT_LIBREADLINE + +/* Define to any value to allow pcre2grep to be linked with libz, so that it + is able to handle .gz files. */ +#undef SUPPORT_LIBZ + +/* Define to any value to enable callout script support in pcre2grep. 
*/ +#undef SUPPORT_PCRE2GREP_CALLOUT + +/* Define to any value to enable fork support in pcre2grep callout scripts. + This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined. + */ +#undef SUPPORT_PCRE2GREP_CALLOUT_FORK + +/* Define to any value to enable JIT support in pcre2grep. Note that this will + have no effect unless SUPPORT_JIT is also defined. */ +#undef SUPPORT_PCRE2GREP_JIT + /* Define to any value to enable the 16 bit PCRE2 library. */ #undef SUPPORT_PCRE2_16 diff --git a/libpcre/src/pcre2.h.generic b/libpcre/src/pcre2.h.generic index d7a8ff520..061f3db0a 100644 --- a/libpcre/src/pcre2.h.generic +++ b/libpcre/src/pcre2.h.generic @@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE. /* The current PCRE version information. */ #define PCRE2_MAJOR 10 -#define PCRE2_MINOR 43 +#define PCRE2_MINOR 45 #define PCRE2_PRERELEASE -#define PCRE2_DATE 2024-02-16 +#define PCRE2_DATE 2025-02-05 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate @@ -143,6 +143,7 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_EXTENDED_MORE 0x01000000u /* C */ #define PCRE2_LITERAL 0x02000000u /* C */ #define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */ +#define PCRE2_ALT_EXTENDED_CLASS 0x08000000u /* C */ /* An additional compile options word is available in the compile context. */ @@ -159,6 +160,10 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */ #define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */ #define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */ +#define PCRE2_EXTRA_PYTHON_OCTAL 0x00002000u /* C */ +#define PCRE2_EXTRA_NO_BS0 0x00004000u /* C */ +#define PCRE2_EXTRA_NEVER_CALLOUT 0x00008000u /* C */ +#define PCRE2_EXTRA_TURKISH_CASING 0x00010000u /* C */ /* These are for pcre2_jit_compile(). 
*/ @@ -166,6 +171,7 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_JIT_PARTIAL_SOFT 0x00000002u #define PCRE2_JIT_PARTIAL_HARD 0x00000004u #define PCRE2_JIT_INVALID_UTF 0x00000100u +#define PCRE2_JIT_TEST_ALLOC 0x00000200u /* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and pcre2_substitute(). Some are allowed only for one of the functions, and in @@ -318,9 +324,25 @@ pcre2_pattern_convert(). */ #define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195 #define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196 #define PCRE2_ERROR_TOO_MANY_CAPTURES 197 -#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198 +#define PCRE2_ERROR_MISSING_OCTAL_DIGIT 198 #define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199 - +#define PCRE2_ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED 200 +#define PCRE2_ERROR_PATTERN_COMPILED_SIZE_TOO_BIG 201 +#define PCRE2_ERROR_OVERSIZE_PYTHON_OCTAL 202 +#define PCRE2_ERROR_CALLOUT_CALLER_DISABLED 203 +#define PCRE2_ERROR_EXTRA_CASING_REQUIRES_UNICODE 204 +#define PCRE2_ERROR_TURKISH_CASING_REQUIRES_UTF 205 +#define PCRE2_ERROR_EXTRA_CASING_INCOMPATIBLE 206 +#define PCRE2_ERROR_ECLASS_NEST_TOO_DEEP 207 +#define PCRE2_ERROR_ECLASS_INVALID_OPERATOR 208 +#define PCRE2_ERROR_ECLASS_UNEXPECTED_OPERATOR 209 +#define PCRE2_ERROR_ECLASS_EXPECTED_OPERAND 210 +#define PCRE2_ERROR_ECLASS_MIXED_OPERATORS 211 +#define PCRE2_ERROR_ECLASS_HINT_SQUARE_BRACKET 212 +#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_EXPR 213 +#define PCRE2_ERROR_PERL_ECLASS_EMPTY_EXPR 214 +#define PCRE2_ERROR_PERL_ECLASS_MISSING_CLOSE 215 +#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_CHAR 216 /* "Expected" matching error codes: no match and partial match. */ @@ -407,6 +429,9 @@ released, the numbers must not be changed. 
*/ #define PCRE2_ERROR_INTERNAL_DUPMATCH (-65) #define PCRE2_ERROR_DFA_UINVALID_UTF (-66) #define PCRE2_ERROR_INVALIDOFFSET (-67) +#define PCRE2_ERROR_JIT_UNSUPPORTED (-68) +#define PCRE2_ERROR_REPLACECASE (-69) +#define PCRE2_ERROR_TOOLARGEREPLACE (-70) /* Request types for pcre2_pattern_info() */ @@ -460,6 +485,30 @@ released, the numbers must not be changed. */ #define PCRE2_CONFIG_COMPILED_WIDTHS 14 #define PCRE2_CONFIG_TABLES_LENGTH 15 +/* Optimization directives for pcre2_set_optimize(). +For binary compatibility, only add to this list; do not renumber. */ + +#define PCRE2_OPTIMIZATION_NONE 0 +#define PCRE2_OPTIMIZATION_FULL 1 + +#define PCRE2_AUTO_POSSESS 64 +#define PCRE2_AUTO_POSSESS_OFF 65 +#define PCRE2_DOTSTAR_ANCHOR 66 +#define PCRE2_DOTSTAR_ANCHOR_OFF 67 +#define PCRE2_START_OPTIMIZE 68 +#define PCRE2_START_OPTIMIZE_OFF 69 + +/* Types used in pcre2_set_substitute_case_callout(). + +PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the +callout to indicate that the case of the entire callout input should be +case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that +only the first character or glyph should be transformed to Unicode titlecase, +and the rest to lowercase. */ + +#define PCRE2_SUBSTITUTE_CASE_LOWER 1 +#define PCRE2_SUBSTITUTE_CASE_UPPER 2 +#define PCRE2_SUBSTITUTE_CASE_TITLE_FIRST 3 /* Types for code units in patterns and subject strings. 
*/ @@ -603,6 +652,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_max_pattern_compiled_length(pcre2_compile_context *, PCRE2_SIZE); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_max_varlookbehind(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ @@ -611,7 +662,9 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_compile_recursion_guard(pcre2_compile_context *, \ - int (*)(uint32_t, void *), void *); + int (*)(uint32_t, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_optimize(pcre2_compile_context *, uint32_t); #define PCRE2_MATCH_CONTEXT_FUNCTIONS \ PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \ @@ -626,6 +679,11 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_substitute_callout(pcre2_match_context *, \ int (*)(pcre2_substitute_callout_block *, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_substitute_case_callout(pcre2_match_context *, \ + PCRE2_SIZE (*)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE, int, \ + void *), \ + void *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ @@ -738,6 +796,7 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **); + /* Functions for serializing / deserializing compiled patterns. */ #define PCRE2_SERIALIZE_FUNCTIONS \ @@ -901,10 +960,13 @@ pcre2_compile are called by application code. 
*/ #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) #define pcre2_set_max_varlookbehind PCRE2_SUFFIX(pcre2_set_max_varlookbehind_) #define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_) +#define pcre2_set_max_pattern_compiled_length PCRE2_SUFFIX(pcre2_set_max_pattern_compiled_length_) #define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) #define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_) +#define pcre2_set_optimize PCRE2_SUFFIX(pcre2_set_optimize_) #define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_) +#define pcre2_set_substitute_case_callout PCRE2_SUFFIX(pcre2_set_substitute_case_callout_) #define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_) #define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_) #define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_) diff --git a/libpcre/src/pcre2.h.in b/libpcre/src/pcre2.h.in index 1e7e5eb19..ca3f0b413 100644 --- a/libpcre/src/pcre2.h.in +++ b/libpcre/src/pcre2.h.in @@ -143,6 +143,7 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_EXTENDED_MORE 0x01000000u /* C */ #define PCRE2_LITERAL 0x02000000u /* C */ #define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */ +#define PCRE2_ALT_EXTENDED_CLASS 0x08000000u /* C */ /* An additional compile options word is available in the compile context. */ @@ -159,6 +160,10 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */ #define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */ #define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */ +#define PCRE2_EXTRA_PYTHON_OCTAL 0x00002000u /* C */ +#define PCRE2_EXTRA_NO_BS0 0x00004000u /* C */ +#define PCRE2_EXTRA_NEVER_CALLOUT 0x00008000u /* C */ +#define PCRE2_EXTRA_TURKISH_CASING 0x00010000u /* C */ /* These are for pcre2_jit_compile(). 
*/ @@ -166,6 +171,7 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_JIT_PARTIAL_SOFT 0x00000002u #define PCRE2_JIT_PARTIAL_HARD 0x00000004u #define PCRE2_JIT_INVALID_UTF 0x00000100u +#define PCRE2_JIT_TEST_ALLOC 0x00000200u /* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and pcre2_substitute(). Some are allowed only for one of the functions, and in @@ -318,9 +324,25 @@ pcre2_pattern_convert(). */ #define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195 #define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196 #define PCRE2_ERROR_TOO_MANY_CAPTURES 197 -#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198 +#define PCRE2_ERROR_MISSING_OCTAL_DIGIT 198 #define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199 - +#define PCRE2_ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED 200 +#define PCRE2_ERROR_PATTERN_COMPILED_SIZE_TOO_BIG 201 +#define PCRE2_ERROR_OVERSIZE_PYTHON_OCTAL 202 +#define PCRE2_ERROR_CALLOUT_CALLER_DISABLED 203 +#define PCRE2_ERROR_EXTRA_CASING_REQUIRES_UNICODE 204 +#define PCRE2_ERROR_TURKISH_CASING_REQUIRES_UTF 205 +#define PCRE2_ERROR_EXTRA_CASING_INCOMPATIBLE 206 +#define PCRE2_ERROR_ECLASS_NEST_TOO_DEEP 207 +#define PCRE2_ERROR_ECLASS_INVALID_OPERATOR 208 +#define PCRE2_ERROR_ECLASS_UNEXPECTED_OPERATOR 209 +#define PCRE2_ERROR_ECLASS_EXPECTED_OPERAND 210 +#define PCRE2_ERROR_ECLASS_MIXED_OPERATORS 211 +#define PCRE2_ERROR_ECLASS_HINT_SQUARE_BRACKET 212 +#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_EXPR 213 +#define PCRE2_ERROR_PERL_ECLASS_EMPTY_EXPR 214 +#define PCRE2_ERROR_PERL_ECLASS_MISSING_CLOSE 215 +#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_CHAR 216 /* "Expected" matching error codes: no match and partial match. */ @@ -407,6 +429,9 @@ released, the numbers must not be changed. 
*/ #define PCRE2_ERROR_INTERNAL_DUPMATCH (-65) #define PCRE2_ERROR_DFA_UINVALID_UTF (-66) #define PCRE2_ERROR_INVALIDOFFSET (-67) +#define PCRE2_ERROR_JIT_UNSUPPORTED (-68) +#define PCRE2_ERROR_REPLACECASE (-69) +#define PCRE2_ERROR_TOOLARGEREPLACE (-70) /* Request types for pcre2_pattern_info() */ @@ -460,6 +485,30 @@ released, the numbers must not be changed. */ #define PCRE2_CONFIG_COMPILED_WIDTHS 14 #define PCRE2_CONFIG_TABLES_LENGTH 15 +/* Optimization directives for pcre2_set_optimize(). +For binary compatibility, only add to this list; do not renumber. */ + +#define PCRE2_OPTIMIZATION_NONE 0 +#define PCRE2_OPTIMIZATION_FULL 1 + +#define PCRE2_AUTO_POSSESS 64 +#define PCRE2_AUTO_POSSESS_OFF 65 +#define PCRE2_DOTSTAR_ANCHOR 66 +#define PCRE2_DOTSTAR_ANCHOR_OFF 67 +#define PCRE2_START_OPTIMIZE 68 +#define PCRE2_START_OPTIMIZE_OFF 69 + +/* Types used in pcre2_set_substitute_case_callout(). + +PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the +callout to indicate that the case of the entire callout input should be +case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that +only the first character or glyph should be transformed to Unicode titlecase, +and the rest to lowercase. */ + +#define PCRE2_SUBSTITUTE_CASE_LOWER 1 +#define PCRE2_SUBSTITUTE_CASE_UPPER 2 +#define PCRE2_SUBSTITUTE_CASE_TITLE_FIRST 3 /* Types for code units in patterns and subject strings. 
*/ @@ -603,6 +652,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_max_pattern_compiled_length(pcre2_compile_context *, PCRE2_SIZE); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_max_varlookbehind(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ @@ -611,7 +662,9 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_compile_recursion_guard(pcre2_compile_context *, \ - int (*)(uint32_t, void *), void *); + int (*)(uint32_t, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_optimize(pcre2_compile_context *, uint32_t); #define PCRE2_MATCH_CONTEXT_FUNCTIONS \ PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \ @@ -626,6 +679,11 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_substitute_callout(pcre2_match_context *, \ int (*)(pcre2_substitute_callout_block *, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_substitute_case_callout(pcre2_match_context *, \ + PCRE2_SIZE (*)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE, int, \ + void *), \ + void *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ @@ -738,6 +796,7 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **); + /* Functions for serializing / deserializing compiled patterns. */ #define PCRE2_SERIALIZE_FUNCTIONS \ @@ -901,10 +960,13 @@ pcre2_compile are called by application code. 
*/ #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) #define pcre2_set_max_varlookbehind PCRE2_SUFFIX(pcre2_set_max_varlookbehind_) #define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_) +#define pcre2_set_max_pattern_compiled_length PCRE2_SUFFIX(pcre2_set_max_pattern_compiled_length_) #define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) #define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_) +#define pcre2_set_optimize PCRE2_SUFFIX(pcre2_set_optimize_) #define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_) +#define pcre2_set_substitute_case_callout PCRE2_SUFFIX(pcre2_set_substitute_case_callout_) #define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_) #define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_) #define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_) diff --git a/libpcre/src/pcre2_auto_possess.c b/libpcre/src/pcre2_auto_possess.c index 210d13d37..6d7f27b69 100644 --- a/libpcre/src/pcre2_auto_possess.c +++ b/libpcre/src/pcre2_auto_possess.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2022 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -49,6 +49,10 @@ repeats into possessive repeats where possible. 
*/ #include "pcre2_internal.h" +/* This macro represents the max size of list[] and that is used to keep +track of UCD info in several places, it should be kept on sync with the +value used by GenerateUcd.py */ +#define MAX_LIST 8 /************************************************* * Tables for auto-possessification * @@ -64,7 +68,7 @@ The Unicode property types (\P and \p) have to be present to fill out the table because of what their opcode values are, but the table values should always be zero because property types are handled separately in the code. The last four columns apply to items that cannot be repeated, so there is no need to have -rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is +rows for them. Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ #define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1) @@ -123,21 +127,21 @@ opcode is used to select the column. 
The values are as follows: */ static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = { -/* ANY LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */ - { 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */ - { 0, 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */ - { 0, 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */ - { 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ - { 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */ - { 0, 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */ - { 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */ - { 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */ - { 0, 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */ +/* LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */ + { 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */ + { 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */ + { 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */ + { 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ + { 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */ + { 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */ + { 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */ + { 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */ + { 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */ + /* PT_ANY does not need a record. 
*/ }; /* This table is used to check whether auto-possessification is possible @@ -199,7 +203,7 @@ static BOOL check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata, BOOL negated) { -BOOL ok; +BOOL ok, rc; const uint32_t *p; const ucd_record *prop = GET_UCD(c); @@ -240,12 +244,13 @@ switch(ptype) { HSPACE_CASES: VSPACE_CASES: - return negated; + rc = negated; + break; default: - return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated; + rc = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated; } - break; /* Control never reaches here */ + return rc; case PT_WORD: return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || @@ -259,7 +264,8 @@ switch(ptype) if (c < *p) return !negated; if (c == *p++) return negated; } - break; /* Control never reaches here */ + PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ + break; /* Haven't yet thought these through. */ @@ -328,6 +334,7 @@ get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc, PCRE2_UCHAR c = *code; PCRE2_UCHAR base; PCRE2_SPTR end; +PCRE2_SPTR class_end; uint32_t chr; #ifdef SUPPORT_UNICODE @@ -450,10 +457,12 @@ switch(c) code += 2; do { - if (clist_dest >= list + 8) + if (clist_dest >= list + MAX_LIST) { - /* Early return if there is not enough space. This should never - happen, since all clists are shorter than 5 character now. */ + /* Early return if there is not enough space. GenerateUcd.py + generated a list with more than 5 characters and something + must be done about that going forward. 
*/ + PCRE2_DEBUG_UNREACHABLE(); /* Remove if it ever triggers */ list[2] = code[0]; list[3] = code[1]; return code; @@ -473,11 +482,13 @@ switch(c) case OP_CLASS: #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: - if (c == OP_XCLASS) + case OP_ECLASS: + if (c == OP_XCLASS || c == OP_ECLASS) end = code + GET(code, 0) - 1; else #endif end = code + 32 / sizeof(PCRE2_UCHAR); + class_end = end; switch(*end) { @@ -505,6 +516,7 @@ switch(c) break; } list[2] = (uint32_t)(end - code); + list[3] = (uint32_t)(end - class_end); return end; } @@ -537,7 +549,7 @@ compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb, const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit) { PCRE2_UCHAR c; -uint32_t list[8]; +uint32_t list[MAX_LIST]; const uint32_t *chr_ptr; const uint32_t *ochr_ptr; const uint32_t *list_ptr; @@ -581,7 +593,7 @@ for(;;) continue; } - /* At the end of a branch, skip to the end of the group. */ + /* At the end of a branch, skip to the end of the group and process it. */ if (c == OP_ALT) { @@ -638,19 +650,29 @@ for(;;) return FALSE; break; - /* Atomic sub-patterns and assertions can always auto-possessify their - last iterator except for variable length lookbehinds. However, if the - group was entered as a result of checking a previous iterator, this is - not possible. */ + /* Atomic sub-patterns and forward assertions can always auto-possessify + their last iterator. However, if the group was entered as a result of + checking a previous iterator, this is not possible. */ case OP_ASSERT: case OP_ASSERT_NOT: case OP_ONCE: return !entered_a_group; + /* Fixed-length lookbehinds can be treated the same way, but variable + length lookbehinds must not auto-possessify their last iterator. Note + that in order to identify a variable length lookbehind we must check + through all branches, because some may be of fixed length. */ + case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: - return (bracode[1+LINK_SIZE] == OP_VREVERSE)? 
FALSE : !entered_a_group; + do + { + if (bracode[1+LINK_SIZE] == OP_VREVERSE) return FALSE; /* Variable */ + bracode += GET(bracode, 1); + } + while (*bracode == OP_ALT); + return !entered_a_group; /* Not variable length */ /* Non-atomic assertions - don't possessify last iterator. This needs more thought. */ @@ -748,12 +770,12 @@ for(;;) if (base_list[0] == OP_CLASS) #endif { - set1 = (uint8_t *)(base_end - base_list[2]); + set1 = (const uint8_t *)(base_end - base_list[2]); list_ptr = list; } else { - set1 = (uint8_t *)(code - list[2]); + set1 = (const uint8_t *)(code - list[2]); list_ptr = base_list; } @@ -762,13 +784,14 @@ for(;;) { case OP_CLASS: case OP_NCLASS: - set2 = (uint8_t *) + set2 = (const uint8_t *) ((list_ptr == list ? code : base_end) - list_ptr[2]); break; #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: - xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE; + xclass_flags = (list_ptr == list ? code : base_end) - + list_ptr[2] + LINK_SIZE; if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE; if ((*xclass_flags & XCL_MAP) == 0) { @@ -777,7 +800,7 @@ for(;;) /* Might be an empty repeat. */ continue; } - set2 = (uint8_t *)(xclass_flags + 1); + set2 = (const uint8_t *)(xclass_flags + 1); break; #endif @@ -785,21 +808,21 @@ for(;;) invert_bits = TRUE; /* Fall through */ case OP_DIGIT: - set2 = (uint8_t *)(cb->cbits + cbit_digit); + set2 = (const uint8_t *)(cb->cbits + cbit_digit); break; case OP_NOT_WHITESPACE: invert_bits = TRUE; /* Fall through */ case OP_WHITESPACE: - set2 = (uint8_t *)(cb->cbits + cbit_space); + set2 = (const uint8_t *)(cb->cbits + cbit_space); break; case OP_NOT_WORDCHAR: invert_bits = TRUE; /* Fall through */ case OP_WORDCHAR: - set2 = (uint8_t *)(cb->cbits + cbit_word); + set2 = (const uint8_t *)(cb->cbits + cbit_word); break; default: @@ -1084,7 +1107,7 @@ for(;;) case OP_CLASS: if (chr > 255) break; - class_bitset = (uint8_t *) + class_bitset = (const uint8_t *) ((list_ptr == list ? 
code : base_end) - list_ptr[2]); if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE; break; @@ -1092,9 +1115,18 @@ for(;;) #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) - - list_ptr[2] + LINK_SIZE, utf)) return FALSE; + list_ptr[2] + LINK_SIZE, (const uint8_t*)cb->start_code, utf)) + return FALSE; break; -#endif + + case OP_ECLASS: + if (PRIV(eclass)(chr, + (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE, + (list_ptr == list ? code : base_end) - list_ptr[3], + (const uint8_t*)cb->start_code, utf)) + return FALSE; + break; +#endif /* SUPPORT_WIDE_CHARS */ default: return FALSE; @@ -1109,8 +1141,8 @@ for(;;) if (list[1] == 0) return TRUE; } -/* Control never reaches here. There used to be a fail-save return FALSE; here, -but some compilers complain about an unreachable statement. */ +PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ +return FALSE; /* Avoid compiler warnings */ } @@ -1140,7 +1172,7 @@ PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb) PCRE2_UCHAR c; PCRE2_SPTR end; PCRE2_UCHAR *repeat_opcode; -uint32_t list[8]; +uint32_t list[MAX_LIST]; int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. 
*/ BOOL utf = (cb->external_options & PCRE2_UTF) != 0; BOOL ucp = (cb->external_options & PCRE2_UCP) != 0; @@ -1149,7 +1181,11 @@ for (;;) { c = *code; - if (c >= OP_TABLE_LENGTH) return -1; /* Something gone wrong */ + if (c >= OP_TABLE_LENGTH) + { + PCRE2_DEBUG_UNREACHABLE(); + return -1; /* Something gone wrong */ + } if (c >= OP_STAR && c <= OP_TYPEPOSUPTO) { @@ -1198,10 +1234,14 @@ for (;;) } c = *code; } - else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS) + else if (c == OP_CLASS || c == OP_NCLASS +#ifdef SUPPORT_WIDE_CHARS + || c == OP_XCLASS || c == OP_ECLASS +#endif + ) { #ifdef SUPPORT_WIDE_CHARS - if (c == OP_XCLASS) + if (c == OP_XCLASS || c == OP_ECLASS) repeat_opcode = code + GET(code, 1); else #endif @@ -1211,7 +1251,7 @@ for (;;) if (c >= OP_CRSTAR && c <= OP_CRMINRANGE) { /* The return from get_chr_property_list() will never be NULL when - *code (aka c) is one of the three class opcodes. However, gcc with + *code (aka c) is one of the four class opcodes. However, gcc with -fanalyzer notes that a NULL return is possible, and grumbles. Hence we put in a check. 
*/ @@ -1279,6 +1319,7 @@ for (;;) #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: + case OP_ECLASS: code += GET(code, 1); break; #endif diff --git a/libpcre/src/pcre2_chkdint.c b/libpcre/src/pcre2_chkdint.c index d04f6f8cf..708302369 100644 --- a/libpcre/src/pcre2_chkdint.c +++ b/libpcre/src/pcre2_chkdint.c @@ -74,9 +74,7 @@ if (__builtin_mul_overflow(a, b, &m)) return TRUE; #else INT64_OR_DOUBLE m; -#ifdef PCRE2_DEBUG -if (a < 0 || b < 0) abort(); -#endif +PCRE2_ASSERT(a >= 0 && b >= 0); m = (INT64_OR_DOUBLE)a * (INT64_OR_DOUBLE)b; @@ -93,4 +91,4 @@ if (m > PCRE2_SIZE_MAX) return TRUE; return FALSE; } -/* End of pcre_chkdint.c */ +/* End of pcre2_chkdint.c */ diff --git a/libpcre/src/pcre2_compile.c b/libpcre/src/pcre2_compile.c index 8b364977c..0ffac8939 100644 --- a/libpcre/src/pcre2_compile.c +++ b/libpcre/src/pcre2_compile.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -47,7 +47,7 @@ POSSIBILITY OF SUCH DAMAGE. #define PSSTART start_pattern /* Field containing processed string start */ #define PSEND end_pattern /* Field containing processed string end */ -#include "pcre2_internal.h" +#include "pcre2_compile.h" /* In rare error cases debugging might require calling pcre2_printint(). */ @@ -108,20 +108,8 @@ them will be able to (i.e. assume a 64-bit world). */ #define SIZEOFFSET 2 #endif -/* Macros for manipulating elements of the parsed pattern vector. 
*/ - -#define META_CODE(x) (x & 0xffff0000u) -#define META_DATA(x) (x & 0x0000ffffu) -#define META_DIFF(x,y) ((x-y)>>16) - /* Function definitions to allow mutual recursion */ -#ifdef SUPPORT_UNICODE -static unsigned int - add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t, uint32_t, - compile_block *, const uint32_t *, unsigned int); -#endif - static int compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t, uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *, @@ -199,106 +187,6 @@ don't have to check them every time. */ #define OFLOW_MAX (INT_MAX - 20) -/* Code values for parsed patterns, which are stored in a vector of 32-bit -unsigned ints. Values less than META_END are literal data values. The coding -for identifying the item is in the top 16-bits, leaving 16 bits for the -additional data that some of them need. The META_CODE, META_DATA, and META_DIFF -macros are used to manipulate parsed pattern elements. - -NOTE: When these definitions are changed, the table of extra lengths for each -code (meta_extra_lengths, just below) must be updated to remain in step. 
*/ - -#define META_END 0x80000000u /* End of pattern */ - -#define META_ALT 0x80010000u /* alternation */ -#define META_ATOMIC 0x80020000u /* atomic group */ -#define META_BACKREF 0x80030000u /* Back ref */ -#define META_BACKREF_BYNAME 0x80040000u /* \k'name' */ -#define META_BIGVALUE 0x80050000u /* Next is a literal > META_END */ -#define META_CALLOUT_NUMBER 0x80060000u /* (?C with numerical argument */ -#define META_CALLOUT_STRING 0x80070000u /* (?C with string argument */ -#define META_CAPTURE 0x80080000u /* Capturing parenthesis */ -#define META_CIRCUMFLEX 0x80090000u /* ^ metacharacter */ -#define META_CLASS 0x800a0000u /* start non-empty class */ -#define META_CLASS_EMPTY 0x800b0000u /* empty class */ -#define META_CLASS_EMPTY_NOT 0x800c0000u /* negative empty class */ -#define META_CLASS_END 0x800d0000u /* end of non-empty class */ -#define META_CLASS_NOT 0x800e0000u /* start non-empty negative class */ -#define META_COND_ASSERT 0x800f0000u /* (?(?assertion)... */ -#define META_COND_DEFINE 0x80100000u /* (?(DEFINE)... */ -#define META_COND_NAME 0x80110000u /* (?()... */ -#define META_COND_NUMBER 0x80120000u /* (?(digits)... */ -#define META_COND_RNAME 0x80130000u /* (?(R&name)... */ -#define META_COND_RNUMBER 0x80140000u /* (?(Rdigits)... */ -#define META_COND_VERSION 0x80150000u /* (?(VERSIONx.y)... */ -#define META_DOLLAR 0x80160000u /* $ metacharacter */ -#define META_DOT 0x80170000u /* . 
metacharacter */ -#define META_ESCAPE 0x80180000u /* \d and friends */ -#define META_KET 0x80190000u /* closing parenthesis */ -#define META_NOCAPTURE 0x801a0000u /* no capture parens */ -#define META_OPTIONS 0x801b0000u /* (?i) and friends */ -#define META_POSIX 0x801c0000u /* POSIX class item */ -#define META_POSIX_NEG 0x801d0000u /* negative POSIX class item */ -#define META_RANGE_ESCAPED 0x801e0000u /* range with at least one escape */ -#define META_RANGE_LITERAL 0x801f0000u /* range defined literally */ -#define META_RECURSE 0x80200000u /* Recursion */ -#define META_RECURSE_BYNAME 0x80210000u /* (?&name) */ -#define META_SCRIPT_RUN 0x80220000u /* (*script_run:...) */ - -/* These must be kept together to make it easy to check that an assertion -is present where expected in a conditional group. */ - -#define META_LOOKAHEAD 0x80230000u /* (?= */ -#define META_LOOKAHEADNOT 0x80240000u /* (?! */ -#define META_LOOKBEHIND 0x80250000u /* (?<= */ -#define META_LOOKBEHINDNOT 0x80260000u /* (? */ CHAR_GREATER_THAN_SIGN, /* ? 
*/ CHAR_QUESTION_MARK, + /* @ */ CHAR_COMMERCIAL_AT, /* A */ -ESC_A, + /* B */ -ESC_B, /* C */ -ESC_C, + /* D */ -ESC_D, /* E */ -ESC_E, + /* F */ 0, /* G */ -ESC_G, + /* H */ -ESC_H, /* I */ 0, + /* J */ 0, /* K */ -ESC_K, + /* L */ 0, /* M */ 0, + /* N */ -ESC_N, /* O */ 0, + /* P */ -ESC_P, /* Q */ -ESC_Q, + /* R */ -ESC_R, /* S */ -ESC_S, + /* T */ 0, /* U */ 0, + /* V */ -ESC_V, /* W */ -ESC_W, + /* X */ -ESC_X, /* Y */ 0, + /* Z */ -ESC_Z, /* [ */ CHAR_LEFT_SQUARE_BRACKET, + /* \ */ CHAR_BACKSLASH, /* ] */ CHAR_RIGHT_SQUARE_BRACKET, + /* ^ */ CHAR_CIRCUMFLEX_ACCENT, /* _ */ CHAR_UNDERSCORE, + /* ` */ CHAR_GRAVE_ACCENT, /* a */ CHAR_BEL, + /* b */ -ESC_b, /* c */ 0, + /* d */ -ESC_d, /* e */ CHAR_ESC, + /* f */ CHAR_FF, /* g */ 0, + /* h */ -ESC_h, /* i */ 0, + /* j */ 0, /* k */ -ESC_k, + /* l */ 0, /* m */ 0, + /* n */ CHAR_LF, /* o */ 0, + /* p */ -ESC_p, /* q */ 0, + /* r */ CHAR_CR, /* s */ -ESC_s, + /* t */ CHAR_HT, /* u */ 0, + /* v */ -ESC_v, /* w */ -ESC_w, + /* x */ 0, /* y */ 0, + /* z */ -ESC_z }; #else @@ -656,6 +542,8 @@ static const char alasnames[] = STRING_non_atomic_positive_lookbehind0 STRING_negative_lookahead0 STRING_negative_lookbehind0 + STRING_scs0 + STRING_scan_substring0 STRING_atomic0 STRING_sr0 STRING_asr0 @@ -675,6 +563,8 @@ static const alasitem alasmeta[] = { { 30, META_LOOKBEHIND_NA }, { 18, META_LOOKAHEADNOT }, { 19, META_LOOKBEHINDNOT }, + { 3, META_SCS }, + { 14, META_SCS }, { 6, META_ATOMIC }, { 2, META_SCRIPT_RUN }, /* sr = script run */ { 3, META_ATOMIC_SCRIPT_RUN }, /* asr = atomic script run */ @@ -694,8 +584,11 @@ static uint32_t chartypeoffset[] = { now all in a single string, to reduce the number of relocations when a shared library is dynamically loaded. The list of lengths is terminated by a zero length entry. The first three must be alpha, lower, upper, as this is assumed -for handling case independence. The indices for several classes are needed, so -identify them. */ +for handling case independence. 
+ +The indices for several classes are stored in pcre2_compile.h - these must +be kept in sync with posix_names, posix_name_lengths, posix_class_maps, +and posix_substitutes. */ static const char posix_names[] = STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0 @@ -706,12 +599,6 @@ static const char posix_names[] = static const uint8_t posix_name_lengths[] = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; -#define PC_DIGIT 7 -#define PC_GRAPH 8 -#define PC_PRINT 9 -#define PC_PUNCT 10 -#define PC_XDIGIT 13 - /* Table of class bit maps for each POSIX class. Each class is formed from a base map, with an optional addition or removal of another map. Then, for some classes, there is some additional tweaking: for [:blank:] the vertical space @@ -722,7 +609,7 @@ addition or a negative value for map subtraction (if there are two maps). The absolute value of the third field has these meanings: 0 => no tweaking, 1 => remove vertical space characters, 2 => remove underscore. */ -static const int posix_class_maps[] = { +const int PRIV(posix_class_maps)[] = { cbit_word, cbit_digit, -2, /* alpha */ cbit_lower, -1, 0, /* lower */ cbit_upper, -1, 0, /* upper */ @@ -760,7 +647,6 @@ static int posix_substitutes[] = { PT_WORD, 0, /* word */ /* Perl and POSIX space are the same */ PT_PXXDIGIT, 0 /* xdigit */ /* Perl has additional hex digits */ }; -#define POSIX_SUBSIZE (sizeof(posix_substitutes) / (2*sizeof(uint32_t))) #endif /* SUPPORT_UNICODE */ /* Masks for checking option settings. When PCRE2_LITERAL is set, only a subset @@ -778,10 +664,11 @@ are allowed. 
*/ PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MATCH_UNSET_BACKREF| \ PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C|PCRE2_NEVER_UCP| \ PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE|PCRE2_NO_AUTO_POSSESS| \ - PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_UCP|PCRE2_UNGREEDY) + PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_ALT_EXTENDED_CLASS) #define PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS \ - (PCRE2_EXTRA_MATCH_LINE|PCRE2_EXTRA_MATCH_WORD|PCRE2_EXTRA_CASELESS_RESTRICT) + (PCRE2_EXTRA_MATCH_LINE|PCRE2_EXTRA_MATCH_WORD| \ + PCRE2_EXTRA_CASELESS_RESTRICT|PCRE2_EXTRA_TURKISH_CASING) #define PUBLIC_COMPILE_EXTRA_OPTIONS \ (PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \ @@ -789,26 +676,8 @@ are allowed. */ PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX| \ PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK|PCRE2_EXTRA_ASCII_BSD| \ PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX| \ - PCRE2_EXTRA_ASCII_DIGIT) - -/* Compile time error code numbers. They are given names so that they can more -easily be tracked. When a new number is added, the tables called eint1 and -eint2 in pcre2posix.c may need to be updated, and a new error text must be -added to compile_error_texts in pcre2_error.c. Also, the error codes in -pcre2.h.in must be updated - their values are exactly 100 greater than these -values. 
*/ - -enum { ERR0 = COMPILE_ERROR_BASE, - ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10, - ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20, - ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30, - ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40, - ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, - ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, - ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, - ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80, - ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90, - ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100 }; + PCRE2_EXTRA_ASCII_DIGIT|PCRE2_EXTRA_PYTHON_OCTAL|PCRE2_EXTRA_NO_BS0| \ + PCRE2_EXTRA_NEVER_CALLOUT) /* This is a table of start-of-pattern options such as (*UTF) and settings such as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward @@ -816,16 +685,18 @@ compatibility, (*UTFn) is supported in the relevant libraries, but (*UTF) is generic and always supported. 
*/ enum { PSO_OPT, /* Value is an option bit */ + PSO_XOPT, /* Value is an xoption bit */ PSO_FLG, /* Value is a flag bit */ PSO_NL, /* Value is a newline type */ PSO_BSR, /* Value is a \R type */ PSO_LIMH, /* Read integer value for heap limit */ PSO_LIMM, /* Read integer value for match limit */ - PSO_LIMD /* Read integer value for depth limit */ + PSO_LIMD, /* Read integer value for depth limit */ + PSO_OPTMZ /* Value is an optimization bit */ }; typedef struct pso { - const uint8_t *name; + const char *name; uint16_t length; uint16_t type; uint32_t value; @@ -834,27 +705,29 @@ typedef struct pso { /* NB: STRING_UTFn_RIGHTPAR contains the length as well */ static const pso pso_list[] = { - { (uint8_t *)STRING_UTFn_RIGHTPAR, PSO_OPT, PCRE2_UTF }, - { (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF }, - { (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP }, - { (uint8_t *)STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET }, - { (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR, 17, PSO_FLG, PCRE2_NE_ATST_SET }, - { (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS }, - { (uint8_t *)STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPT, PCRE2_NO_DOTSTAR_ANCHOR }, - { (uint8_t *)STRING_NO_JIT_RIGHTPAR, 7, PSO_FLG, PCRE2_NOJIT }, - { (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE }, - { (uint8_t *)STRING_LIMIT_HEAP_EQ, 11, PSO_LIMH, 0 }, - { (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 }, - { (uint8_t *)STRING_LIMIT_DEPTH_EQ, 12, PSO_LIMD, 0 }, - { (uint8_t *)STRING_LIMIT_RECURSION_EQ, 16, PSO_LIMD, 0 }, - { (uint8_t *)STRING_CR_RIGHTPAR, 3, PSO_NL, PCRE2_NEWLINE_CR }, - { (uint8_t *)STRING_LF_RIGHTPAR, 3, PSO_NL, PCRE2_NEWLINE_LF }, - { (uint8_t *)STRING_CRLF_RIGHTPAR, 5, PSO_NL, PCRE2_NEWLINE_CRLF }, - { (uint8_t *)STRING_ANY_RIGHTPAR, 4, PSO_NL, PCRE2_NEWLINE_ANY }, - { (uint8_t *)STRING_NUL_RIGHTPAR, 4, PSO_NL, PCRE2_NEWLINE_NUL }, - { (uint8_t *)STRING_ANYCRLF_RIGHTPAR, 8, PSO_NL, 
PCRE2_NEWLINE_ANYCRLF }, - { (uint8_t *)STRING_BSR_ANYCRLF_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_ANYCRLF }, - { (uint8_t *)STRING_BSR_UNICODE_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_UNICODE } + { STRING_UTFn_RIGHTPAR, PSO_OPT, PCRE2_UTF }, + { STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF }, + { STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP }, + { STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET }, + { STRING_NOTEMPTY_ATSTART_RIGHTPAR, 17, PSO_FLG, PCRE2_NE_ATST_SET }, + { STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPTMZ, PCRE2_OPTIM_AUTO_POSSESS }, + { STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPTMZ, PCRE2_OPTIM_DOTSTAR_ANCHOR }, + { STRING_NO_JIT_RIGHTPAR, 7, PSO_FLG, PCRE2_NOJIT }, + { STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPTMZ, PCRE2_OPTIM_START_OPTIMIZE }, + { STRING_CASELESS_RESTRICT_RIGHTPAR, 18, PSO_XOPT, PCRE2_EXTRA_CASELESS_RESTRICT }, + { STRING_TURKISH_CASING_RIGHTPAR, 15, PSO_XOPT, PCRE2_EXTRA_TURKISH_CASING }, + { STRING_LIMIT_HEAP_EQ, 11, PSO_LIMH, 0 }, + { STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 }, + { STRING_LIMIT_DEPTH_EQ, 12, PSO_LIMD, 0 }, + { STRING_LIMIT_RECURSION_EQ, 16, PSO_LIMD, 0 }, + { STRING_CR_RIGHTPAR, 3, PSO_NL, PCRE2_NEWLINE_CR }, + { STRING_LF_RIGHTPAR, 3, PSO_NL, PCRE2_NEWLINE_LF }, + { STRING_CRLF_RIGHTPAR, 5, PSO_NL, PCRE2_NEWLINE_CRLF }, + { STRING_ANY_RIGHTPAR, 4, PSO_NL, PCRE2_NEWLINE_ANY }, + { STRING_NUL_RIGHTPAR, 4, PSO_NL, PCRE2_NEWLINE_NUL }, + { STRING_ANYCRLF_RIGHTPAR, 8, PSO_NL, PCRE2_NEWLINE_ANYCRLF }, + { STRING_BSR_ANYCRLF_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_ANYCRLF }, + { STRING_BSR_UNICODE_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_UNICODE } }; /* This table is used when converting repeating opcodes into possessified @@ -909,12 +782,15 @@ static const uint8_t opcode_possessify[] = { OP_CRPOSRANGE, 0, /* CRRANGE, CRMINRANGE */ 0, 0, 0, 0, /* CRPOS{STAR,PLUS,QUERY,RANGE} */ - 0, 0, 0, /* CLASS, NCLASS, XCLASS */ + 0, 0, 0, 0, /* CLASS, NCLASS, XCLASS, ECLASS */ 0, 0, /* REF, REFI */ 0, 0, /* DNREF, DNREFI */ - 0, 0 /* RECURSE, CALLOUT */ + 
0, 0, /* RECURSE, CALLOUT */ }; +/* Compile-time check that the table has the correct size. */ +STATIC_ASSERT(sizeof(opcode_possessify) == OP_CALLOUT+1, opcode_possessify); + #ifdef DEBUG_SHOW_PARSED /************************************************* @@ -976,7 +852,7 @@ for (;;) { uint32_t ptype = *pptr >> 16; uint32_t pvalue = *pptr++ & 0xffff; - fprintf(stderr, "META \\%c %d %d", (meta_arg == ESC_P)? 'P':'p', + fprintf(stderr, "META \\%c %d %d", (meta_arg == ESC_P)? CHAR_P:CHAR_p, ptype, pvalue); } else @@ -1151,6 +1027,24 @@ for (;;) fprintf(stderr, "%zd", offset); break; + case META_OFFSET: + fprintf(stderr, "META_OFFSET offset="); + GETOFFSET(offset, pptr); + fprintf(stderr, "%zd", offset); + break; + + case META_SCS: + fprintf(stderr, "META (*scan_substring:"); + break; + + case META_SCS_NAME: + fprintf(stderr, "META_SCS_NAME length=%d relative_offset=%d", *pptr++, (int)meta_arg); + break; + + case META_SCS_NUMBER: + fprintf(stderr, "META_SCS_NUMBER %d relative_offset=%d", *pptr++, (int)meta_arg); + break; + case META_MARK: fprintf(stderr, "META (*MARK:"); goto SHOWARG; @@ -1179,6 +1073,12 @@ for (;;) } fprintf(stderr, ") length=%u", length); break; + + case META_ECLASS_AND: fprintf(stderr, "META_ECLASS_AND"); break; + case META_ECLASS_OR: fprintf(stderr, "META_ECLASS_OR"); break; + case META_ECLASS_SUB: fprintf(stderr, "META_ECLASS_SUB"); break; + case META_ECLASS_XOR: fprintf(stderr, "META_ECLASS_XOR"); break; + case META_ECLASS_NOT: fprintf(stderr, "META_ECLASS_NOT"); break; } fprintf(stderr, "\n"); } @@ -1198,7 +1098,7 @@ associated JIT data. 
*/ PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION pcre2_code_copy(const pcre2_code *code) { -PCRE2_SIZE* ref_count; +PCRE2_SIZE *ref_count; pcre2_code *newcode; if (code == NULL) return NULL; @@ -1310,7 +1210,10 @@ Arguments: ptrptr points to the character pointer variable ptrend points to the end of the input string allow_sign if < 0, sign not allowed; if >= 0, sign is relative to this - max_value the largest number allowed + max_value the largest number allowed; + you must not pass a value for max_value larger than + INT_MAX/10 - 1 because this function relies on max_value to + avoid integer overflow max_error the error to give for an over-large number intptr where to put the result errcodeptr where to put an error code @@ -1329,6 +1232,8 @@ uint32_t n = 0; PCRE2_SPTR ptr = *ptrptr; BOOL yield = FALSE; +PCRE2_ASSERT(max_value <= INT_MAX/10 - 1); + *errorcodeptr = 0; if (allow_sign >= 0 && ptr < ptrend) @@ -1349,10 +1254,11 @@ if (allow_sign >= 0 && ptr < ptrend) if (ptr >= ptrend || !IS_DIGIT(*ptr)) return FALSE; while (ptr < ptrend && IS_DIGIT(*ptr)) { - n = n * 10 + *ptr++ - CHAR_0; + n = n * 10 + (*ptr++ - CHAR_0); if (n > max_value) { *errorcodeptr = max_error; + while (ptr < ptrend && IS_DIGIT(*ptr)) ptr++; goto EXIT; } } @@ -1366,7 +1272,7 @@ if (allow_sign >= 0 && sign != 0) } if (sign > 0) n += allow_sign; - else if ((int)n > allow_sign) + else if (n > (uint32_t)allow_sign) { *errorcodeptr = ERR15; /* Non-existent subpattern */ goto EXIT; @@ -1453,7 +1359,7 @@ else if (pp >= ptrend || *pp != CHAR_RIGHT_CURLY_BRACKET) return FALSE; } -/* Now process the quantifier for real. We know it must be {n} or (n,} or {,m} +/* Now process the quantifier for real. We know it must be {n} or {n,} or {,m} or {n,m}. The only error that read_number() can return is for a number that is too big. If *errorcodeptr is returned as zero it means no number was found. */ @@ -1520,15 +1426,15 @@ return yield; /* This function is called when a \ has been encountered. 
It either returns a positive value for a simple escape such as \d, or 0 for a data character, which -is placed in chptr. A backreference to group n is returned as negative n. On +is placed in chptr. A backreference to group n is returned as -(n+1). On entry, ptr is pointing at the character after \. On exit, it points after the final code unit of the escape sequence. This function is also called from pcre2_substitute() to handle escape sequences in replacement strings. In this case, the cb argument is NULL, and in the case of escapes that have further processing, only sequences that define a data -character are recognised. The isclass argument is not relevant; the options -argument is the final value of the compiled pattern's options. +character are recognised. The options argument is the final value of the +compiled pattern's options. Arguments: ptrptr points to the input position pointer @@ -1537,7 +1443,8 @@ Arguments: errorcodeptr points to the errorcode variable (containing zero) options the current options bits xoptions the current extra options bits - isclass TRUE if inside a character class + bracount the number of capturing parentheses encountered so far + isclass TRUE if in a character class cb compile data block or NULL when called from pcre2_substitute() Returns: zero => a data character @@ -1548,8 +1455,8 @@ Returns: zero => a data character int PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr, - int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclass, - compile_block *cb) + int *errorcodeptr, uint32_t options, uint32_t xoptions, uint32_t bracount, + BOOL isclass, compile_block *cb) { BOOL utf = (options & PCRE2_UTF) != 0; BOOL alt_bsux = @@ -1618,17 +1525,23 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0) if (ptrend - p > 1 && *p == CHAR_U && p[1] == CHAR_PLUS) { -#ifdef EBCDIC - *errorcodeptr = ERR93; -#else +#ifndef EBCDIC if (utf) { ptr = p + 2; escape = 0; /* Not a fancy escape after all */ goto 
COME_FROM_NU; } - else *errorcodeptr = ERR93; #endif + *errorcodeptr = ERR93; + } + + /* Give an error in contexts where quantifiers are not allowed + (character classes; substitution strings). */ + + else if (isclass || cb == NULL) + { + *errorcodeptr = ERR37; } /* Give an error if what follows is not a quantifier, but don't override @@ -1659,7 +1572,8 @@ else if (cb == NULL) { - if (c != CHAR_c && c != CHAR_o && c != CHAR_x) + if (c < CHAR_0 || + (c > CHAR_9 && (c != CHAR_c && c != CHAR_o && c != CHAR_x && c != CHAR_g))) { *errorcodeptr = ERR3; return 0; @@ -1714,6 +1628,7 @@ else hptr >= ptrend || /* Hit end of input */ *hptr != CHAR_RIGHT_CURLY_BRACKET) /* No } terminator */ { + if (isclass) break; /* In a class, just treat as '\u' literal */ escape = ESC_ub; /* Special return */ ptr++; /* Skip { */ break; /* Hex escape not recognized */ @@ -1772,8 +1687,14 @@ else (possibly recursive) subroutine calls, _not_ backreferences. We return the ESC_g code. - Summary: Return a negative number for a numerical back reference, ESC_k for - a named back reference, and ESC_g for a named or numbered subroutine call. + Summary: Return a negative number for a numerical back reference (offset + by 1), ESC_k for a named back reference, and ESC_g for a named or + numbered subroutine call. + + The above describes the \g behaviour inside patterns. Inside replacement + strings (pcre2_substitute) we support only \g for Python + compatibility. Return ESG_g for the named case, and -(num+1) for the + numbered case. 
*/ case CHAR_g: @@ -1785,6 +1706,40 @@ else break; } + if (cb == NULL) + { + PCRE2_SPTR p; + /* Substitution strings */ + if (*ptr != CHAR_LESS_THAN_SIGN) + { + *errorcodeptr = ERR57; + break; + } + + p = ptr + 1; + + if (!read_number(&p, ptrend, -1, MAX_GROUP_NUMBER, ERR61, &s, + errorcodeptr)) + { + if (*errorcodeptr == 0) escape = ESC_g; /* No number found */ + break; + } + + if (p >= ptrend || *p != CHAR_GREATER_THAN_SIGN) + { + /* not advancing ptr; report error at the \g character */ + *errorcodeptr = ERR57; + break; + } + + /* This is the reason that back references are returned as -(s+1) rather + than just -s. In a pattern, \0 is not a back reference, but \g<0> is + valid in a substitution string, so this must be representable. */ + ptr = p + 1; + escape = -(s+1); + break; + } + if (*ptr == CHAR_LESS_THAN_SIGN || *ptr == CHAR_APOSTROPHE) { escape = ESC_g; @@ -1799,7 +1754,7 @@ else PCRE2_SPTR p = ptr + 1; while (p < ptrend && (*p == CHAR_SPACE || *p == CHAR_HT)) p++; - if (!read_number(&p, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &s, + if (!read_number(&p, ptrend, bracount, MAX_GROUP_NUMBER, ERR61, &s, errorcodeptr)) { if (*errorcodeptr == 0) escape = ESC_k; /* No number found */ @@ -1809,6 +1764,7 @@ else if (p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET) { + /* not advancing ptr; report error at the \g character */ *errorcodeptr = ERR57; break; } @@ -1819,7 +1775,7 @@ else else { - if (!read_number(&ptr, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &s, + if (!read_number(&ptr, ptrend, bracount, MAX_GROUP_NUMBER, ERR61, &s, errorcodeptr)) { if (*errorcodeptr == 0) *errorcodeptr = ERR57; /* No number found */ @@ -1833,7 +1789,7 @@ else break; } - escape = -s; + escape = -(s+1); break; /* The handling of escape sequences consisting of a string of digits @@ -1845,7 +1801,16 @@ else number is less than 10, or if there are that many previous extracting left brackets, it is a back reference. 
Otherwise, up to three octal digits are read to form an escaped character code. Thus \123 is likely to be octal 123 - (cf \0123, which is octal 012 followed by the literal 3). + (cf \0123, which is octal 012 followed by the literal 3). This is the "Perl + style" of handling ambiguous octal/backreferences such as \12. + + There is an alternative disambiguation strategy, selected by + PCRE2_EXTRA_PYTHON_OCTAL, which follows Python's behaviour. An octal must + have either a leading zero, or exactly three octal digits; otherwise it's + a backreference. The disambiguation is stable, and does not depend on how + many capture groups are defined (it's simply an invalid backreference if + there is no corresponding capture group). Additionally, octal values above + \377 (\xff) are rejected. Inside a character class, \ followed by a digit is always either a literal 8 or 9 or an octal number. */ @@ -1853,24 +1818,65 @@ else case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: - if (!isclass) + if (isclass) { + /* Fall through to octal handling; never a backreference inside a class. */ + } + else if ((xoptions & PCRE2_EXTRA_PYTHON_OCTAL) != 0) + { + /* Python-style disambiguation. */ + if (ptr[-1] <= CHAR_7 && ptr + 1 < ptrend && ptr[0] >= CHAR_0 && + ptr[0] <= CHAR_7 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7) + { + /* We peeked a three-digit octal, so fall through */ + } + else + { + /* We are at a digit, so the only possible error from read_number() is + a number that is too large. */ + ptr--; /* Back to the digit */ + + if (!read_number(&ptr, ptrend, -1, MAX_GROUP_NUMBER, 0, &s, errorcodeptr)) + { + *errorcodeptr = ERR61; + break; + } + + escape = -(s+1); + break; + } + } + else + { + /* Perl-style disambiguation. */ oldptr = ptr; ptr--; /* Back to the digit */ /* As we know we are at a digit, the only possible error from - read_number() is a number that is too large to be a group number.
In this - case we fall through handle this as not a group reference. If we have - read a small enough number, check for a back reference. + read_number() is a number that is too large to be a group number. Because + that number might be still valid if read as an octal, errorcodeptr is not + set on failure and therefore a sentinel value of INT_MAX is used instead + of the original value, and will be used later to properly set the error, + if not falling through. */ - \1 to \9 are always back references. \8x and \9x are too; \1x to \7x + if (!read_number(&ptr, ptrend, -1, MAX_GROUP_NUMBER, 0, &s, errorcodeptr)) + s = INT_MAX; + + /* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x are octal escapes if there are not that many previous captures. */ - if (read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, 0, &s, errorcodeptr) && - (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount)) + if (s < 10 || c >= CHAR_8 || (unsigned)s <= bracount) { - if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61; - else escape = -s; /* Indicates a back reference */ + /* s > MAX_GROUP_NUMBER should not be possible because of read_number(), + but we keep it just to be safe and because it will also catch the + sentinel value that was set on failure by that function. */ + + if ((unsigned)s > MAX_GROUP_NUMBER) + { + PCRE2_ASSERT(s == INT_MAX); + *errorcodeptr = ERR61; + } + else escape = -(s+1); /* Indicates a back reference */ break; } @@ -1889,16 +1895,26 @@ else /* \0 always starts an octal number, but we may drop through to here with a larger first octal digit. The original code used just to take the least significant 8 bits of octal numbers (I think this is what early Perls used - to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode, + to do). Nowadays we allow for larger numbers in UTF-8 mode and 16/32-bit mode, but no more than 3 octal digits. 
*/ case CHAR_0: c -= CHAR_0; while(i++ < 2 && ptr < ptrend && *ptr >= CHAR_0 && *ptr <= CHAR_7) c = c * 8 + *ptr++ - CHAR_0; + if (c > 0xff) + { + if ((xoptions & PCRE2_EXTRA_PYTHON_OCTAL) != 0) *errorcodeptr = ERR102; #if PCRE2_CODE_UNIT_WIDTH == 8 - if (!utf && c > 0xff) *errorcodeptr = ERR51; + else if (!utf) *errorcodeptr = ERR51; #endif + } + + /* PCRE2_EXTRA_NO_BS0 disables the NUL escape '\0' but doesn't affect + two- or three-character octal escapes \00 and \000, nor \x00. */ + + if ((xoptions & PCRE2_EXTRA_NO_BS0) != 0 && c == 0 && i == 1) + *errorcodeptr = ERR98; break; /* \o is a relatively new Perl feature, supporting a more general way of @@ -1927,7 +1943,7 @@ else cc = *ptr++; if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ #if PCRE2_CODE_UNIT_WIDTH == 32 - if (c >= 0x20000000l) { overflow = TRUE; break; } + if (c >= 0x20000000u) { overflow = TRUE; break; } #endif c = (c << 3) + (cc - CHAR_0); #if PCRE2_CODE_UNIT_WIDTH == 8 @@ -2053,10 +2069,29 @@ else else { - c = 0; - if (ptr >= ptrend || (cc = XDIGIT(*ptr)) == 0xff) break; /* Not a hex digit */ + /* Perl has the surprising/broken behaviour that \x without following + hex digits is treated as an escape for NUL. Their source code laments + this but keeps it for backwards compatibility. A warning is printed + when "use warnings" is enabled. Because we don't have warnings, we + simply forbid it. */ + if (ptr >= ptrend || (cc = XDIGIT(*ptr)) == 0xff) + { + /* Not a hex digit */ + *errorcodeptr = ERR78; + break; + } ptr++; c = cc; + + /* With "use re 'strict'" Perl actually requires exactly two digits (error + for \x, \xA and \xAAA). While \x was already rejected, this seems overly + strict, and there seems little incentive to align with that, given the + backwards-compatibility cost. + + For comparison, note that other engines disagree. For example: + - Java allows 1 or 2 hex digits. Error if 0 digits. No error if >2 digits + - .NET requires 2 hex digits. Error if 0, 1 digits. 
No error if >2 digits. + */ if (ptr >= ptrend || (cc = XDIGIT(*ptr)) == 0xff) break; /* Not a hex digit */ ptr++; c = (c << 4) | cc; @@ -2178,37 +2213,65 @@ c = *ptr++; *negptr = FALSE; /* \P or \p can be followed by a name in {}, optionally preceded by ^ for -negation. */ +negation. We must be handling Unicode encoding here, though we may be compiling +for UTF-8 input in an EBCDIC environment. (PCRE2 does not support both EBCDIC +input and Unicode input in the same build.) In accordance with Unicode's "loose +matching" rules, ASCII white space, hyphens, and underscores are ignored. We +don't use isspace() or tolower() because (a) code points may be greater than +255, and (b) they wouldn't work when compiling for Unicode in an EBCDIC +environment. */ if (c == CHAR_LEFT_CURLY_BRACKET) { if (ptr >= cb->end_pattern) goto ERROR_RETURN; - if (*ptr == CHAR_CIRCUMFLEX_ACCENT) - { - *negptr = TRUE; - ptr++; - } - for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++) { + REDO: + if (ptr >= cb->end_pattern) goto ERROR_RETURN; c = *ptr++; -#if PCRE2_CODE_UNIT_WIDTH != 8 - while (c == '_' || c == '-' || (c <= 0xff && isspace(c))) -#else - while (c == '_' || c == '-' || isspace(c)) -#endif + + /* Skip ignorable Unicode characters. */ + + while (c == CHAR_UNDERSCORE || c == CHAR_MINUS || c == CHAR_SPACE || + (c >= CHAR_HT && c <= CHAR_CR)) { if (ptr >= cb->end_pattern) goto ERROR_RETURN; c = *ptr++; } - if (c == CHAR_NUL) goto ERROR_RETURN; + + /* The first significant character being circumflex negates the meaning of + the item. */ + + if (i == 0 && !*negptr && c == CHAR_CIRCUMFLEX_ACCENT) + { + *negptr = TRUE; + goto REDO; + } + if (c == CHAR_RIGHT_CURLY_BRACKET) break; - name[i] = tolower(c); - if ((c == ':' || c == '=') && vptr == NULL) vptr = name + i; + + /* Names consist of ASCII letters and digits, but equals and colon may also + occur as a name/value separator. We must also allow for \p{L&}. 
A simple + check for a value between '&' and 'z' suffices because anything else in a + name or value will cause an "unknown property" error anyway. */ + + if (c < CHAR_AMPERSAND || c > CHAR_z) goto ERROR_RETURN; + + /* Lower case a capital letter or remember where the name/value separator + is. */ + + if (c >= CHAR_A && c <= CHAR_Z) c |= 0x20; + else if ((c == CHAR_COLON || c == CHAR_EQUALS_SIGN) && vptr == NULL) + vptr = name + i; + + name[i] = c; } + /* Error if the loop didn't end with '}' - either we hit the end of the + pattern or the name was longer than any legal property name. */ + if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN; name[i] = 0; } @@ -2216,14 +2279,19 @@ if (c == CHAR_LEFT_CURLY_BRACKET) /* If { doesn't follow \p or \P there is just one following character, which must be an ASCII letter. */ -else if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0) +else if (c >= CHAR_A && c <= CHAR_Z) { - name[0] = tolower(c); + name[0] = c | 0x20; /* Lower case */ + name[1] = 0; + } +else if (c >= CHAR_a && c <= CHAR_z) + { + name[0] = c; name[1] = 0; } else goto ERROR_RETURN; -*ptrptr = ptr; +*ptrptr = ptr; /* Update pattern pointer */ /* If the property contains ':' or '=' we have class name and value separately specified. The following are supported: @@ -2481,6 +2549,9 @@ if (ptr >= ptrend) /* No characters in name */ *nameptr = ptr; *offsetptr = (PCRE2_SIZE)(ptr - cb->start_pattern); +/* If this logic were ever to change, the matching function in pcre2_substitute.c +ought to be updated to match. */ + /* In UTF mode, a group name may contain letters and decimal digits as defined by Unicode properties, and underscores, but must not start with a digit. 
*/ @@ -2699,6 +2770,60 @@ return parsed_pattern; +/************************************************* +* Maximum size of parsed_pattern for given input * +*************************************************/ + +/* This function is called from parse_regex() below, to determine the amount +of memory to allocate for parsed_pattern. It is also called to check whether +the amount of data written respects the amount of memory allocated. + +Arguments: + ptr points to the start of the pattern + ptrend points to the end of the pattern + utf TRUE in UTF mode + options the options bits + +Returns: the number of uint32_t units for parsed_pattern +*/ +static ptrdiff_t +max_parsed_pattern(PCRE2_SPTR ptr, PCRE2_SPTR ptrend, BOOL utf, + uint32_t options) +{ +PCRE2_SIZE big32count = 0; +ptrdiff_t parsed_size_needed; + +/* When PCRE2_AUTO_CALLOUT is not set, in all but one case the number of +unsigned 32-bit ints written out to the parsed pattern is bounded by the length +of the pattern. The exceptional case is when running in 32-bit, non-UTF mode, +when literal characters greater than META_END (0x80000000) have to be coded as +two units. In this case, therefore, we scan the pattern to check for such +values. */ + +#if PCRE2_CODE_UNIT_WIDTH == 32 +if (!utf) + { + PCRE2_SPTR p; + for (p = ptr; p < ptrend; p++) if (*p >= META_END) big32count++; + } +#else +(void)utf; /* Avoid compiler warning */ +#endif + +parsed_size_needed = (ptrend - ptr) + big32count; + +/* When PCRE2_AUTO_CALLOUT is set we have to assume a numerical callout (4 +elements) for each character. This is overkill, but memory is plentiful these +days. */ + +if ((options & PCRE2_AUTO_CALLOUT) != 0) + parsed_size_needed += (ptrend - ptr) * 4; + +return parsed_size_needed; +} + + + /************************************************* * Parse regex and identify named groups * *************************************************/ @@ -2750,7 +2875,33 @@ the main compiling phase. 
*/ /* States used for analyzing ranges in character classes. The two OK values must be last. */ -enum { RANGE_NO, RANGE_STARTED, RANGE_OK_ESCAPED, RANGE_OK_LITERAL }; +enum { + RANGE_NO, /* State after '[' (initial), or '[a-z'; hyphen is literal */ + RANGE_STARTED, /* State after '[1-'; last-emitted code is META_RANGE_XYZ */ + RANGE_FORBID_NO, /* State after '[\d'; '-]' is allowed but not '-1]' */ + RANGE_FORBID_STARTED, /* State after '[\d-'*/ + RANGE_OK_ESCAPED, /* State after '[\1'; hyphen may be a range */ + RANGE_OK_LITERAL /* State after '[1'; hyphen may be a range */ +}; + +/* States used for analyzing operators and operands in extended character +classes. */ + +enum { + CLASS_OP_EMPTY, /* At start of an expression; empty previous contents */ + CLASS_OP_OPERAND, /* Have preceding operand; after "z" a "--" can follow */ + CLASS_OP_OPERATOR /* Have preceding operator; after "--" operand must follow */ +}; + +/* States used for determining the parse mode in character classes. The two +PERL_EXT values must be last. */ + +enum { + CLASS_MODE_NORMAL, /* Ordinary PCRE2 '[...]' class. */ + CLASS_MODE_ALT_EXT, /* UTS#18-style extended '[...]' class. */ + CLASS_MODE_PERL_EXT, /* Perl extended '(?[...])' class. */ + CLASS_MODE_PERL_EXT_LEAF /* Leaf within extended '(?[ [...] ])' class. */ +}; /* Only in 32-bit mode can there be literals > META_END. A macro encapsulates the storing of literal values in the main parsed pattern, where they can always @@ -2769,13 +2920,16 @@ be quantified. */ /* Here's the actual function. 
*/ -static int parse_regex(PCRE2_SPTR ptr, uint32_t options, BOOL *has_lookbehind, - compile_block *cb) +static int parse_regex(PCRE2_SPTR ptr, uint32_t options, uint32_t xoptions, + BOOL *has_lookbehind, compile_block *cb) { uint32_t c; uint32_t delimiter; uint32_t namelen; uint32_t class_range_state; +uint32_t class_op_state; +uint32_t class_mode_state; +uint32_t *class_start; uint32_t *verblengthptr = NULL; /* Value avoids compiler warning */ uint32_t *verbstartptr = NULL; uint32_t *previous_callout = NULL; @@ -2785,8 +2939,9 @@ uint32_t *this_parsed_item = NULL; uint32_t *prev_parsed_item = NULL; uint32_t meta_quantifier = 0; uint32_t add_after_mark = 0; -uint32_t xoptions = cb->cx->extra_options; uint16_t nest_depth = 0; +int16_t class_depth_m1 = -1; /* The m1 means minus 1. */ +int16_t class_maxdepth_m1 = -1; int after_manual_callout = 0; int expect_cond_assert = 0; int errorcode = 0; @@ -2803,8 +2958,17 @@ PCRE2_SPTR thisptr; PCRE2_SPTR name; PCRE2_SPTR ptrend = cb->end_pattern; PCRE2_SPTR verbnamestart = NULL; /* Value avoids compiler warning */ +PCRE2_SPTR class_range_forbid_ptr = NULL; named_group *ng; nest_save *top_nest, *end_nests; +#ifdef PCRE2_DEBUG +uint32_t *parsed_pattern_check; +ptrdiff_t parsed_pattern_extra = 0; +ptrdiff_t parsed_pattern_extra_check = 0; +PCRE2_SPTR ptr_check; +#endif + +PCRE2_ASSERT(parsed_pattern != NULL); /* Insert leading items for word and line matching (features provided for the benefit of pcre2grep). */ @@ -2820,6 +2984,11 @@ else if ((xoptions & PCRE2_EXTRA_MATCH_WORD) != 0) *parsed_pattern++ = META_NOCAPTURE; } +#ifdef PCRE2_DEBUG +parsed_pattern_check = parsed_pattern; +ptr_check = ptr; +#endif + /* If the pattern is actually a literal string, process it separately to avoid cluttering up the main loop. 
*/ @@ -2829,6 +2998,7 @@ if ((options & PCRE2_LITERAL) != 0) { if (parsed_pattern >= parsed_pattern_end) { + PCRE2_DEBUG_UNREACHABLE(); errorcode = ERR63; /* Internal error (parsed pattern overflow) */ goto FAILED; } @@ -2872,18 +3042,40 @@ while (ptr < ptrend) PCRE2_SPTR tempptr; PCRE2_SIZE offset; - if (parsed_pattern >= parsed_pattern_end) - { - errorcode = ERR63; /* Internal error (parsed pattern overflow) */ - goto FAILED; - } - if (nest_depth > cb->cx->parens_nest_limit) { errorcode = ERR19; goto FAILED; /* Parentheses too deeply nested */ } + /* Check that we haven't emitted too much into parsed_pattern. We allocate + a suitably-sized buffer upfront, then do unchecked writes to it. If we only + write a little bit too much, everything will appear to be OK, because the + upfront size is an overestimate... but a malicious pattern could end up + forcing a write past the buffer end. We must catch this during + development. */ + +#ifdef PCRE2_DEBUG + /* Strong post-write check. Won't help in release builds - at this point + the write has already occurred so it's too late. However, should stop us + committing unsafe code. */ + PCRE2_ASSERT((parsed_pattern - parsed_pattern_check) + + (parsed_pattern_extra - parsed_pattern_extra_check) <= + max_parsed_pattern(ptr_check, ptr, utf, options)); + parsed_pattern_check = parsed_pattern; + parsed_pattern_extra_check = parsed_pattern_extra; + ptr_check = ptr; +#endif + + if (parsed_pattern >= parsed_pattern_end) + { + /* Weak pre-write check; only ensures parsed_pattern[0] is writeable + (but the code below can write many chars). Better than nothing. */ + PCRE2_DEBUG_UNREACHABLE(); + errorcode = ERR63; /* Internal error (parsed pattern overflow) */ + goto FAILED; + } + /* If the last time round this loop something was added, parsed_pattern will no longer be equal to this_parsed_item. Remember where the previous item started and reset for the next item. 
Note that sometimes round the loop, @@ -3003,7 +3195,7 @@ while (ptr < ptrend) if ((options & PCRE2_ALT_VERBNAMES) != 0) { escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, - xoptions, FALSE, cb); + xoptions, cb->bracount, FALSE, cb); if (errorcode != 0) goto FAILED; } else escape = 0; /* Treat all as literal */ @@ -3203,7 +3395,7 @@ while (ptr < ptrend) case CHAR_BACKSLASH: tempptr = ptr; escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, - xoptions, FALSE, cb); + xoptions, cb->bracount, FALSE, cb); if (errorcode != 0) { ESCAPE_FAILED: @@ -3234,7 +3426,7 @@ while (ptr < ptrend) else if (escape < 0) { offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 1); - escape = -escape; + escape = -escape - 1; *parsed_pattern++ = META_BACKREF | (uint32_t)escape; if (escape < 10) { @@ -3346,7 +3538,7 @@ while (ptr < ptrend) /* When \g is used with quotes or angle brackets as delimiters, it is a numerical or named subroutine call, and control comes here. When used - with brace delimiters it is a numberical back reference and does not come + with brace delimiters it is a numerical back reference and does not come here because check_escape() returns it directly as a reference. \k is always a named back reference. */ @@ -3457,7 +3649,7 @@ while (ptr < ptrend) if (!prev_okquantifier) { errorcode = ERR9; - goto FAILED_BACK; + goto FAILED_BACK; // TODO https://github.com/PCRE2Project/pcre2/issues/549 } /* Most (*VERB)s are not allowed to be quantified, but an ungreedy @@ -3473,6 +3665,11 @@ while (ptr < ptrend) *verbstartptr = META_NOCAPTURE; parsed_pattern[1] = META_KET; parsed_pattern += 2; + +#ifdef PCRE2_DEBUG + PCRE2_ASSERT(parsed_pattern_extra >= 2); + parsed_pattern_extra -= 2; +#endif } /* Now we can put the quantifier into the parsed pattern vector. 
At this @@ -3492,7 +3689,6 @@ while (ptr < ptrend) /* ---- Character class ---- */ case CHAR_LEFT_SQUARE_BRACKET: - okquantifier = TRUE; /* In another (POSIX) regex library, the ugly syntax [[:<:]] and [[:>:]] is used for "start of word" and "end of word". As these are otherwise illegal @@ -3530,6 +3726,7 @@ while (ptr < ptrend) } *parsed_pattern++ = META_KET; ptr += 6; + okquantifier = TRUE; break; } @@ -3544,46 +3741,14 @@ while (ptr < ptrend) goto FAILED; } - /* Process a regular character class. If the first character is '^', set - the negation flag. If the first few characters (either before or after ^) - are \Q\E or \E or space or tab in extended-more mode, we skip them too. - This makes for compatibility with Perl. */ + class_mode_state = ((options & PCRE2_ALT_EXTENDED_CLASS) != 0)? + CLASS_MODE_ALT_EXT : CLASS_MODE_NORMAL; - negate_class = FALSE; - while (ptr < ptrend) - { - GETCHARINCTEST(c, ptr); - if (c == CHAR_BACKSLASH) - { - if (ptr < ptrend && *ptr == CHAR_E) ptr++; - else if (ptrend - ptr >= 3 && - PRIV(strncmp_c8)(ptr, STR_Q STR_BACKSLASH STR_E, 3) == 0) - ptr += 3; - else - break; - } - else if ((options & PCRE2_EXTENDED_MORE) != 0 && - (c == CHAR_SPACE || c == CHAR_HT)) /* Note: just these two */ - continue; - else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) - negate_class = TRUE; - else break; - } + /* Jump here from '(?[...])'. That jump must initialize class_mode_state, + set c to the '[' character, and ptr to just after the '['. */ - /* Now the real contents of the class; c has the first "real" character. - Empty classes are permitted only if the option is set. */ - - if (c == CHAR_RIGHT_SQUARE_BRACKET && - (cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0) - { - *parsed_pattern++ = negate_class? META_CLASS_EMPTY_NOT : META_CLASS_EMPTY; - break; /* End of class processing */ - } - - /* Process a non-empty class. */ - - *parsed_pattern++ = negate_class? 
META_CLASS_NOT : META_CLASS; - class_range_state = RANGE_NO; + FROM_PERL_EXTENDED_CLASS: + okquantifier = TRUE; /* In an EBCDIC environment, Perl treats alphabetic ranges specially because there are holes in the encoding, and simply using the range A-Z @@ -3593,7 +3758,16 @@ while (ptr < ptrend) character values are literal or not, and a state variable for handling ranges. */ - /* Loop for the contents of the class */ + /* Loop for the contents of the class. Classes may be nested, if + PCRE2_ALT_EXTENDED_CLASS is set, or the class is of the form (?[...]). */ + + /* c is still set to '[' so the loop will handle the start of the class. */ + + class_depth_m1 = -1; + class_maxdepth_m1 = -1; + class_range_state = RANGE_NO; + class_op_state = CLASS_OP_EMPTY; + class_start = NULL; for (;;) { @@ -3609,13 +3783,26 @@ while (ptr < ptrend) ptr++; /* Skip the 'E' */ goto CLASS_CONTINUE; } + + /* Surprisingly, you cannot use \Q..\E to escape a character inside a + Perl extended class. However, empty \Q\E sequences are allowed, so here + we're only giving an error if the \Q..\E is non-empty. */ + + if (class_mode_state == CLASS_MODE_PERL_EXT) + { + errorcode = ERR116; + goto FAILED; + } + goto CLASS_LITERAL; } - /* Skip over space and tab (only) in extended-more mode. */ + /* Skip over space and tab (only) in extended-more mode, or anywhere + inside a Perl extended class (which implies /xx). */ - if ((options & PCRE2_EXTENDED_MORE) != 0 && - (c == CHAR_SPACE || c == CHAR_HT)) + if ((c == CHAR_SPACE || c == CHAR_HT) && + ((options & PCRE2_EXTENDED_MORE) != 0 || + class_mode_state >= CLASS_MODE_PERL_EXT)) goto CLASS_CONTINUE; /* Handle POSIX class names. Perl allows a negation extension of the @@ -3624,7 +3811,8 @@ while (ptr < ptrend) [.ch.] and [=ch=] ("collating elements") and fault them, as Perl 5.6 and 5.8 do.
*/ - if (c == CHAR_LEFT_SQUARE_BRACKET && + if (class_depth_m1 >= 0 && + c == CHAR_LEFT_SQUARE_BRACKET && ptrend - ptr >= 3 && (*ptr == CHAR_COLON || *ptr == CHAR_DOT || *ptr == CHAR_EQUALS_SIGN) && @@ -3640,14 +3828,41 @@ while (ptr < ptrend) if (class_range_state == RANGE_STARTED) { + ptr = tempptr + 2; errorcode = ERR50; goto FAILED; } + /* Perl treats a hyphen after a POSIX class as a literal, not the + start of a range. However, it gives a warning in its warning mode + unless the hyphen is the last character in the class. PCRE does not + have a warning mode, so we give an error, because this is likely an + error on the user's part. + + Roll back to the hyphen for the error position. */ + + if (class_range_state == RANGE_FORBID_STARTED) + { + ptr = class_range_forbid_ptr; + errorcode = ERR50; + goto FAILED; + } + + /* Disallow implicit union in Perl extended classes. */ + + if (class_op_state == CLASS_OP_OPERAND && + class_mode_state == CLASS_MODE_PERL_EXT) + { + ptr = tempptr + 2; + errorcode = ERR113; + goto FAILED; + } + if (*ptr != CHAR_COLON) { + ptr = tempptr + 2; errorcode = ERR13; - goto FAILED_BACK; + goto FAILED; } if (*(++ptr) == CHAR_CIRCUMFLEX_ACCENT) @@ -3657,33 +3872,19 @@ while (ptr < ptrend) } posix_class = check_posix_name(ptr, (int)(tempptr - ptr)); + ptr = tempptr + 2; if (posix_class < 0) { errorcode = ERR30; goto FAILED; } - ptr = tempptr + 2; - /* Perl treats a hyphen after a POSIX class as a literal, not the - start of a range. However, it gives a warning in its warning mode - unless the hyphen is the last character in the class. PCRE does not - have a warning mode, so we give an error, because this is likely an - error on the user's part. */ + /* Set "a hyphen is forbidden to be the start of a range". For the '-]' + case, the hyphen is treated as a literal, but for '-1' it is disallowed + (because it would be interpreted as range). 
*/ - if (ptr < ptrend - 1 && *ptr == CHAR_MINUS && - ptr[1] != CHAR_RIGHT_SQUARE_BRACKET) - { - errorcode = ERR50; - goto FAILED; - } - - /* Set "a hyphen is not the start of a range" for the -] case, and also - in case the POSIX class is followed by \E or \Q\E (possibly repeated - - fuzzers do that kind of thing) and *then* a hyphen. This causes that - hyphen to be treated as a literal. I don't think it's worth setting up - special apparatus to do otherwise. */ - - class_range_state = RANGE_NO; + class_range_state = RANGE_FORBID_NO; + class_op_state = CLASS_OP_OPERAND; /* When PCRE2_UCP is set, unless PCRE2_EXTRA_ASCII_POSIX is set, some of the POSIX classes are converted to use Unicode properties \p or \P @@ -3726,56 +3927,344 @@ while (ptr < ptrend) *parsed_pattern++ = posix_class; } - /* Handle potential start of range */ + /* Check for the start of the outermost class, or the start of a nested class. */ - else if (c == CHAR_MINUS && class_range_state >= RANGE_OK_ESCAPED) + else if ((c == CHAR_LEFT_SQUARE_BRACKET && + (class_depth_m1 < 0 || class_mode_state == CLASS_MODE_ALT_EXT || + class_mode_state == CLASS_MODE_PERL_EXT)) || + (c == CHAR_LEFT_PARENTHESIS && + class_mode_state == CLASS_MODE_PERL_EXT)) { - *parsed_pattern++ = (class_range_state == RANGE_OK_LITERAL)? - META_RANGE_LITERAL : META_RANGE_ESCAPED; - class_range_state = RANGE_STARTED; + uint32_t start_c = c; + uint32_t new_class_mode_state; + + /* Update the class mode, if moving into a 'leaf' inside a Perl extended + class. */ + + if (start_c == CHAR_LEFT_SQUARE_BRACKET && + class_mode_state == CLASS_MODE_PERL_EXT && class_depth_m1 >= 0) + new_class_mode_state = CLASS_MODE_PERL_EXT_LEAF; + else + new_class_mode_state = class_mode_state; + + /* Tidy up the other class before starting the nested class. */ + /* -[ beginning a nested class is a literal '-' */ + + if (class_range_state == RANGE_STARTED) + parsed_pattern[-1] = CHAR_MINUS; + + /* Disallow implicit union in Perl extended classes. 
*/ + + if (class_op_state == CLASS_OP_OPERAND && + class_mode_state == CLASS_MODE_PERL_EXT) + { + errorcode = ERR113; + goto FAILED; + } + + /* Validate nesting depth */ + if (class_depth_m1 >= ECLASS_NEST_LIMIT - 1) + { + errorcode = ERR107; + goto FAILED; /* Classes too deeply nested */ + } + + /* Process the character class start. If the first character is '^', set + the negation flag. If the first few characters (either before or after ^) + are \Q\E or \E or space or tab in extended-more mode, we skip them too. + This makes for compatibility with Perl. */ + + negate_class = FALSE; + for (;;) + { + if (ptr >= ptrend) + { + if (start_c == CHAR_LEFT_PARENTHESIS) + errorcode = ERR14; /* Missing terminating ')' */ + else + errorcode = ERR6; /* Missing terminating ']' */ + goto FAILED; + } + + GETCHARINCTEST(c, ptr); + if (new_class_mode_state == CLASS_MODE_PERL_EXT) break; + else if (c == CHAR_BACKSLASH) + { + if (ptr < ptrend && *ptr == CHAR_E) ptr++; + else if (ptrend - ptr >= 3 && + PRIV(strncmp_c8)(ptr, STR_Q STR_BACKSLASH STR_E, 3) == 0) + ptr += 3; + else + break; + } + else if ((c == CHAR_SPACE || c == CHAR_HT) && /* Note: just these two */ + ((options & PCRE2_EXTENDED_MORE) != 0 || + new_class_mode_state >= CLASS_MODE_PERL_EXT)) + continue; + else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) + negate_class = TRUE; + else break; + } + + /* Now the real contents of the class; c has the first "real" character. + Empty classes are permitted only if the option is set, and if it's not + a Perl-extended class. */ + + if (c == CHAR_RIGHT_SQUARE_BRACKET && + (cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0 && + new_class_mode_state < CLASS_MODE_PERL_EXT) + { + PCRE2_ASSERT(start_c == CHAR_LEFT_SQUARE_BRACKET); + + if (class_start != NULL) + { + PCRE2_ASSERT(class_depth_m1 >= 0); + /* Represents that the class is an extended class. */ + *class_start |= CLASS_IS_ECLASS; + class_start = NULL; + } + + *parsed_pattern++ = negate_class? 
META_CLASS_EMPTY_NOT : META_CLASS_EMPTY; + + /* Leave nesting depth unchanged; but check for zero depth to handle the + very first (top-level) class being empty. */ + if (class_depth_m1 < 0) break; + + class_range_state = RANGE_NO; /* for processing the containing class */ + class_op_state = CLASS_OP_OPERAND; + goto CLASS_CONTINUE; + } + + /* Enter a non-empty class. */ + + if (class_start != NULL) + { + PCRE2_ASSERT(class_depth_m1 >= 0); + /* Represents that the class is an extended class. */ + *class_start |= CLASS_IS_ECLASS; + class_start = NULL; + } + + class_start = parsed_pattern; + *parsed_pattern++ = negate_class? META_CLASS_NOT : META_CLASS; + class_range_state = RANGE_NO; + class_op_state = CLASS_OP_EMPTY; + class_mode_state = new_class_mode_state; + ++class_depth_m1; + if (class_maxdepth_m1 < class_depth_m1) + class_maxdepth_m1 = class_depth_m1; + /* Reset; no op seen yet at new depth. */ + cb->class_op_used[class_depth_m1] = 0; + + /* Implement the special start-of-class literal meaning of ']'. */ + if (c == CHAR_RIGHT_SQUARE_BRACKET && + new_class_mode_state != CLASS_MODE_PERL_EXT) + { + class_range_state = RANGE_OK_LITERAL; + class_op_state = CLASS_OP_OPERAND; + PARSED_LITERAL(c, parsed_pattern); + goto CLASS_CONTINUE; + } + + continue; /* We have already loaded c with the next character */ } - /* Handle a literal character */ + /* Check for the end of the class. */ - else if (c != CHAR_BACKSLASH) + else if (c == CHAR_RIGHT_SQUARE_BRACKET || + (c == CHAR_RIGHT_PARENTHESIS && class_mode_state == CLASS_MODE_PERL_EXT)) { - CLASS_LITERAL: - if (class_range_state == RANGE_STARTED) + /* In Perl extended mode, the ']' can only be used to match the + opening '[', and ')' must match an opening parenthesis. 
*/ + if (class_mode_state == CLASS_MODE_PERL_EXT) { - if (c == parsed_pattern[-2]) /* Optimize one-char range */ - parsed_pattern--; - else if (parsed_pattern[-2] > c) /* Check range is in order */ + if (c == CHAR_RIGHT_SQUARE_BRACKET && class_depth_m1 != 0) { - errorcode = ERR8; + errorcode = ERR14; goto FAILED_BACK; } - else + if (c == CHAR_RIGHT_PARENTHESIS && class_depth_m1 < 1) { - if (!char_is_literal && parsed_pattern[-1] == META_RANGE_LITERAL) - parsed_pattern[-1] = META_RANGE_ESCAPED; - PARSED_LITERAL(c, parsed_pattern); + errorcode = ERR22; + goto FAILED; } - class_range_state = RANGE_NO; } - else /* Potential start of range */ + + /* Check no trailing operator. */ + if (class_op_state == CLASS_OP_OPERATOR) { - class_range_state = char_is_literal? - RANGE_OK_LITERAL : RANGE_OK_ESCAPED; - PARSED_LITERAL(c, parsed_pattern); + errorcode = ERR110; + goto FAILED; } + + /* Check no empty expression for Perl extended expressions. */ + if (class_mode_state == CLASS_MODE_PERL_EXT && + class_op_state == CLASS_OP_EMPTY) + { + errorcode = ERR114; + goto FAILED; + } + + /* -] at the end of a class is a literal '-' */ + if (class_range_state == RANGE_STARTED) + parsed_pattern[-1] = CHAR_MINUS; + + *parsed_pattern++ = META_CLASS_END; + + if (--class_depth_m1 < 0) + { + /* Check for and consume ')' after '(?[...]'. */ + PCRE2_ASSERT(class_mode_state != CLASS_MODE_PERL_EXT_LEAF); + if (class_mode_state == CLASS_MODE_PERL_EXT) + { + if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) + { + errorcode = ERR115; + goto FAILED; + } + + ptr++; + } + + break; + } + + class_range_state = RANGE_NO; /* for processing the containing class */ + class_op_state = CLASS_OP_OPERAND; + if (class_mode_state == CLASS_MODE_PERL_EXT_LEAF) + class_mode_state = CLASS_MODE_PERL_EXT; + /* The extended class flag has already + been set for the parent class. 
*/ + class_start = NULL; + } + + /* Handle a Perl set binary operator */ + + else if (class_mode_state == CLASS_MODE_PERL_EXT && + (c == CHAR_PLUS || c == CHAR_VERTICAL_LINE || c == CHAR_MINUS || + c == CHAR_AMPERSAND || c == CHAR_CIRCUMFLEX_ACCENT)) + { + /* Check that there was a preceding operand. */ + if (class_op_state != CLASS_OP_OPERAND) + { + errorcode = ERR109; + goto FAILED; + } + + if (class_start != NULL) + { + PCRE2_ASSERT(class_depth_m1 >= 0); + /* Represents that the class is an extended class. */ + *class_start |= CLASS_IS_ECLASS; + class_start = NULL; + } + + PCRE2_ASSERT(class_range_state != RANGE_STARTED && + class_range_state != RANGE_FORBID_STARTED); + + *parsed_pattern++ = c == CHAR_PLUS? META_ECLASS_OR : + c == CHAR_VERTICAL_LINE? META_ECLASS_OR : + c == CHAR_MINUS? META_ECLASS_SUB : + c == CHAR_AMPERSAND? META_ECLASS_AND : + META_ECLASS_XOR; + class_range_state = RANGE_NO; + class_op_state = CLASS_OP_OPERATOR; + } + + /* Handle a Perl set unary operator */ + + else if (class_mode_state == CLASS_MODE_PERL_EXT && + c == CHAR_EXCLAMATION_MARK) + { + /* Check that the "!" has not got a preceding operand (i.e. it's the + start of the class, or follows an operator). */ + if (class_op_state == CLASS_OP_OPERAND) + { + errorcode = ERR113; + goto FAILED; + } + + if (class_start != NULL) + { + PCRE2_ASSERT(class_depth_m1 >= 0); + /* Represents that the class is an extended class. */ + *class_start |= CLASS_IS_ECLASS; + class_start = NULL; + } + + PCRE2_ASSERT(class_range_state != RANGE_STARTED && + class_range_state != RANGE_FORBID_STARTED); + + *parsed_pattern++ = META_ECLASS_NOT; + class_range_state = RANGE_NO; + class_op_state = CLASS_OP_OPERATOR; + } + + /* Handle a UTS#18 set operator */ + + else if (class_mode_state == CLASS_MODE_ALT_EXT && + (c == CHAR_VERTICAL_LINE || c == CHAR_MINUS || + c == CHAR_AMPERSAND || c == CHAR_TILDE) && + ptr < ptrend && *ptr == c) + { + ++ptr; + + /* Check there isn't a triple-repetition. 
*/ + if (ptr < ptrend && *ptr == c) + { + while (ptr < ptrend && *ptr == c) ++ptr; /* Improve error offset. */ + errorcode = ERR108; + goto FAILED; + } + + /* Check for a preceding operand. */ + if (class_op_state != CLASS_OP_OPERAND) + { + errorcode = ERR109; + goto FAILED; + } + + /* Check for mixed precedence. Forbid [A--B&&C]. */ + if (cb->class_op_used[class_depth_m1] != 0 && + cb->class_op_used[class_depth_m1] != (uint8_t)c) + { + errorcode = ERR111; + goto FAILED; + } + + if (class_start != NULL) + { + PCRE2_ASSERT(class_depth_m1 >= 0); + /* Represents that the class is an extended class. */ + *class_start |= CLASS_IS_ECLASS; + class_start = NULL; + } + + /* Dangling '-' before an operator is a literal */ + if (class_range_state == RANGE_STARTED) + parsed_pattern[-1] = CHAR_MINUS; + + *parsed_pattern++ = c == CHAR_VERTICAL_LINE? META_ECLASS_OR : + c == CHAR_MINUS? META_ECLASS_SUB : + c == CHAR_AMPERSAND? META_ECLASS_AND : + META_ECLASS_XOR; + class_range_state = RANGE_NO; + class_op_state = CLASS_OP_OPERATOR; + cb->class_op_used[class_depth_m1] = (uint8_t)c; } /* Handle escapes in a class */ - else + else if (c == CHAR_BACKSLASH) { tempptr = ptr; escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, - xoptions, TRUE, cb); + xoptions, cb->bracount, TRUE, cb); if (errorcode != 0) { - if ((xoptions & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0) + if ((xoptions & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0 || + class_mode_state >= CLASS_MODE_PERL_EXT) goto FAILED; ptr = tempptr; if (ptr >= ptrend) c = CHAR_BACKSLASH; else @@ -3796,6 +4285,11 @@ while (ptr < ptrend) char_is_literal = FALSE; goto CLASS_LITERAL; + case ESC_k: + c = CHAR_k; /* \k is not special in a class, just like \g */ + char_is_literal = FALSE; + goto CLASS_LITERAL; + case ESC_Q: inescq = TRUE; /* Enter literal mode */ goto CLASS_CONTINUE; @@ -3807,29 +4301,10 @@ while (ptr < ptrend) case ESC_R: case ESC_X: errorcode = ERR7; - ptr--; + ptr--; // TODO 
https://github.com/PCRE2Project/pcre2/issues/549 goto FAILED; - } - /* The second part of a range can be a single-character escape - sequence (detected above), but not any of the other escapes. Perl - treats a hyphen as a literal in such circumstances. However, in Perl's - warning mode, a warning is given, so PCRE now faults it, as it is - almost certainly a mistake on the user's part. */ - - if (class_range_state == RANGE_STARTED) - { - errorcode = ERR50; - goto FAILED; /* Not CLASS_ESCAPE_FAILED; always an error */ - } - - /* Of the remaining escapes, only those that define characters are - allowed in a class. None may start a range. */ - - class_range_state = RANGE_NO; - switch(escape) - { - case ESC_N: + case ESC_N: /* Not permitted by Perl either */ errorcode = ERR71; goto FAILED; @@ -3863,6 +4338,18 @@ while (ptr < ptrend) uint16_t ptype = 0, pdata = 0; if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcode, cb)) goto FAILED; + + /* In caseless matching, particular characteristics Lu, Ll, and Lt + get converted to the general characteristic L&. That is, upper, + lower, and title case letters are all conflated. */ + + if ((options & PCRE2_CASELESS) != 0 && ptype == PT_PC && + (pdata == ucp_Lu || pdata == ucp_Ll || pdata == ucp_Lt)) + { + ptype = PT_LAMP; + pdata = 0; + } + if (negated) escape = (escape == ESC_P)? ESC_p : ESC_P; *parsed_pattern++ = META_ESCAPE + escape; *parsed_pattern++ = (ptype << 16) | pdata; @@ -3873,21 +4360,134 @@ while (ptr < ptrend) #endif break; /* End \P and \p */ - default: /* All others are not allowed in a class */ + /* All others are not allowed in a class */ + + default: + PCRE2_DEBUG_UNREACHABLE(); + /* Fall through */ + + case ESC_A: + case ESC_Z: + case ESC_z: + case ESC_G: + case ESC_K: + case ESC_C: errorcode = ERR7; - ptr--; + ptr--; // TODO https://github.com/PCRE2Project/pcre2/issues/549 goto FAILED; } - /* Perl gives a warning unless a following hyphen is the last character - in the class. PCRE throws an error. 
*/ + /* All the switch-cases above which end in "break" describe a set + of characters. None may start a range. */ - if (ptr < ptrend - 1 && *ptr == CHAR_MINUS && - ptr[1] != CHAR_RIGHT_SQUARE_BRACKET) + /* The second part of a range can be a single-character escape + sequence (detected above), but not any of the other escapes. Perl + treats a hyphen as a literal in such circumstances. However, in Perl's + warning mode, a warning is given, so PCRE now faults it, as it is + almost certainly a mistake on the user's part. */ + + if (class_range_state == RANGE_STARTED) { errorcode = ERR50; goto FAILED; } + + /* Perl gives a warning unless the hyphen following a multi-character + escape is the last character in the class. PCRE throws an error. */ + + if (class_range_state == RANGE_FORBID_STARTED) + { + ptr = class_range_forbid_ptr; + errorcode = ERR50; + goto FAILED; + } + + /* Disallow implicit union in Perl extended classes. */ + + if (class_op_state == CLASS_OP_OPERAND && + class_mode_state == CLASS_MODE_PERL_EXT) + { + errorcode = ERR113; + goto FAILED; + } + + class_range_state = RANGE_FORBID_NO; + class_op_state = CLASS_OP_OPERAND; + } + + /* Forbid unescaped literals, and the special meaning of '-', inside a + Perl extended class. */ + + else if (class_mode_state == CLASS_MODE_PERL_EXT) + { + errorcode = ERR116; + goto FAILED; + } + + /* Handle potential start of range */ + + else if (c == CHAR_MINUS && class_range_state >= RANGE_OK_ESCAPED) + { + *parsed_pattern++ = (class_range_state == RANGE_OK_LITERAL)? + META_RANGE_LITERAL : META_RANGE_ESCAPED; + class_range_state = RANGE_STARTED; + } + + /* Handle forbidden start of range */ + + else if (c == CHAR_MINUS && class_range_state == RANGE_FORBID_NO) + { + *parsed_pattern++ = CHAR_MINUS; + class_range_state = RANGE_FORBID_STARTED; + class_range_forbid_ptr = ptr; + } + + /* Handle a literal character */ + + else + { + CLASS_LITERAL: + + /* Disallow implicit union in Perl extended classes. 
*/ + + if (class_op_state == CLASS_OP_OPERAND && + class_mode_state == CLASS_MODE_PERL_EXT) + { + errorcode = ERR113; + goto FAILED; + } + + if (class_range_state == RANGE_STARTED) + { + if (c == parsed_pattern[-2]) /* Optimize one-char range */ + parsed_pattern--; + else if (parsed_pattern[-2] > c) /* Check range is in order */ + { + errorcode = ERR8; + goto FAILED_BACK; // TODO https://github.com/PCRE2Project/pcre2/issues/549 + } + else + { + if (!char_is_literal && parsed_pattern[-1] == META_RANGE_LITERAL) + parsed_pattern[-1] = META_RANGE_ESCAPED; + PARSED_LITERAL(c, parsed_pattern); + } + class_range_state = RANGE_NO; + class_op_state = CLASS_OP_OPERAND; + } + else if (class_range_state == RANGE_FORBID_STARTED) + { + ptr = class_range_forbid_ptr; + errorcode = ERR50; + goto FAILED; + } + else /* Potential start of range */ + { + class_range_state = char_is_literal? + RANGE_OK_LITERAL : RANGE_OK_ESCAPED; + class_op_state = CLASS_OP_OPERAND; + PARSED_LITERAL(c, parsed_pattern); + } } /* Proceed to next thing in the class. */ @@ -3895,22 +4495,18 @@ while (ptr < ptrend) CLASS_CONTINUE: if (ptr >= ptrend) { - errorcode = ERR6; /* Missing terminating ']' */ + if (class_mode_state == CLASS_MODE_PERL_EXT && class_depth_m1 > 0) + errorcode = ERR14; /* Missing terminating ')' */ + if (class_mode_state == CLASS_MODE_ALT_EXT && + class_depth_m1 == 0 && class_maxdepth_m1 == 1) + errorcode = ERR112; /* Missing terminating ']', but we saw '[ [ ]...' 
*/ + else + errorcode = ERR6; /* Missing terminating ']' */ goto FAILED; } GETCHARINCTEST(c, ptr); - if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break; } /* End of class-processing loop */ - /* -] at the end of a class is a literal '-' */ - - if (class_range_state == RANGE_STARTED) - { - parsed_pattern[-1] = CHAR_MINUS; - class_range_state = RANGE_NO; - } - - *parsed_pattern++ = META_CLASS_END; break; /* End of character class */ @@ -3993,8 +4589,7 @@ while (ptr < ptrend) if (prev_expect_cond_assert > 0 && (meta < META_LOOKAHEAD || meta > META_LOOKBEHINDNOT)) { - errorcode = (meta == META_LOOKAHEAD_NA || meta == META_LOOKBEHIND_NA)? - ERR98 : ERR28; /* (Atomic) assertion expected */ + errorcode = ERR28; /* Atomic assertion expected */ goto FAILED; } @@ -4004,6 +4599,7 @@ while (ptr < ptrend) switch(meta) { default: + PCRE2_DEBUG_UNREACHABLE(); errorcode = ERR89; /* Unknown code; should never occur because */ goto FAILED; /* the meta values come from a table above. */ @@ -4019,6 +4615,90 @@ while (ptr < ptrend) case META_LOOKAHEADNOT: goto NEGATIVE_LOOK_AHEAD; + case META_SCS: + if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + + if (*ptr != CHAR_LEFT_PARENTHESIS) + { + errorcode = ERR15; + goto FAILED; + } + + ptr++; + *parsed_pattern++ = META_SCS; + /* Temporary variable, zero in the first iteration. */ + offset = 0; + + for (;;) + { + PCRE2_SIZE next_offset = (PCRE2_SIZE)(ptr - cb->start_pattern); + + /* Handle (scan_substring:([+-]number)... 
*/ + if (read_number(&ptr, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, + &i, &errorcode)) + { + PCRE2_ASSERT(i >= 0); + if (i <= 0) + { + errorcode = ERR15; + goto FAILED; + } + meta = META_SCS_NUMBER; + namelen = (uint32_t)i; + } + else if (errorcode != 0) goto FAILED; /* Number too big */ + else + { + if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + + /* Handle (*scan_substring:('name') or (*scan_substring:() */ + if (*ptr == CHAR_LESS_THAN_SIGN) + terminator = CHAR_GREATER_THAN_SIGN; + else if (*ptr == CHAR_APOSTROPHE) + terminator = CHAR_APOSTROPHE; + else + { + errorcode = ERR15; + goto FAILED; + } + + if (!read_name(&ptr, ptrend, utf, terminator, &next_offset, + &name, &namelen, &errorcode, cb)) goto FAILED; + + meta = META_SCS_NAME; + } + + PCRE2_ASSERT(next_offset > 0); + if (offset == 0 || (next_offset - offset) >= 0x10000) + { + *parsed_pattern++ = META_OFFSET; + PUTOFFSET(next_offset, parsed_pattern); + offset = next_offset; + } + + /* The offset is encoded as a relative offset, because for some + inputs such as ",2" in (*scs:(1,2,3)...), we only have space for + two uint32_t values, and an opcode and absolute offset may require + three uint32_t values. */ + *parsed_pattern++ = meta | (uint32_t)(next_offset - offset); + *parsed_pattern++ = namelen; + offset = next_offset; + + if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + + if (*ptr == CHAR_RIGHT_PARENTHESIS) break; + + if (*ptr != CHAR_COMMA) + { + errorcode = ERR24; + goto FAILED; + } + + ptr++; + } + ptr++; + goto POST_ASSERTION; + case META_LOOKBEHIND: case META_LOOKBEHINDNOT: case META_LOOKBEHIND_NA: @@ -4050,6 +4730,12 @@ while (ptr < ptrend) top_nest->flags = NSF_ATOMICSR; top_nest->options = options & PARSE_TRACKED_OPTIONS; top_nest->xoptions = xoptions & PARSE_TRACKED_EXTRA_OPTIONS; + +#ifdef PCRE2_DEBUG + /* We'll write out two META_KETs for a single ")" in the input + pattern, so we reserve space for that in our bounds check. 
*/ + parsed_pattern_extra++; +#endif } break; #else /* SUPPORT_UNICODE */ @@ -4109,6 +4795,11 @@ while (ptr < ptrend) verbstartptr = parsed_pattern; okquantifier = (verbs[i].meta == META_ACCEPT); +#ifdef PCRE2_DEBUG + /* Reserve space in our bounds check for optionally wrapping the (*ACCEPT) + with a non-capturing bracket, if there is a following quantifier. */ + if (okquantifier) parsed_pattern_extra += 2; +#endif /* It appears that Perl allows any characters whatsoever, other than a closing parenthesis, to appear in arguments ("names"), so we no longer @@ -4416,11 +5107,7 @@ while (ptr < ptrend) (IS_DIGIT(*ptr))? -1:(int)(cb->bracount), /* + and - are relative */ MAX_GROUP_NUMBER, ERR61, &i, &errorcode)) goto FAILED; - if (i < 0) /* NB (?0) is permitted */ - { - errorcode = ERR15; /* Unknown group */ - goto FAILED_BACK; - } + PCRE2_ASSERT(i >= 0); /* NB (?0) is permitted, represented by i=0 */ if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) goto UNCLOSED_PARENTHESIS; @@ -4448,6 +5135,12 @@ while (ptr < ptrend) /* ---- Callout with numerical or string argument ---- */ case CHAR_C: + if ((xoptions & PCRE2_EXTRA_NEVER_CALLOUT) != 0) + { + errorcode = ERR103; + goto FAILED; + } + if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS; /* If the previous item was a condition starting (?(? 
an assertion, @@ -4535,7 +5228,7 @@ while (ptr < ptrend) parsed_pattern += 3; /* Skip pattern info */ while (ptr < ptrend && IS_DIGIT(*ptr)) { - n = n * 10 + *ptr++ - CHAR_0; + n = n * 10 + (*ptr++ - CHAR_0); if (n > 255) { errorcode = ERR38; @@ -4606,6 +5299,7 @@ while (ptr < ptrend) if (read_number(&ptr, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &i, &errorcode)) { + PCRE2_ASSERT(i >= 0); if (i <= 0) { errorcode = ERR15; @@ -4756,7 +5450,7 @@ while (ptr < ptrend) goto POST_ASSERTION; case CHAR_ASTERISK: - POSITIVE_NONATOMIC_LOOK_AHEAD: /* Come from (?* */ + POSITIVE_NONATOMIC_LOOK_AHEAD: /* Come from (*napla: */ *parsed_pattern++ = META_LOOKAHEAD_NA; ptr++; goto POST_ASSERTION; @@ -4921,6 +5615,18 @@ while (ptr < ptrend) cb->named_groups[cb->names_found].isdup = (uint16_t)isdupname; cb->names_found++; break; + + + /* ---- Perl extended character class ---- */ + + /* These are of the form '(?[...])'. We handle these via the same parser + that consumes ordinary '[...]' classes, but with a flag set to activate + the extended behaviour. */ + + case CHAR_LEFT_SQUARE_BRACKET: + class_mode_state = CLASS_MODE_PERL_EXT; + c = *ptr++; + goto FROM_PERL_EXTENDED_CLASS; } /* End of (? 
switch */ break; /* End of ( handling */ @@ -4959,6 +5665,11 @@ while (ptr < ptrend) if ((top_nest->flags & NSF_ATOMICSR) != 0) { *parsed_pattern++ = META_KET; + +#ifdef PCRE2_DEBUG + PCRE2_ASSERT(parsed_pattern_extra > 0); + parsed_pattern_extra--; +#endif } if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL; @@ -4967,7 +5678,7 @@ while (ptr < ptrend) if (nest_depth == 0) /* Unmatched closing parenthesis */ { errorcode = ERR22; - goto FAILED_BACK; + goto FAILED_BACK; // TODO https://github.com/PCRE2Project/pcre2/issues/549 } nest_depth--; *parsed_pattern++ = META_KET; @@ -4983,9 +5694,15 @@ if (inverbname && ptr >= ptrend) goto FAILED; } -/* Manage callout for the final item */ PARSED_END: + +PCRE2_ASSERT((parsed_pattern - parsed_pattern_check) + + (parsed_pattern_extra - parsed_pattern_extra_check) <= + max_parsed_pattern(ptr_check, ptr, utf, options)); + +/* Manage callout for the final item */ + parsed_pattern = manage_callouts(ptr, &previous_callout, auto_callout, parsed_pattern, cb); @@ -5008,6 +5725,7 @@ Otherwise we have unclosed parentheses. */ if (parsed_pattern >= parsed_pattern_end) { + PCRE2_DEBUG_UNREACHABLE(); errorcode = ERR63; /* Internal error (parsed pattern overflow) */ goto FAILED; } @@ -5117,415 +5835,8 @@ for (;;) return code; } } -/* Control never reaches here */ -} - - -#ifdef SUPPORT_UNICODE -/************************************************* -* Get othercase range * -*************************************************/ - -/* This function is passed the start and end of a class range in UCP mode. For -single characters the range may be just one character long. The function -searches up the characters, looking for ranges of characters in the "other" -case. Each call returns the next one, updating the start address. A character -with multiple other cases is returned on its own with a special return value. 
- -Arguments: - cptr points to starting character value; updated - d end value - ocptr where to put start of othercase range - odptr where to put end of othercase range - restricted TRUE if caseless restriction applies - -Yield: -1 when no more - 0 when a range is returned - >0 the CASESET offset for char with multiple other cases; - for this return, *ocptr contains the original -*/ - -static int -get_othercase_range(uint32_t *cptr, uint32_t d, uint32_t *ocptr, - uint32_t *odptr, BOOL restricted) -{ -uint32_t c, othercase, next; -unsigned int co; - -/* Find the first character that has an other case. If it has multiple other -cases, return its case offset value. When CASELESS_RESTRICT is set, ignore the -multi-case entries that begin with ASCII values. In 32-bit mode, a value -greater than the Unicode maximum ends the range. */ - -for (c = *cptr; c <= d; c++) - { -#if PCRE2_CODE_UNIT_WIDTH == 32 - if (c > MAX_UTF_CODE_POINT) return -1; -#endif - if ((co = UCD_CASESET(c)) != 0 && - (!restricted || PRIV(ucd_caseless_sets)[co] > 127)) - { - *ocptr = c++; /* Character that has the set */ - *cptr = c; /* Rest of input range */ - return (int)co; - } - - /* This is not a valid multiple-case character. Check that the single other - case is different to the original. We don't need to check "restricted" here - because the non-ASCII characters with multiple cases that include an ASCII - character don't have a different "othercase". */ - - if ((othercase = UCD_OTHERCASE(c)) != c) break; - } - -if (c > d) return -1; /* Reached end of range */ - -/* Found a character that has a single other case. Search for the end of the -range, which is either the end of the input range, or a character that has zero -or more than one other cases. 
*/ - -*ocptr = othercase; -next = othercase + 1; - -for (++c; c <= d; c++) - { - if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break; - next++; - } - -*odptr = next - 1; /* End of othercase range */ -*cptr = c; /* Rest of input range */ -return 0; -} -#endif /* SUPPORT_UNICODE */ - - - -/************************************************* -* Add a character or range to a class (internal) * -*************************************************/ - -/* This function packages up the logic of adding a character or range of -characters to a class. The character values in the arguments will be within the -valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is -called only from within the "add to class" group of functions, some of which -are recursive and mutually recursive. The external entry point is -add_to_class(). - -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits - xoptions the extra options bits - cb compile data - start start of range character - end end of range character - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static unsigned int -add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, - uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start, - uint32_t end) -{ -uint32_t c; -uint32_t classbits_end = (end <= 0xff ? end : 0xff); -unsigned int n8 = 0; - -/* If caseless matching is required, scan the range and process alternate -cases. In Unicode, there are 8-bit characters that have alternate cases that -are greater than 255 and vice-versa (though these may be ignored if caseless -restriction is in force). Sometimes we can just extend the original range. 
*/ - -if ((options & PCRE2_CASELESS) != 0) - { -#ifdef SUPPORT_UNICODE - if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0) - { - int rc; - uint32_t oc, od; - - options &= ~PCRE2_CASELESS; /* Remove for recursive calls */ - c = start; - - while ((rc = get_othercase_range(&c, end, &oc, &od, - (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)) >= 0) - { - /* Handle a single character that has more than one other case. */ - - if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr, - options, xoptions, cb, PRIV(ucd_caseless_sets) + rc, oc); - - /* Do nothing if the other case range is within the original range. */ - - else if (oc >= cb->class_range_start && od <= cb->class_range_end) - continue; - - /* Extend the original range if there is overlap, noting that if oc < c, - we can't have od > end because a subrange is always shorter than the - basic range. Otherwise, use a recursive call to add the additional range. - */ - - else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */ - else if (od > end && oc <= end + 1) - { - end = od; /* Extend upwards */ - if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff); - } - else n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, - cb, oc, od); - } - } - else -#else - (void)xoptions; /* Avoid compiler warning */ -#endif /* SUPPORT_UNICODE */ - - /* Not UTF mode */ - - for (c = start; c <= classbits_end; c++) - { - SETBIT(classbits, cb->fcc[c]); - n8++; - } - } - -/* Now handle the originally supplied range. Adjust the final value according -to the bit length - this means that the same lists of (e.g.) horizontal spaces -can be used in all cases. */ - -if ((options & PCRE2_UTF) == 0 && end > MAX_NON_UTF_CHAR) - end = MAX_NON_UTF_CHAR; - -if (start > cb->class_range_start && end < cb->class_range_end) return n8; - -/* Use the bitmap for characters < 256. Otherwise use extra data.*/ - -for (c = start; c <= classbits_end; c++) - { - /* Regardless of start, c will always be <= 255. 
*/ - SETBIT(classbits, c); - n8++; - } - -#ifdef SUPPORT_WIDE_CHARS -if (start <= 0xff) start = 0xff + 1; - -if (end >= start) - { - PCRE2_UCHAR *uchardata = *uchardptr; - -#ifdef SUPPORT_UNICODE - if ((options & PCRE2_UTF) != 0) - { - if (start < end) - { - *uchardata++ = XCL_RANGE; - uchardata += PRIV(ord2utf)(start, uchardata); - uchardata += PRIV(ord2utf)(end, uchardata); - } - else if (start == end) - { - *uchardata++ = XCL_SINGLE; - uchardata += PRIV(ord2utf)(start, uchardata); - } - } - else -#endif /* SUPPORT_UNICODE */ - - /* Without UTF support, character values are constrained by the bit length, - and can only be > 256 for 16-bit and 32-bit libraries. */ - -#if PCRE2_CODE_UNIT_WIDTH == 8 - {} -#else - if (start < end) - { - *uchardata++ = XCL_RANGE; - *uchardata++ = start; - *uchardata++ = end; - } - else if (start == end) - { - *uchardata++ = XCL_SINGLE; - *uchardata++ = start; - } -#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ - *uchardptr = uchardata; /* Updata extra data pointer */ - } -#else /* SUPPORT_WIDE_CHARS */ - (void)uchardptr; /* Avoid compiler warning */ -#endif /* SUPPORT_WIDE_CHARS */ - -return n8; /* Number of 8-bit characters */ -} - - - -#ifdef SUPPORT_UNICODE -/************************************************* -* Add a list of characters to a class (internal) * -*************************************************/ - -/* This function is used for adding a list of case-equivalent characters to a -class when in UTF mode. This function is called only from within -add_to_class_internal(), with which it is mutually recursive. - -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits - xoptions the extra options bits - cb contains pointers to tables etc. 
- p points to row of 32-bit values, terminated by NOTACHAR - except character to omit; this is used when adding lists of - case-equivalent characters to avoid including the one we - already know about - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static unsigned int -add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, - uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p, - unsigned int except) -{ -unsigned int n8 = 0; -while (p[0] < NOTACHAR) - { - unsigned int n = 0; - if (p[0] != except) - { - while(p[n+1] == p[0] + n + 1) n++; - n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, - p[0], p[n]); - } - p += n + 1; - } -return n8; -} -#endif - - - -/************************************************* -* External entry point for add range to class * -*************************************************/ - -/* This function sets the overall range so that the internal functions can try -to avoid duplication when handling case-independence. - -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits - xoptions the extra options bits - cb compile data - start start of range character - end end of range character - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static unsigned int -add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, - uint32_t xoptions, compile_block *cb, uint32_t start, uint32_t end) -{ -cb->class_range_start = start; -cb->class_range_end = end; -return add_to_class_internal(classbits, uchardptr, options, xoptions, cb, - start, end); -} - - -/************************************************* -* External entry point for add list to class * -*************************************************/ - -/* This function is used for adding a list of horizontal or vertical whitespace -characters to a class. 
The list must be in order so that ranges of characters -can be detected and handled appropriately. This function sets the overall range -so that the internal functions can try to avoid duplication when handling -case-independence. - -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits - xoptions the extra options bits - cb contains pointers to tables etc. - p points to row of 32-bit values, terminated by NOTACHAR - except character to omit; this is used when adding lists of - case-equivalent characters to avoid including the one we - already know about - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static unsigned int -add_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, - uint32_t xoptions, compile_block *cb, const uint32_t *p, unsigned int except) -{ -unsigned int n8 = 0; -while (p[0] < NOTACHAR) - { - unsigned int n = 0; - if (p[0] != except) - { - while(p[n+1] == p[0] + n + 1) n++; - cb->class_range_start = p[0]; - cb->class_range_end = p[n]; - n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, - p[0], p[n]); - } - p += n + 1; - } -return n8; -} - - - -/************************************************* -* Add characters not in a list to a class * -*************************************************/ - -/* This function is used for adding the complement of a list of horizontal or -vertical whitespace to a class. The list must be in order. - -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits - xoptions the extra options bits - cb contains pointers to tables etc. 
- p points to row of 32-bit values, terminated by NOTACHAR - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static unsigned int -add_not_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, - uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p) -{ -BOOL utf = (options & PCRE2_UTF) != 0; -unsigned int n8 = 0; -if (p[0] > 0) - n8 += add_to_class(classbits, uchardptr, options, xoptions, cb, 0, p[0] - 1); -while (p[0] < NOTACHAR) - { - while (p[1] == p[0] + 1) p++; - n8 += add_to_class(classbits, uchardptr, options, xoptions, cb, p[0] + 1, - (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1); - p++; - } -return n8; +PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ } @@ -5571,6 +5882,7 @@ have duplicate names. Give an internal error. */ if (i >= cb->names_found) { + PCRE2_DEBUG_UNREACHABLE(); *errorcodeptr = ERR53; cb->erroroffset = name - cb->start_pattern; return FALSE; @@ -5648,12 +5960,13 @@ uint32_t options = *optionsptr; /* May change dynamically */ uint32_t xoptions = *xoptionsptr; /* May change dynamically */ uint32_t firstcu, reqcu; uint32_t zeroreqcu, zerofirstcu; -uint32_t escape; uint32_t *pptr = *pptrptr; uint32_t meta, meta_arg; uint32_t firstcuflags, reqcuflags; uint32_t zeroreqcuflags, zerofirstcuflags; uint32_t req_caseopt, reqvary, tempreqvary; +/* Some opcodes, such as META_SCS_NUMBER or META_SCS_NAME, +depends on the previous value of offset. */ PCRE2_SIZE offset = 0; PCRE2_SIZE length_prevgroup = 0; PCRE2_UCHAR *code = *codeptr; @@ -5667,8 +5980,6 @@ BOOL had_accept = FALSE; BOOL matched_char = FALSE; BOOL previous_matched_char = FALSE; BOOL reset_caseful = FALSE; -const uint8_t *cbits = cb->cbits; -uint8_t classbits[32]; /* We can fish out the UTF setting once and for all into a BOOL, but we must not do this for other options (e.g. 
PCRE2_EXTENDED) that may change dynamically @@ -5681,17 +5992,6 @@ BOOL ucp = (options & PCRE2_UCP) != 0; BOOL utf = FALSE; #endif -/* Helper variables for OP_XCLASS opcode (for characters > 255). We define -class_uchardata always so that it can be passed to add_to_class() always, -though it will not be used in non-UTF 8-bit cases. This avoids having to supply -alternative calls for the different cases. */ - -PCRE2_UCHAR *class_uchardata; -#ifdef SUPPORT_WIDE_CHARS -BOOL xclass; -PCRE2_UCHAR *class_uchardata_base; -#endif - /* Set up the default and non-default settings for greediness */ greedy_default = ((options & PCRE2_UNGREEDY) != 0); @@ -5721,15 +6021,8 @@ req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0; for (;; pptr++) { -#ifdef SUPPORT_WIDE_CHARS - BOOL xclass_has_prop; -#endif - BOOL negate_class; - BOOL should_flip_negation; - BOOL match_all_or_no_wide_chars; BOOL possessive_quantifier; BOOL note_group_empty; - int class_has_8bitchar; uint32_t mclength; uint32_t skipunits; uint32_t subreqcu, subfirstcu; @@ -5752,8 +6045,13 @@ for (;; pptr++) if (code > cb->start_workspace + cb->workspace_size - WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */ { - *errorcodeptr = (code >= cb->start_workspace + cb->workspace_size)? - ERR52 : ERR86; + if (code >= cb->start_workspace + cb->workspace_size) + { + PCRE2_DEBUG_UNREACHABLE(); + *errorcodeptr = ERR52; /* Over-ran workspace - internal error */ + } + else + *errorcodeptr = ERR86; return 0; } @@ -5859,13 +6157,24 @@ for (;; pptr++) /* ===================================================================*/ /* Empty character classes are allowed if PCRE2_ALLOW_EMPTY_CLASS is set. Otherwise, an initial ']' is taken as a data character. When empty classes - are allowed, [] must always fail, so generate OP_FAIL, whereas [^] must - match any character, so generate OP_ALLANY. 
*/ + are allowed, [] must generate an empty class - we have no dedicated opcode + to optimise the representation, but it's a rare case (the '(*FAIL)' + construct would be a clearer way for a pattern author to represent a + non-matching branch, but it does have different semantics to '[]' if both + are followed by a quantifier). The empty-negated [^] matches any character, + so is useful: generate OP_ALLANY for this. */ case META_CLASS_EMPTY: case META_CLASS_EMPTY_NOT: matched_char = TRUE; - *code++ = (meta == META_CLASS_EMPTY_NOT)? OP_ALLANY : OP_FAIL; + if (meta == META_CLASS_EMPTY_NOT) *code++ = OP_ALLANY; + else + { + *code++ = OP_CLASS; + memset(code, 0, 32); + code += 32 / sizeof(PCRE2_UCHAR); + } + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; zerofirstcu = firstcu; zerofirstcuflags = firstcuflags; @@ -5888,7 +6197,16 @@ for (;; pptr++) case META_CLASS_NOT: case META_CLASS: matched_char = TRUE; - negate_class = meta == META_CLASS_NOT; + + /* Check for complex extended classes and handle them separately. */ + + if ((*pptr & CLASS_IS_ECLASS) != 0) + { + if (!PRIV(compile_class_nested)(options, xoptions, &pptr, &code, + errorcodeptr, cb, lengthptr)) + return 0; + goto CLASS_END_PROCESSING; + } /* We can optimize the case of a single character in a class by generating OP_CHAR or OP_CHARI if it's positive, or OP_NOT or OP_NOTI if it's @@ -5901,9 +6219,6 @@ for (;; pptr++) if (pptr[1] < META_END && pptr[2] == META_CLASS_END) { -#ifdef SUPPORT_UNICODE - uint32_t d; -#endif uint32_t c = pptr[1]; pptr += 2; /* Move on to class end */ @@ -5924,18 +6239,35 @@ for (;; pptr++) /* For caseless UTF or UCP mode, check whether this character has more than one other case. If so, generate a special OP_NOTPROP item instead of OP_NOTI. When restricted by PCRE2_EXTRA_CASELESS_RESTRICT, ignore any - caseless set that starts with an ASCII character. */ + caseless set that starts with an ASCII character. 
If the character is + affected by the special Turkish rules, hardcode the not-matching + characters using a caseset. */ #ifdef SUPPORT_UNICODE - if ((utf||ucp) && (options & PCRE2_CASELESS) != 0 && - (d = UCD_CASESET(c)) != 0 && - ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 || - PRIV(ucd_caseless_sets)[d] > 127)) + if ((utf||ucp) && (options & PCRE2_CASELESS) != 0) { - *code++ = OP_NOTPROP; - *code++ = PT_CLIST; - *code++ = d; - break; /* We are finished with this class */ + uint32_t caseset; + + if ((xoptions & (PCRE2_EXTRA_TURKISH_CASING|PCRE2_EXTRA_CASELESS_RESTRICT)) == + PCRE2_EXTRA_TURKISH_CASING && + UCD_ANY_I(c)) + { + caseset = PRIV(ucd_turkish_dotted_i_caseset) + (UCD_DOTTED_I(c)? 0 : 3); + } + else if ((caseset = UCD_CASESET(c)) != 0 && + (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && + PRIV(ucd_caseless_sets)[caseset] < 128) + { + caseset = 0; /* Ignore the caseless set if it's restricted. */ + } + + if (caseset != 0) + { + *code++ = OP_NOTPROP; + *code++ = PT_CLIST; + *code++ = caseset; + break; /* We are finished with this class */ + } } #endif /* Char has only one other (usable) case, or UCP not available */ @@ -5950,7 +6282,8 @@ for (;; pptr++) they are case partners. This can be optimized to generate a caseless single character match (which also sets first/required code units if relevant). When casing restrictions apply, ignore a caseless set if both characters - are ASCII. */ + are ASCII. When Turkish casing applies, an 'i' does not match its normal + Unicode "othercase". 
*/ if (meta == META_CLASS && pptr[1] < META_END && pptr[2] < META_END && pptr[3] == META_CLASS_END) @@ -5958,9 +6291,12 @@ for (;; pptr++) uint32_t c = pptr[1]; #ifdef SUPPORT_UNICODE - if (UCD_CASESET(c) == 0 || - ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && - c < 128 && pptr[2] < 128)) + if ((UCD_CASESET(c) == 0 || + ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && + c < 128 && pptr[2] < 128)) && + !((xoptions & (PCRE2_EXTRA_TURKISH_CASING|PCRE2_EXTRA_CASELESS_RESTRICT)) == + PCRE2_EXTRA_TURKISH_CASING && + UCD_ANY_I(c))) #endif { uint32_t d; @@ -5990,384 +6326,15 @@ for (;; pptr++) } } - /* If a non-extended class contains a negative special such as \S, we need - to flip the negation flag at the end, so that support for characters > 255 - works correctly (they are all included in the class). An extended class may - need to insert specific matching or non-matching code for wide characters. - */ + /* Now emit the OP_CLASS/OP_NCLASS/OP_XCLASS/OP_ALLANY opcode. */ - should_flip_negation = match_all_or_no_wide_chars = FALSE; + pptr = PRIV(compile_class_not_nested)(options, xoptions, pptr + 1, + &code, meta == META_CLASS_NOT, NULL, + errorcodeptr, cb, lengthptr); + if (pptr == NULL) return 0; + PCRE2_ASSERT(*pptr == META_CLASS_END); - /* Extended class (xclass) will be used when characters > 255 - might match. */ - -#ifdef SUPPORT_WIDE_CHARS - xclass = FALSE; - class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ - class_uchardata_base = class_uchardata; /* Save the start */ -#endif - - /* For optimization purposes, we track some properties of the class: - class_has_8bitchar will be non-zero if the class contains at least one - character with a code point less than 256; xclass_has_prop will be TRUE if - Unicode property checks are present in the class. */ - - class_has_8bitchar = 0; -#ifdef SUPPORT_WIDE_CHARS - xclass_has_prop = FALSE; -#endif - - /* Initialize the 256-bit (32-byte) bit map to all zeros. 
We build the map - in a temporary bit of memory, in case the class contains fewer than two - 8-bit characters because in that case the compiled code doesn't use the bit - map. */ - - memset(classbits, 0, 32 * sizeof(uint8_t)); - - /* Process items until META_CLASS_END is reached. */ - - while ((meta = *(++pptr)) != META_CLASS_END) - { - /* Handle POSIX classes such as [:alpha:] etc. */ - - if (meta == META_POSIX || meta == META_POSIX_NEG) - { - BOOL local_negate = (meta == META_POSIX_NEG); - int posix_class = *(++pptr); - int taboffset, tabopt; - uint8_t pbits[32]; - - should_flip_negation = local_negate; /* Note negative special */ - - /* If matching is caseless, upper and lower are converted to alpha. - This relies on the fact that the class table starts with alpha, - lower, upper as the first 3 entries. */ - - if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2) - posix_class = 0; - - /* When PCRE2_UCP is set, some of the POSIX classes are converted to - different escape sequences that use Unicode properties \p or \P. - Others that are not available via \p or \P have to generate - XCL_PROP/XCL_NOTPROP directly, which is done here. */ - -#ifdef SUPPORT_UNICODE - if ((options & PCRE2_UCP) != 0 && - (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0) - { - switch(posix_class) - { - case PC_GRAPH: - case PC_PRINT: - case PC_PUNCT: - *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP; - *class_uchardata++ = (PCRE2_UCHAR) - ((posix_class == PC_GRAPH)? PT_PXGRAPH : - (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT); - *class_uchardata++ = 0; - xclass_has_prop = TRUE; - goto CONTINUE_CLASS; - - /* For the other POSIX classes (ex: ascii) we are going to - fall through to the non-UCP case and build a bit map for - characters with code points less than 256. However, if we are in - a negated POSIX class, characters with code points greater than - 255 must either all match or all not match, depending on whether - the whole class is not or is negated. 
For example, for - [[:^ascii:]... they must all match, whereas for [^[:^ascii:]... - they must not. - - In the special case where there are no xclass items, this is - automatically handled by the use of OP_CLASS or OP_NCLASS, but an - explicit range is needed for OP_XCLASS. Setting a flag here - causes the range to be generated later when it is known that - OP_XCLASS is required. In the 8-bit library this is relevant only in - utf mode, since no wide characters can exist otherwise. */ - - default: -#if PCRE2_CODE_UNIT_WIDTH == 8 - if (utf) -#endif - match_all_or_no_wide_chars |= local_negate; - break; - } - } -#endif /* SUPPORT_UNICODE */ - - /* In the non-UCP case, or when UCP makes no difference, we build the - bit map for the POSIX class in a chunk of local store because we may - be adding and subtracting from it, and we don't want to subtract bits - that may be in the main map already. At the end we or the result into - the bit map that is being built. */ - - posix_class *= 3; - - /* Copy in the first table (always present) */ - - memcpy(pbits, cbits + posix_class_maps[posix_class], - 32 * sizeof(uint8_t)); - - /* If there is a second table, add or remove it as required. */ - - taboffset = posix_class_maps[posix_class + 1]; - tabopt = posix_class_maps[posix_class + 2]; - - if (taboffset >= 0) - { - if (tabopt >= 0) - for (int i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset]; - else - for (int i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset]; - } - - /* Now see if we need to remove any special characters. An option - value of 1 removes vertical space and 2 removes underscore. */ - - if (tabopt < 0) tabopt = -tabopt; - if (tabopt == 1) pbits[1] &= ~0x3c; - else if (tabopt == 2) pbits[11] &= 0x7f; - - /* Add the POSIX table or its complement into the main table that is - being built and we are done. 
*/ - - if (local_negate) - for (int i = 0; i < 32; i++) classbits[i] |= (uint8_t)(~pbits[i]); - else - for (int i = 0; i < 32; i++) classbits[i] |= pbits[i]; - - /* Every class contains at least one < 256 character. */ - - class_has_8bitchar = 1; - goto CONTINUE_CLASS; /* End of POSIX handling */ - } - - /* Other than POSIX classes, the only items we should encounter are - \d-type escapes and literal characters (possibly as ranges). */ - - if (meta == META_BIGVALUE) - { - meta = *(++pptr); - goto CLASS_LITERAL; - } - - /* Any other non-literal must be an escape */ - - if (meta >= META_END) - { - if (META_CODE(meta) != META_ESCAPE) - { -#ifdef DEBUG_SHOW_PARSED - fprintf(stderr, "** Unrecognized parsed pattern item 0x%.8x " - "in character class\n", meta); -#endif - *errorcodeptr = ERR89; /* Internal error - unrecognized. */ - return 0; - } - escape = META_DATA(meta); - - /* Every class contains at least one < 256 character. */ - - class_has_8bitchar++; - - switch(escape) - { - case ESC_d: - for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit]; - break; - - case ESC_D: - should_flip_negation = TRUE; - for (int i = 0; i < 32; i++) - classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]); - break; - - case ESC_w: - for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word]; - break; - - case ESC_W: - should_flip_negation = TRUE; - for (int i = 0; i < 32; i++) - classbits[i] |= (uint8_t)(~cbits[i+cbit_word]); - break; - - /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl - 5.18. Before PCRE 8.34, we had to preserve the VT bit if it was - previously set by something earlier in the character class. - Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so - we could just adjust the appropriate bit. From PCRE 8.34 we no - longer treat \s and \S specially. 
*/ - - case ESC_s: - for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space]; - break; - - case ESC_S: - should_flip_negation = TRUE; - for (int i = 0; i < 32; i++) - classbits[i] |= (uint8_t)(~cbits[i+cbit_space]); - break; - - /* When adding the horizontal or vertical space lists to a class, or - their complements, disable PCRE2_CASELESS, because it justs wastes - time, and in the "not-x" UTF cases can create unwanted duplicates in - the XCLASS list (provoked by characters that have more than one other - case and by both cases being in the same "not-x" sublist). */ - - case ESC_h: - (void)add_list_to_class(classbits, &class_uchardata, - options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list), - NOTACHAR); - break; - - case ESC_H: - (void)add_not_list_to_class(classbits, &class_uchardata, - options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list)); - break; - - case ESC_v: - (void)add_list_to_class(classbits, &class_uchardata, - options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list), - NOTACHAR); - break; - - case ESC_V: - (void)add_not_list_to_class(classbits, &class_uchardata, - options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list)); - break; - - /* If Unicode is not supported, \P and \p are not allowed and are - faulted at parse time, so will never appear here. */ - -#ifdef SUPPORT_UNICODE - case ESC_p: - case ESC_P: - { - uint32_t ptype = *(++pptr) >> 16; - uint32_t pdata = *pptr & 0xffff; - *class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP; - *class_uchardata++ = ptype; - *class_uchardata++ = pdata; - xclass_has_prop = TRUE; - class_has_8bitchar--; /* Undo! */ - } - break; -#endif - } - - goto CONTINUE_CLASS; - } /* End handling \d-type escapes */ - - /* A literal character may be followed by a range meta. At parse time - there are checks for out-of-order characters, for ranges where the two - characters are equal, and for hyphens that cannot indicate a range. At - this point, therefore, no checking is needed. 
*/ - - else - { - uint32_t c, d; - - CLASS_LITERAL: - c = d = meta; - - /* Remember if \r or \n were explicitly used */ - - if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF; - - /* Process a character range */ - - if (pptr[1] == META_RANGE_LITERAL || pptr[1] == META_RANGE_ESCAPED) - { -#ifdef EBCDIC - BOOL range_is_literal = (pptr[1] == META_RANGE_LITERAL); -#endif - pptr += 2; - d = *pptr; - if (d == META_BIGVALUE) d = *(++pptr); - - /* Remember an explicit \r or \n, and add the range to the class. */ - - if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF; - - /* In an EBCDIC environment, Perl treats alphabetic ranges specially - because there are holes in the encoding, and simply using the range - A-Z (for example) would include the characters in the holes. This - applies only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */ - -#ifdef EBCDIC - if (range_is_literal && - (cb->ctypes[c] & ctype_letter) != 0 && - (cb->ctypes[d] & ctype_letter) != 0 && - (c <= CHAR_z) == (d <= CHAR_z)) - { - uint32_t uc = (d <= CHAR_z)? 0 : 64; - uint32_t C = c - uc; - uint32_t D = d - uc; - - if (C <= CHAR_i) - { - class_has_8bitchar += - add_to_class(classbits, &class_uchardata, options, xoptions, - cb, C + uc, ((D < CHAR_i)? D : CHAR_i) + uc); - C = CHAR_j; - } - - if (C <= D && C <= CHAR_r) - { - class_has_8bitchar += - add_to_class(classbits, &class_uchardata, options, xoptions, - cb, C + uc, ((D < CHAR_r)? D : CHAR_r) + uc); - C = CHAR_s; - } - - if (C <= D) - { - class_has_8bitchar += - add_to_class(classbits, &class_uchardata, options, xoptions, - cb, C + uc, D + uc); - } - } - else -#endif - /* Not an EBCDIC special range */ - - class_has_8bitchar += add_to_class(classbits, &class_uchardata, - options, xoptions, cb, c, d); - goto CONTINUE_CLASS; /* Go get the next char in the class */ - } /* End of range handling */ - - - /* Handle a single character. 
*/ - - class_has_8bitchar += - add_to_class(classbits, &class_uchardata, options, xoptions, cb, - meta, meta); - } - - /* Continue to the next item in the class. */ - - CONTINUE_CLASS: - -#ifdef SUPPORT_WIDE_CHARS - /* If any wide characters or Unicode properties have been encountered, - set xclass = TRUE. Then, in the pre-compile phase, accumulate the length - of the extra data and reset the pointer. This is so that very large - classes that contain a zillion wide characters or Unicode property tests - do not overwrite the workspace (which is on the stack). */ - - if (class_uchardata > class_uchardata_base) - { - xclass = TRUE; - if (lengthptr != NULL) - { - *lengthptr += class_uchardata - class_uchardata_base; - class_uchardata = class_uchardata_base; - } - } -#endif - - continue; /* Needed to avoid error when not supporting wide chars */ - } /* End of main class-processing loop */ + CLASS_END_PROCESSING: /* If this class is the first thing in the branch, there can be no first char setting, whatever the repeat count. Any reqcu setting must remain @@ -6378,108 +6345,6 @@ for (;; pptr++) zerofirstcuflags = firstcuflags; zeroreqcu = reqcu; zeroreqcuflags = reqcuflags; - - /* If there are characters with values > 255, or Unicode property settings - (\p or \P), we have to compile an extended class, with its own opcode, - unless there were no property settings and there was a negated special such - as \S in the class, and PCRE2_UCP is not set, because in that case all - characters > 255 are in or not in the class, so any that were explicitly - given as well can be ignored. - - In the UCP case, if certain negated POSIX classes (ex: [:^ascii:]) were - were present in a class, we either have to match or not match all wide - characters (depending on whether the whole class is or is not negated). - This requirement is indicated by match_all_or_no_wide_chars being true. - We do this by including an explicit range, which works in both cases. 
- This applies only in UTF and 16-bit and 32-bit non-UTF modes, since there - cannot be any wide characters in 8-bit non-UTF mode. - - When there *are* properties in a positive UTF-8 or any 16-bit or 32_bit - class where \S etc is present without PCRE2_UCP, causing an extended class - to be compiled, we make sure that all characters > 255 are included by - forcing match_all_or_no_wide_chars to be true. - - If, when generating an xclass, there are no characters < 256, we can omit - the bitmap in the actual compiled code. */ - -#ifdef SUPPORT_WIDE_CHARS /* Defined for 16/32 bits, or 8-bit with Unicode */ - if (xclass && ( -#ifdef SUPPORT_UNICODE - (options & PCRE2_UCP) != 0 || -#endif - xclass_has_prop || !should_flip_negation)) - { - if (match_all_or_no_wide_chars || ( -#if PCRE2_CODE_UNIT_WIDTH == 8 - utf && -#endif - should_flip_negation && !negate_class && (options & PCRE2_UCP) == 0)) - { - *class_uchardata++ = XCL_RANGE; - if (utf) /* Will always be utf in the 8-bit library */ - { - class_uchardata += PRIV(ord2utf)(0x100, class_uchardata); - class_uchardata += PRIV(ord2utf)(MAX_UTF_CODE_POINT, class_uchardata); - } - else /* Can only happen for the 16-bit & 32-bit libraries */ - { -#if PCRE2_CODE_UNIT_WIDTH == 16 - *class_uchardata++ = 0x100; - *class_uchardata++ = 0xffffu; -#elif PCRE2_CODE_UNIT_WIDTH == 32 - *class_uchardata++ = 0x100; - *class_uchardata++ = 0xffffffffu; -#endif - } - } - *class_uchardata++ = XCL_END; /* Marks the end of extra data */ - *code++ = OP_XCLASS; - code += LINK_SIZE; - *code = negate_class? XCL_NOT:0; - if (xclass_has_prop) *code |= XCL_HASPROP; - - /* If the map is required, move up the extra data to make room for it; - otherwise just move the code pointer to the end of the extra data. 
*/ - - if (class_has_8bitchar > 0) - { - *code++ |= XCL_MAP; - (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, - CU2BYTES(class_uchardata - code)); - if (negate_class && !xclass_has_prop) - { - /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ - for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; - } - memcpy(code, classbits, 32); - code = class_uchardata + (32 / sizeof(PCRE2_UCHAR)); - } - else code = class_uchardata; - - /* Now fill in the complete length of the item */ - - PUT(previous, 1, (int)(code - previous)); - break; /* End of class handling */ - } -#endif /* SUPPORT_WIDE_CHARS */ - - /* If there are no characters > 255, or they are all to be included or - excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the - whole class was negated and whether there were negative specials such as \S - (non-UCP) in the class. Then copy the 32-byte map into the code vector, - negating it if necessary. */ - - *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; - if (lengthptr == NULL) /* Save time in the pre-compile phase */ - { - if (negate_class) - { - /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ - for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; - } - memcpy(code, classbits, 32); - } - code += 32 / sizeof(PCRE2_UCHAR); break; /* End of class processing */ @@ -6585,6 +6450,15 @@ for (;; pptr++) req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0; break; + case META_OFFSET: + GETPLUSOFFSET(offset, pptr); + break; + + case META_SCS: + bravalue = OP_ASSERT_SCS; + cb->assert_depth += 1; + goto GROUP_PROCESS; + /* ===================================================================*/ /* Handle conditional subpatterns. 
The case of (?(Rdigits) is ambiguous @@ -6596,6 +6470,7 @@ for (;; pptr++) case META_COND_RNUMBER: /* (?(Rdigits) */ case META_COND_NAME: /* (?(name) or (?'name') or ?() */ case META_COND_RNAME: /* (?(R&name) - test for recursion */ + case META_SCS_NAME: /* Name of scan substring */ bravalue = OP_COND; { int count, index; @@ -6604,7 +6479,10 @@ for (;; pptr++) named_group *ng = cb->named_groups; uint32_t length = *(++pptr); - GETPLUSOFFSET(offset, pptr); + if (meta == META_SCS_NAME) + offset += meta_arg; + else + GETPLUSOFFSET(offset, pptr); name = cb->start_pattern + offset; /* In the first pass, the names generated in the pre-pass are available, @@ -6614,34 +6492,21 @@ for (;; pptr++) numerical group. */ for (i = 0; i < cb->names_found; i++, ng++) - { if (length == ng->length && - PRIV(strncmp)(name, ng->name, length) == 0) - { - if (!ng->isdup) - { - code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF; - PUT2(code, 2+LINK_SIZE, ng->number); - if (ng->number > cb->top_backref) cb->top_backref = ng->number; - skipunits = 1+IMM2_SIZE; - goto GROUP_PROCESS_NOTE_EMPTY; - } - break; /* Found a duplicated name */ - } - } - - /* If the name was not found we have a bad reference, unless we are - dealing with R, which is treated as a recursion test by number. - */ + PRIV(strncmp)(name, ng->name, length) == 0) break; if (i >= cb->names_found) { + /* If the name was not found we have a bad reference, unless we are + dealing with R, which is treated as a recursion test by + number. 
*/ + groupnumber = 0; if (meta == META_COND_RNUMBER) { for (i = 1; i < length; i++) { - groupnumber = groupnumber * 10 + name[i] - CHAR_0; + groupnumber = groupnumber * 10 + (name[i] - CHAR_0); if (groupnumber > MAX_GROUP_NUMBER) { *errorcodeptr = ERR61; @@ -6668,11 +6533,26 @@ for (;; pptr++) skipunits = 1+IMM2_SIZE; goto GROUP_PROCESS_NOTE_EMPTY; } + else if (!ng->isdup) + { + /* Otherwise found a duplicated name */ + if (ng->number > cb->top_backref) cb->top_backref = ng->number; - /* A duplicated name was found. Note that if an R name is found - (META_COND_RNUMBER), it is a reference test, not a recursion test. */ + if (meta == META_SCS_NAME) + { + code[0] = OP_CREF; + PUT2(code, 1, ng->number); + code += 1+IMM2_SIZE; + break; + } - code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF; + code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF; + PUT2(code, 2+LINK_SIZE, ng->number); + skipunits = 1+IMM2_SIZE; + if (meta != META_SCS_NAME) goto GROUP_PROCESS_NOTE_EMPTY; + cb->assert_depth += 1; + goto GROUP_PROCESS; + } /* We have a duplicated name. In the compile pass we have to search the main table in order to get the index and count values. */ @@ -6682,14 +6562,27 @@ for (;; pptr++) if (lengthptr == NULL && !find_dupname_details(name, length, &index, &count, errorcodeptr, cb)) return 0; - /* Add one to the opcode to change CREF/RREF into DNCREF/DNRREF and - insert appropriate data values. */ + if (meta == META_SCS_NAME) + { + code[0] = OP_DNCREF; + PUT2(code, 1, index); + PUT2(code, 1+IMM2_SIZE, count); + code += 1+2*IMM2_SIZE; + break; + } - code[1+LINK_SIZE]++; + /* A duplicated name was found. Note that if an R name is found + (META_COND_RNUMBER), it is a reference test, not a recursion test. */ + + code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_DNRREF : OP_DNCREF; + + /* Insert appropriate data values. 
*/ skipunits = 1+2*IMM2_SIZE; PUT2(code, 2+LINK_SIZE, index); PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count); } + + PCRE2_ASSERT(meta != META_SCS_NAME); goto GROUP_PROCESS_NOTE_EMPTY; /* The DEFINE condition is always false. Its internal groups may never @@ -6706,8 +6599,13 @@ for (;; pptr++) /* Conditional test of a group's being set. */ case META_COND_NUMBER: + case META_SCS_NUMBER: bravalue = OP_COND; - GETPLUSOFFSET(offset, pptr); + if (meta == META_SCS_NUMBER) + offset += meta_arg; + else + GETPLUSOFFSET(offset, pptr); + groupnumber = *(++pptr); if (groupnumber > cb->bracount) { @@ -6716,7 +6614,17 @@ for (;; pptr++) return 0; } if (groupnumber > cb->top_backref) cb->top_backref = groupnumber; - offset -= 2; /* Point at initial ( for too many branches error */ + + if (meta == META_SCS_NUMBER) + { + code[0] = OP_CREF; + PUT2(code, 1, groupnumber); + code += 1+IMM2_SIZE; + break; + } + + /* Point at initial ( for too many branches error */ + offset -= 2; code[1+LINK_SIZE] = OP_CREF; skipunits = 1+IMM2_SIZE; PUT2(code, 2+LINK_SIZE, groupnumber); @@ -6854,7 +6762,7 @@ for (;; pptr++) /* If we've just compiled an assertion, pop the assert depth. */ - if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NA) + if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERT_SCS) cb->assert_depth -= 1; /* At the end of compiling, code is still pointing to the start of the @@ -7093,6 +7001,11 @@ for (;; pptr++) *code++ = ((options & PCRE2_CASELESS) != 0)? OP_DNREFI : OP_DNREF; PUT2INC(code, 0, index); PUT2INC(code, 0, count); + if ((options & PCRE2_CASELESS) != 0) + *code++ = (((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? + REFI_FLAG_CASELESS_RESTRICT : 0) | + (((xoptions & PCRE2_EXTRA_TURKISH_CASING) != 0)? + REFI_FLAG_TURKISH_CASING : 0); } break; @@ -7212,7 +7125,6 @@ for (;; pptr++) single-char opcodes. */ reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY; - op_type = 0; /* Adjust first and required code units for a zero repeat. 
*/ @@ -7253,6 +7165,7 @@ for (;; pptr++) /* Save start of previous item, in case we have to move it up in order to insert something before it, and remember what it was. */ + PCRE2_ASSERT(previous != NULL); tempcode = previous; op_previous = *previous; @@ -7312,6 +7225,7 @@ for (;; pptr++) #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: + case OP_ECLASS: #endif case OP_CLASS: case OP_NCLASS: @@ -7342,14 +7256,6 @@ for (;; pptr++) } break; - /* If previous is OP_FAIL, it was generated by an empty class [] - (PCRE2_ALLOW_EMPTY_CLASS is set). The other ways in which OP_FAIL can be - generated, that is by (*FAIL) or (?!), disallow a quantifier at parse - time. We can just ignore this repeat. */ - - case OP_FAIL: - goto END_REPEAT; - /* Prior to 10.30, repeated recursions were wrapped in OP_ONCE brackets because pcre2_match() could not handle backtracking into recursively called groups. Now that this backtracking is available, we no longer need @@ -7433,6 +7339,7 @@ for (;; pptr++) case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRA: @@ -7549,7 +7456,8 @@ for (;; pptr++) if (lengthptr != NULL) { PCRE2_SIZE delta; - if (PRIV(ckd_smul)(&delta, repeat_min - 1, length_prevgroup) || + if (PRIV(ckd_smul)(&delta, repeat_min - 1, + (int)length_prevgroup) || OFLOW_MAX - *lengthptr < delta) { *errorcodeptr = ERR20; @@ -7599,7 +7507,7 @@ for (;; pptr++) { PCRE2_SIZE delta; if (PRIV(ckd_smul)(&delta, repeat_max, - length_prevgroup + 1 + 2 + 2*LINK_SIZE) || + (int)length_prevgroup + 1 + 2 + 2*LINK_SIZE) || OFLOW_MAX + (2 + 2*LINK_SIZE) - *lengthptr < delta) { *errorcodeptr = ERR20; @@ -7756,9 +7664,10 @@ for (;; pptr++) here because it just makes it horribly messy. 
*/ default: - if (op_previous >= OP_EODN) /* Not a character type - internal error */ + if (op_previous >= OP_EODN || op_previous <= OP_WORD_BOUNDARY) { - *errorcodeptr = ERR10; + PCRE2_DEBUG_UNREACHABLE(); + *errorcodeptr = ERR10; /* Not a character type - internal error */ return 0; } else @@ -7778,7 +7687,8 @@ for (;; pptr++) } else { - /* Come here from just above with a character in mcbuffer/mclength. */ + /* Come here from just above with a character in mcbuffer/mclength. + You must also set op_type before the jump. */ OUTPUT_SINGLE_REPEAT: prop_type = prop_value = -1; } @@ -7961,6 +7871,7 @@ for (;; pptr++) #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: + case OP_ECLASS: tempcode += GET(tempcode, 1); break; #endif @@ -8045,6 +7956,11 @@ for (;; pptr++) if (firstcuflags == REQ_UNSET) zerofirstcuflags = firstcuflags = REQ_NONE; *code++ = ((options & PCRE2_CASELESS) != 0)? OP_REFI : OP_REF; PUT2INC(code, 0, meta_arg); + if ((options & PCRE2_CASELESS) != 0) + *code++ = (((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? + REFI_FLAG_CASELESS_RESTRICT : 0) | + (((xoptions & PCRE2_EXTRA_TURKISH_CASING) != 0)? + REFI_FLAG_TURKISH_CASING : 0); /* Update the map of back references, and keep the highest one. We could do this in parse_regex() for numerical back references, but not @@ -8137,12 +8053,30 @@ for (;; pptr++) uint32_t ptype = *(++pptr) >> 16; uint32_t pdata = *pptr & 0xffff; - /* The special case of \p{Any} is compiled to OP_ALLANY so as to benefit - from the auto-anchoring code. */ + /* In caseless matching, particular characteristics Lu, Ll, and Lt get + converted to the general characteristic L&. That is, upper, lower, and + title case letters are all conflated. 
*/ - if (meta_arg == ESC_p && ptype == PT_ANY) + if ((options & PCRE2_CASELESS) != 0 && ptype == PT_PC && + (pdata == ucp_Lu || pdata == ucp_Ll || pdata == ucp_Lt)) { - *code++ = OP_ALLANY; + ptype = PT_LAMP; + pdata = 0; + } + + /* The special case of \p{Any} is compiled to OP_ALLANY and \P{Any} + is compiled to [] so as to benefit from the auto-anchoring code. */ + + if (ptype == PT_ANY) + { + if (meta_arg == ESC_P) + { + *code++ = OP_CLASS; + memset(code, 0, 32); + code += 32 / sizeof(PCRE2_UCHAR); + } + else + *code++ = OP_ALLANY; } else { @@ -8211,9 +8145,7 @@ for (;; pptr++) default: if (meta >= META_END) { -#ifdef DEBUG_SHOW_PARSED - fprintf(stderr, "** Unrecognized parsed pattern item 0x%.8x\n", *pptr); -#endif + PCRE2_DEBUG_UNREACHABLE(); *errorcodeptr = ERR89; /* Internal error - unrecognized. */ return 0; } @@ -8229,15 +8161,28 @@ for (;; pptr++) /* For caseless UTF or UCP mode, check whether this character has more than one other case. If so, generate a special OP_PROP item instead of OP_CHARI. When casing restrictions apply, ignore caseless sets that start with an - ASCII character. */ + ASCII character. If the character is affected by the special Turkish rules, + hardcode the matching characters using a caseset. */ #ifdef SUPPORT_UNICODE if ((utf||ucp) && (options & PCRE2_CASELESS) != 0) { - uint32_t caseset = UCD_CASESET(meta); - if (caseset != 0 && - ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 || - PRIV(ucd_caseless_sets)[caseset] > 127)) + uint32_t caseset; + + if ((xoptions & (PCRE2_EXTRA_TURKISH_CASING|PCRE2_EXTRA_CASELESS_RESTRICT)) == + PCRE2_EXTRA_TURKISH_CASING && + UCD_ANY_I(meta)) + { + caseset = PRIV(ucd_turkish_dotted_i_caseset) + (UCD_DOTTED_I(meta)? 0 : 3); + } + else if ((caseset = UCD_CASESET(meta)) != 0 && + (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && + PRIV(ucd_caseless_sets)[caseset] < 128) + { + caseset = 0; /* Ignore the caseless set if it's restricted. 
*/ + } + + if (caseset != 0) { *code++ = OP_PROP; *code++ = PT_CLIST; @@ -8335,7 +8280,8 @@ for (;; pptr++) } /* End of big switch */ } /* End of big loop */ -/* Control never reaches here. */ +PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ +return 0; /* Avoid compiler warnings */ } @@ -8391,8 +8337,6 @@ uint32_t firstcu, reqcu; uint32_t lookbehindlength; uint32_t lookbehindminlength; uint32_t firstcuflags, reqcuflags; -uint32_t branchfirstcu, branchreqcu; -uint32_t branchfirstcuflags, branchreqcuflags; PCRE2_SIZE length; branch_chain bc; @@ -8461,9 +8405,11 @@ code += 1 + LINK_SIZE + skipunits; for (;;) { int branch_return; + uint32_t branchfirstcu = 0, branchreqcu = 0; + uint32_t branchfirstcuflags = REQ_UNSET, branchreqcuflags = REQ_UNSET; /* Insert OP_REVERSE or OP_VREVERSE if this is a lookbehind assertion. There - is only a single mimimum length for the whole assertion. When the mimimum + is only a single minimum length for the whole assertion. When the minimum length is LOOKBEHIND_MAX it means that all branches are of fixed length, though not necessarily the same length. In this case, the original OP_REVERSE can be used. 
It can also be used if a branch in a variable length lookbehind @@ -8575,10 +8521,10 @@ for (;;) { if (lengthptr == NULL) { - PCRE2_SIZE branch_length = code - last_branch; + uint32_t branch_length = (uint32_t)(code - last_branch); do { - PCRE2_SIZE prev_length = GET(last_branch, 1); + uint32_t prev_length = GET(last_branch, 1); PUT(last_branch, 1, branch_length); branch_length = prev_length; last_branch -= branch_length; @@ -8589,7 +8535,7 @@ for (;;) /* Fill in the ket */ *code = OP_KET; - PUT(code, 1, (int)(code - start_bracket)); + PUT(code, 1, (uint32_t)(code - start_bracket)); code += 1 + LINK_SIZE; /* Set values to pass back */ @@ -8640,7 +8586,9 @@ for (;;) lookbehindlength = META_DATA(*pptr); pptr++; } -/* Control never reaches here */ + +PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ +return 0; /* Avoid compiler warnings */ } @@ -8683,13 +8631,14 @@ Arguments: cb points to the compile data block atomcount atomic group level inassert TRUE if in an assertion + dotstar_anchor TRUE if automatic anchoring optimization is enabled Returns: TRUE or FALSE */ static BOOL is_anchored(PCRE2_SPTR code, uint32_t bracket_map, compile_block *cb, - int atomcount, BOOL inassert) + int atomcount, BOOL inassert, BOOL dotstar_anchor) { do { PCRE2_SPTR scode = first_significant_code( @@ -8701,7 +8650,7 @@ do { if (op == OP_BRA || op == OP_BRAPOS || op == OP_SBRA || op == OP_SBRAPOS) { - if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) + if (!is_anchored(scode, bracket_map, cb, atomcount, inassert, dotstar_anchor)) return FALSE; } @@ -8712,14 +8661,14 @@ do { { int n = GET2(scode, 1+LINK_SIZE); uint32_t new_map = bracket_map | ((n < 32)? 
(1u << n) : 1); - if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE; + if (!is_anchored(scode, new_map, cb, atomcount, inassert, dotstar_anchor)) return FALSE; } /* Positive forward assertion */ else if (op == OP_ASSERT || op == OP_ASSERT_NA) { - if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE; + if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE, dotstar_anchor)) return FALSE; } /* Condition. If there is no second branch, it can't be anchored. */ @@ -8727,7 +8676,7 @@ do { else if (op == OP_COND || op == OP_SCOND) { if (scode[GET(scode,1)] != OP_ALT) return FALSE; - if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) + if (!is_anchored(scode, bracket_map, cb, atomcount, inassert, dotstar_anchor)) return FALSE; } @@ -8735,7 +8684,7 @@ do { else if (op == OP_ONCE) { - if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert)) + if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert, dotstar_anchor)) return FALSE; } @@ -8750,8 +8699,7 @@ do { op == OP_TYPEPOSSTAR)) { if (scode[1] != OP_ALLANY || (bracket_map & cb->backref_map) != 0 || - atomcount > 0 || cb->had_pruneorskip || inassert || - (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) + atomcount > 0 || cb->had_pruneorskip || inassert || !dotstar_anchor) return FALSE; } @@ -8788,13 +8736,14 @@ Arguments: cb points to the compile data atomcount atomic group level inassert TRUE if in an assertion + dotstar_anchor TRUE if automatic anchoring optimization is enabled Returns: TRUE or FALSE */ static BOOL is_startline(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb, - int atomcount, BOOL inassert) + int atomcount, BOOL inassert, BOOL dotstar_anchor) { do { PCRE2_SPTR scode = first_significant_code( @@ -8825,7 +8774,8 @@ do { return FALSE; default: /* Assertion */ - if (!is_startline(scode, bracket_map, cb, atomcount, TRUE)) return FALSE; + if (!is_startline(scode, bracket_map, cb, atomcount, TRUE, dotstar_anchor)) + 
return FALSE; do scode += GET(scode, 1); while (*scode == OP_ALT); scode += 1 + LINK_SIZE; break; @@ -8839,7 +8789,7 @@ do { if (op == OP_BRA || op == OP_BRAPOS || op == OP_SBRA || op == OP_SBRAPOS) { - if (!is_startline(scode, bracket_map, cb, atomcount, inassert)) + if (!is_startline(scode, bracket_map, cb, atomcount, inassert, dotstar_anchor)) return FALSE; } @@ -8850,14 +8800,15 @@ do { { int n = GET2(scode, 1+LINK_SIZE); unsigned int new_map = bracket_map | ((n < 32)? (1u << n) : 1); - if (!is_startline(scode, new_map, cb, atomcount, inassert)) return FALSE; + if (!is_startline(scode, new_map, cb, atomcount, inassert, dotstar_anchor)) + return FALSE; } /* Positive forward assertions */ else if (op == OP_ASSERT || op == OP_ASSERT_NA) { - if (!is_startline(scode, bracket_map, cb, atomcount, TRUE)) + if (!is_startline(scode, bracket_map, cb, atomcount, TRUE, dotstar_anchor)) return FALSE; } @@ -8865,7 +8816,7 @@ do { else if (op == OP_ONCE) { - if (!is_startline(scode, bracket_map, cb, atomcount + 1, inassert)) + if (!is_startline(scode, bracket_map, cb, atomcount + 1, inassert, dotstar_anchor)) return FALSE; } @@ -8879,8 +8830,7 @@ do { else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR) { if (scode[1] != OP_ANY || (bracket_map & cb->backref_map) != 0 || - atomcount > 0 || cb->had_pruneorskip || inassert || - (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) + atomcount > 0 || cb->had_pruneorskip || inassert || !dotstar_anchor) return FALSE; } @@ -8914,8 +8864,8 @@ Arguments: Returns: pointer to the opcode for OP_RECURSE, or NULL if not found */ -static PCRE2_SPTR -find_recurse(PCRE2_SPTR code, BOOL utf) +static PCRE2_UCHAR * +find_recurse(PCRE2_UCHAR *code, BOOL utf) { for (;;) { @@ -8924,12 +8874,13 @@ for (;;) if (c == OP_RECURSE) return code; /* XCLASS is used for classes that cannot be represented just by a bit map. - This includes negated single high-valued characters. CALLOUT_STR is used for - callouts with string arguments. 
In both cases the length in the table is + This includes negated single high-valued characters. ECLASS is used for + classes that use set operations internally. CALLOUT_STR is used for + callouts with string arguments. In each case the length in the table is zero; the actual length is stored in the compiled code. */ - if (c == OP_XCLASS) code += GET(code, 1); - else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); + if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1); + else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); /* Otherwise, we can get the item's length from the table, except that for repeated character types, we have to test for \p and \P, which have an extra @@ -9259,9 +9210,12 @@ for (;; pptr++) if (meta < META_END) continue; /* Literal */ break; - /* This should never occur. */ - case META_END: + + /* The parsed regex is malformed; we have reached the end and did + not find the end of the construct which we are skipping over. */ + + PCRE2_DEBUG_UNREACHABLE(); return NULL; /* The data for these items is variable in length. */ @@ -9270,19 +9224,9 @@ for (;; pptr++) if (META_DATA(*pptr) >= 10) pptr += SIZEOFFSET; break; - case META_ESCAPE: /* A few escapes are followed by data items. */ - switch (META_DATA(*pptr)) - { - case ESC_P: - case ESC_p: - pptr += 1; - break; - - case ESC_g: - case ESC_k: - pptr += 1 + SIZEOFFSET; - break; - } + case META_ESCAPE: + if (*pptr - META_ESCAPE == ESC_P || *pptr - META_ESCAPE == ESC_p) + pptr += 1; /* Skip prop data */ break; case META_MARK: /* Add the length of the name. 
*/ @@ -9308,6 +9252,7 @@ for (;; pptr++) case META_COND_RNAME: case META_COND_RNUMBER: case META_COND_VERSION: + case META_SCS: case META_LOOKAHEAD: case META_LOOKAHEADNOT: case META_LOOKAHEAD_NA: @@ -9335,8 +9280,8 @@ for (;; pptr++) if (meta >= sizeof(meta_extra_lengths)) return NULL; pptr += meta_extra_lengths[meta]; } -/* Control never reaches here */ -return pptr; + +PCRE2_UNREACHABLE(); /* Control never reaches here */ } @@ -9467,10 +9412,10 @@ for (;; pptr++) parsed_recurse_check *r; uint32_t *gptr, *gptrend; uint32_t escape; + uint32_t min, max; uint32_t group = 0; uint32_t itemlength = 0; uint32_t itemminlength = 0; - uint32_t min, max; if (*pptr < META_END) { @@ -9569,6 +9514,7 @@ for (;; pptr++) case META_LOOKAHEAD: case META_LOOKAHEADNOT: case META_LOOKAHEAD_NA: + case META_SCS: *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb, lcptr); if (*errcodeptr != 0) return -1; @@ -9600,7 +9546,9 @@ for (;; pptr++) break; /* A nested lookbehind does not contribute any length to this lookbehind, - but must itself be checked and have its lengths set. */ + but must itself be checked and have its lengths set. Note that + set_lookbehind_lengths() updates pptr, leaving it pointing to the final ket + of the group, so no need to update it here. */ case META_LOOKBEHIND: case META_LOOKBEHINDNOT: @@ -9836,7 +9784,8 @@ EXIT: return branchlength; PARSED_SKIP_FAILED: -*errcodeptr = ERR90; +PCRE2_DEBUG_UNREACHABLE(); +*errcodeptr = ERR90; /* Unhandled META code - internal error */ return -1; } @@ -9908,7 +9857,7 @@ do *bptr |= branchlength; /* branchlength never more than 65535 */ bptr = *pptrptr; } -while (*bptr == META_ALT); +while (META_CODE(*bptr) == META_ALT); /* If any branch is of variable length, the whole lookbehind is of variable length. If the maximum length of any branch exceeds the maximum for variable @@ -9920,7 +9869,7 @@ possibly different) length. 
*/ if (variable) { gbptr[1] = minlength; - if ((uint32_t)maxlength > cb->max_varlookbehind) + if ((PCRE2_SIZE)maxlength > cb->max_varlookbehind) { *errcodeptr = ERR100; cb->erroroffset = offset; @@ -9929,8 +9878,6 @@ if (variable) } else gbptr[1] = LOOKBEHIND_MAX; - -gbptr[1] = variable? minlength : LOOKBEHIND_MAX; return TRUE; } @@ -9976,11 +9923,18 @@ for (; *pptr != META_END; pptr++) switch (META_CODE(*pptr)) { default: + + /* The following erroroffset is a bogus but safe value. This branch should + be avoided by providing a proper implementation for all supported cases + below. */ + + PCRE2_DEBUG_UNREACHABLE(); + cb->erroroffset = 0; return ERR70; /* Unrecognized meta code */ case META_ESCAPE: if (*pptr - META_ESCAPE == ESC_P || *pptr - META_ESCAPE == ESC_p) - pptr += 1; + pptr += 1; /* Skip prop data */ break; case META_KET: @@ -9994,6 +9948,7 @@ for (; *pptr != META_END; pptr++) case META_ATOMIC: case META_CAPTURE: case META_COND_ASSERT: + case META_SCS: case META_LOOKAHEAD: case META_LOOKAHEADNOT: case META_LOOKAHEAD_NA: @@ -10031,6 +9986,7 @@ for (; *pptr != META_END; pptr++) case META_THEN: break; + case META_OFFSET: case META_RECURSE: pptr += SIZEOFFSET; break; @@ -10065,6 +10021,8 @@ for (; *pptr != META_END; pptr++) case META_BIGVALUE: case META_POSIX: case META_POSIX_NEG: + case META_SCS_NAME: + case META_SCS_NUMBER: pptr += 1; break; @@ -10087,6 +10045,9 @@ for (; *pptr != META_END; pptr++) pptr += 1 + pptr[1]; break; + /* Note that set_lookbehind_lengths() updates pptr, leaving it pointing to + the final ket of the group, so no need to update it here. 
*/ + case META_LOOKBEHIND: case META_LOOKBEHINDNOT: case META_LOOKBEHIND_NA: @@ -10133,19 +10094,19 @@ compile_block cb; /* "Static" compile-time data */ const uint8_t *tables; /* Char tables base pointer */ PCRE2_UCHAR *code; /* Current pointer in compiled code */ -PCRE2_SPTR codestart; /* Start of compiled code */ +PCRE2_UCHAR * codestart; /* Start of compiled code */ PCRE2_SPTR ptr; /* Current pointer in pattern */ uint32_t *pptr; /* Current pointer in parsed pattern */ PCRE2_SIZE length = 1; /* Allow for final END opcode */ PCRE2_SIZE usedlength; /* Actual length used */ PCRE2_SIZE re_blocksize; /* Size of memory block */ -PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */ PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */ uint32_t firstcuflags, reqcuflags; /* Type of first/req code unit */ uint32_t firstcu, reqcu; /* Value of first/req code unit */ uint32_t setflags = 0; /* NL and BSR set flags */ +uint32_t xoptions; /* Flags from context, modified */ uint32_t skipatstart; /* When checking (*UTF) etc */ uint32_t limit_heap = UINT32_MAX; @@ -10159,6 +10120,10 @@ int regexrc; /* Return from compile */ uint32_t i; /* Local loop counter */ +/* Enable all optimizations by default. */ +uint32_t optim_flags = ccontext != NULL ? ccontext->optimization_flags : + PCRE2_OPTIMIZATION_ALL; + /* Comments at the head of this file explain about these variables. */ uint32_t stack_groupinfo[GROUPINFO_DEFAULT_SIZE]; @@ -10222,6 +10187,7 @@ PCRE2_ZERO_TERMINATED. Check for an overlong pattern. */ if ((zero_terminated = (patlen == PCRE2_ZERO_TERMINATED))) patlen = PRIV(strlen)(pattern); +(void)zero_terminated; /* Silence compiler; only used if Valgrind enabled */ if (patlen > ccontext->max_pattern_length) { @@ -10229,6 +10195,18 @@ if (patlen > ccontext->max_pattern_length) return NULL; } +/* Optimization flags in 'options' can override those in the compile context. 
+This is because some options to disable optimizations were added before the +optimization flags word existed, and we need to continue supporting them +for backwards compatibility. */ + +if ((options & PCRE2_NO_AUTO_POSSESS) != 0) + optim_flags &= ~PCRE2_OPTIM_AUTO_POSSESS; +if ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) + optim_flags &= ~PCRE2_OPTIM_DOTSTAR_ANCHOR; +if ((options & PCRE2_NO_START_OPTIMIZE) != 0) + optim_flags &= ~PCRE2_OPTIM_START_OPTIMIZE; + /* From here on, all returns from this function should end up going via the EXIT label. */ @@ -10267,6 +10245,11 @@ cb.start_code = cworkspace; cb.start_pattern = pattern; cb.start_workspace = cworkspace; cb.workspace_size = COMPILE_WORK_SIZE; +#ifdef SUPPORT_WIDE_CHARS +cb.cranges = NULL; +cb.next_cranges = NULL; +cb.char_lists_size = 0; +#endif /* Maximum back reference and backref bitmap. The bitmap records up to 31 back references to help in deciding whether (.*) can be treated as anchored or not. @@ -10300,6 +10283,7 @@ non-zero-terminated patterns. 
*/ if (zero_terminated) VALGRIND_MAKE_MEM_NOACCESS(pattern + patlen, CU2BYTES(1)); #endif +xoptions = ccontext->extra_options; ptr = pattern; skipatstart = 0; @@ -10311,13 +10295,13 @@ if ((options & PCRE2_LITERAL) == 0) { for (i = 0; i < sizeof(pso_list)/sizeof(pso); i++) { - uint32_t c, pp; const pso *p = pso_list + i; if (patlen - skipatstart - 2 >= p->length && - PRIV(strncmp_c8)(ptr + skipatstart + 2, (char *)(p->name), - p->length) == 0) + PRIV(strncmp_c8)(ptr + skipatstart + 2, p->name, p->length) == 0) { + uint32_t c, pp; + skipatstart += p->length + 2; switch(p->type) { @@ -10325,6 +10309,10 @@ if ((options & PCRE2_LITERAL) == 0) cb.external_options |= p->value; break; + case PSO_XOPT: + xoptions |= p->value; + break; + case PSO_FLG: setflags |= p->value; break; @@ -10344,18 +10332,12 @@ if ((options & PCRE2_LITERAL) == 0) case PSO_LIMH: c = 0; pp = skipatstart; - if (!IS_DIGIT(ptr[pp])) - { - errorcode = ERR60; - ptr += pp; - goto HAD_EARLY_ERROR; - } - while (IS_DIGIT(ptr[pp])) + while (pp < patlen && IS_DIGIT(ptr[pp])) { if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */ c = c*10 + (ptr[pp++] - CHAR_0); } - if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS) + if (pp >= patlen || pp == skipatstart || ptr[pp] != CHAR_RIGHT_PARENTHESIS) { errorcode = ERR60; ptr += pp; @@ -10364,14 +10346,45 @@ if ((options & PCRE2_LITERAL) == 0) if (p->type == PSO_LIMH) limit_heap = c; else if (p->type == PSO_LIMM) limit_match = c; else limit_depth = c; - skipatstart += pp - skipatstart; + skipatstart = ++pp; break; + + case PSO_OPTMZ: + optim_flags &= ~(p->value); + + /* For backward compatibility the three original VERBs to disable + optimizations need to also update the corresponding bit in the + external options. 
*/ + + switch(p->value) + { + case PCRE2_OPTIM_AUTO_POSSESS: + cb.external_options |= PCRE2_NO_AUTO_POSSESS; + break; + + case PCRE2_OPTIM_DOTSTAR_ANCHOR: + cb.external_options |= PCRE2_NO_DOTSTAR_ANCHOR; + break; + + case PCRE2_OPTIM_START_OPTIMIZE: + cb.external_options |= PCRE2_NO_START_OPTIMIZE; + break; + } + + break; + + default: + /* All values in the enum need an explicit entry for this switch + but until a better way to prevent coding mistakes is invented keep + a catch all that triggers a debug build assert as a failsafe */ + PCRE2_DEBUG_UNREACHABLE(); } break; /* Out of the table scan loop */ } } if (i >= sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */ } + PCRE2_ASSERT(skipatstart <= patlen); } /* End of pattern-start options; advance to start of real regex. */ @@ -10423,6 +10436,31 @@ if (ucp && (cb.external_options & PCRE2_NEVER_UCP) != 0) goto HAD_EARLY_ERROR; } +/* PCRE2_EXTRA_TURKISH_CASING checks */ + +if ((xoptions & PCRE2_EXTRA_TURKISH_CASING) != 0) + { + if (!utf && !ucp) + { + errorcode = ERR104; + goto HAD_EARLY_ERROR; + } + +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (!utf) + { + errorcode = ERR105; + goto HAD_EARLY_ERROR; + } +#endif + + if ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0) + { + errorcode = ERR106; + goto HAD_EARLY_ERROR; + } + } + /* Process the BSR setting. */ if (bsr == 0) bsr = ccontext->bsr_convention; @@ -10463,6 +10501,7 @@ switch(newline) break; default: + PCRE2_DEBUG_UNREACHABLE(); errorcode = ERR56; goto HAD_EARLY_ERROR; } @@ -10471,42 +10510,31 @@ switch(newline) their numerical equivalents, so that this information is always available for the remaining processing. (2) At the same time, parse the pattern and put a processed version into the parsed_pattern vector. This has escapes interpreted -and comments removed (amongst other things). +and comments removed (amongst other things). 
*/ -In all but one case, when PCRE2_AUTO_CALLOUT is not set, the number of unsigned -32-bit ints in the parsed pattern is bounded by the length of the pattern plus -one (for the terminator) plus four if PCRE2_EXTRA_WORD or PCRE2_EXTRA_LINE is -set. The exceptional case is when running in 32-bit, non-UTF mode, when literal -characters greater than META_END (0x80000000) have to be coded as two units. In -this case, therefore, we scan the pattern to check for such values. */ +/* Ensure that the parsed pattern buffer is big enough. For many smaller +patterns the vector on the stack (which was set up above) can be used. */ -#if PCRE2_CODE_UNIT_WIDTH == 32 -if (!utf) - { - PCRE2_SPTR p; - for (p = ptr; p < cb.end_pattern; p++) if (*p >= META_END) big32count++; - } -#endif +parsed_size_needed = max_parsed_pattern(ptr, cb.end_pattern, utf, options); -/* Ensure that the parsed pattern buffer is big enough. When PCRE2_AUTO_CALLOUT -is set we have to assume a numerical callout (4 elements) for each character -plus one at the end. This is overkill, but memory is plentiful these days. For -many smaller patterns the vector on the stack (which was set up above) can be -used. */ - -parsed_size_needed = patlen - skipatstart + big32count; +/* Allow for 2x uint32_t at the start and 2 at the end, for +PCRE2_EXTRA_MATCH_WORD or PCRE2_EXTRA_MATCH_LINE (which are exclusive). */ if ((ccontext->extra_options & (PCRE2_EXTRA_MATCH_WORD|PCRE2_EXTRA_MATCH_LINE)) != 0) parsed_size_needed += 4; -if ((options & PCRE2_AUTO_CALLOUT) != 0) - parsed_size_needed = (parsed_size_needed + 1) * 5; +/* When PCRE2_AUTO_CALLOUT is set we allow for one callout at the end. 
*/ -if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE) +if ((options & PCRE2_AUTO_CALLOUT) != 0) + parsed_size_needed += 4; + +parsed_size_needed += 1; /* For the final META_END */ + +if (parsed_size_needed > PARSED_PATTERN_DEFAULT_SIZE) { uint32_t *heap_parsed_pattern = ccontext->memctl.malloc( - (parsed_size_needed + 1) * sizeof(uint32_t), ccontext->memctl.memory_data); + parsed_size_needed * sizeof(uint32_t), ccontext->memctl.memory_data); if (heap_parsed_pattern == NULL) { *errorptr = ERR21; @@ -10514,11 +10542,11 @@ if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE) } cb.parsed_pattern = heap_parsed_pattern; } -cb.parsed_pattern_end = cb.parsed_pattern + parsed_size_needed + 1; +cb.parsed_pattern_end = cb.parsed_pattern + parsed_size_needed; /* Do the parsing scan. */ -errorcode = parse_regex(ptr, cb.external_options, &has_lookbehind, &cb); +errorcode = parse_regex(ptr, cb.external_options, xoptions, &has_lookbehind, &cb); if (errorcode != 0) goto HAD_CB_ERROR; /* If there are any lookbehinds, scan the parsed pattern to figure out their @@ -10587,7 +10615,7 @@ pptr = cb.parsed_pattern; code = cworkspace; *code = OP_BRA; -(void)compile_regex(cb.external_options, ccontext->extra_options, &code, &pptr, +(void)compile_regex(cb.external_options, xoptions, &code, &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, NULL, &cb, &length); @@ -10595,20 +10623,47 @@ if (errorcode != 0) goto HAD_CB_ERROR; /* Offset is in cb.erroroffset */ /* This should be caught in compile_regex(), but just in case... */ +#if defined SUPPORT_WIDE_CHARS +PCRE2_ASSERT((cb.char_lists_size & 0x3) == 0); +if (length > MAX_PATTERN_SIZE || + MAX_PATTERN_SIZE - length < (cb.char_lists_size / sizeof(PCRE2_UCHAR))) +#else if (length > MAX_PATTERN_SIZE) +#endif { errorcode = ERR20; goto HAD_CB_ERROR; } -/* Compute the size of, and then get and initialize, the data block for storing -the compiled pattern and names table. 
Integer overflow should no longer be -possible because nowadays we limit the maximum value of cb.names_found and -cb.name_entry_size. */ +/* Compute the size of, then, if not too large, get and initialize the data +block for storing the compiled pattern and names table. Integer overflow should +no longer be possible because nowadays we limit the maximum value of +cb.names_found and cb.name_entry_size. */ -re_blocksize = sizeof(pcre2_real_code) + - CU2BYTES(length + - (PCRE2_SIZE)cb.names_found * (PCRE2_SIZE)cb.name_entry_size); +re_blocksize = + CU2BYTES((PCRE2_SIZE)cb.names_found * (PCRE2_SIZE)cb.name_entry_size); + +#if defined SUPPORT_WIDE_CHARS +if (cb.char_lists_size != 0) + { +#if PCRE2_CODE_UNIT_WIDTH != 32 + /* Align to 32 bit first. This ensures the + allocated area will also be 32 bit aligned. */ + re_blocksize = (PCRE2_SIZE)CLIST_ALIGN_TO(re_blocksize, sizeof(uint32_t)); +#endif + re_blocksize += cb.char_lists_size; + } +#endif + +re_blocksize += CU2BYTES(length); + +if (re_blocksize > ccontext->max_pattern_compiled_length) + { + errorcode = ERR101; + goto HAD_CB_ERROR; + } + +re_blocksize += sizeof(pcre2_real_code); re = (pcre2_real_code *) ccontext->memctl.malloc(re_blocksize, ccontext->memctl.memory_data); if (re == NULL) @@ -10629,10 +10684,11 @@ re->tables = tables; re->executable_jit = NULL; memset(re->start_bitmap, 0, 32 * sizeof(uint8_t)); re->blocksize = re_blocksize; +re->code_start = re_blocksize - CU2BYTES(length); re->magic_number = MAGIC_NUMBER; re->compile_options = options; re->overall_options = cb.external_options; -re->extra_options = ccontext->extra_options; +re->extra_options = xoptions; re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags; re->limit_heap = limit_heap; re->limit_match = limit_match; @@ -10647,12 +10703,12 @@ re->top_bracket = 0; re->top_backref = 0; re->name_entry_size = cb.name_entry_size; re->name_count = cb.names_found; +re->optimization_flags = optim_flags; /* The basic block is immediately followed 
by the name table, and the compiled code follows after that. */ -codestart = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)) + - re->name_entry_size * re->name_count; +codestart = (PCRE2_UCHAR *)((uint8_t *)re + re->code_start); /* Update the compile data block for the actual compile. The starting points of the name/number translation table and of the code are passed around in the @@ -10667,6 +10723,10 @@ cb.start_code = codestart; cb.req_varyopt = 0; cb.had_accept = FALSE; cb.had_pruneorskip = FALSE; +#ifdef SUPPORT_WIDE_CHARS +cb.char_lists_size = 0; +#endif + /* If any named groups were found, create the name/number table from the list created in the pre-pass. */ @@ -10685,7 +10745,7 @@ of the function here. */ pptr = cb.parsed_pattern; code = (PCRE2_UCHAR *)codestart; *code = OP_BRA; -regexrc = compile_regex(re->overall_options, ccontext->extra_options, &code, +regexrc = compile_regex(re->overall_options, re->extra_options, &code, &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, NULL, &cb, NULL); if (regexrc < 0) re->flags |= PCRE2_MATCH_EMPTY; @@ -10707,7 +10767,12 @@ memory as unaddressable, so that any out-of-bound reads can be detected. 
*/ *code++ = OP_END; usedlength = code - codestart; -if (usedlength > length) errorcode = ERR23; else +if (usedlength > length) + { + PCRE2_DEBUG_UNREACHABLE(); + errorcode = ERR23; /* Overflow of code block - internal error */ + } +else { re->blocksize -= CU2BYTES(length - usedlength); #ifdef SUPPORT_VALGRIND @@ -10729,9 +10794,9 @@ if (errorcode == 0 && cb.had_recurse) int start = RSCAN_CACHE_SIZE; recurse_cache rc[RSCAN_CACHE_SIZE]; - for (rcode = (PCRE2_UCHAR *)find_recurse(codestart, utf); + for (rcode = find_recurse(codestart, utf); rcode != NULL; - rcode = (PCRE2_UCHAR *)find_recurse(rcode + 1 + LINK_SIZE, utf)) + rcode = find_recurse(rcode + 1 + LINK_SIZE, utf)) { int p, groupnumber; @@ -10760,6 +10825,7 @@ if (errorcode == 0 && cb.had_recurse) rgroup = PRIV(find_bracket)(search_from, utf, groupnumber); if (rgroup == NULL) { + PCRE2_DEBUG_UNREACHABLE(); errorcode = ERR53; break; } @@ -10770,7 +10836,7 @@ if (errorcode == 0 && cb.had_recurse) } } - PUT(rcode, 1, rgroup - codestart); + PUT(rcode, 1, (uint32_t)(rgroup - codestart)); } } @@ -10789,10 +10855,14 @@ used in this code because at least one compiler gives a warning about loss of "const" attribute if the cast (PCRE2_UCHAR *)codestart is used directly in the function call. */ -if (errorcode == 0 && (re->overall_options & PCRE2_NO_AUTO_POSSESS) == 0) +if (errorcode == 0 && (optim_flags & PCRE2_OPTIM_AUTO_POSSESS) != 0) { PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart; - if (PRIV(auto_possessify)(temp, &cb) != 0) errorcode = ERR80; + if (PRIV(auto_possessify)(temp, &cb) != 0) + { + PCRE2_DEBUG_UNREACHABLE(); + errorcode = ERR80; + } } /* Failed to compile, or error while post-processing. */ @@ -10805,18 +10875,21 @@ or anything else, such as starting with non-atomic .* when DOTALL is set and there are no occurrences of *PRUNE or *SKIP (though there is an option to disable this case). 
*/ -if ((re->overall_options & PCRE2_ANCHORED) == 0 && - is_anchored(codestart, 0, &cb, 0, FALSE)) - re->overall_options |= PCRE2_ANCHORED; +if ((re->overall_options & PCRE2_ANCHORED) == 0) + { + BOOL dotstar_anchor = ((optim_flags & PCRE2_OPTIM_DOTSTAR_ANCHOR) != 0); + if (is_anchored(codestart, 0, &cb, 0, FALSE, dotstar_anchor)) + re->overall_options |= PCRE2_ANCHORED; + } /* Set up the first code unit or startline flag, the required code unit, and -then study the pattern. This code need not be obeyed if PCRE2_NO_START_OPTIMIZE -is set, as the data it would create will not be used. Note that a first code +then study the pattern. This code need not be obeyed if PCRE2_OPTIM_START_OPTIMIZE +is disabled, as the data it would create will not be used. Note that a first code unit (but not the startline flag) is useful for anchored patterns because it can still give a quick "no match" and also avoid searching for a last code unit. */ -if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) +if ((optim_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0) { int minminlength = 0; /* For minimal minlength from first/required CU */ @@ -10824,8 +10897,19 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) (these are not saved during the compile because they can cause conflicts with actual literals that follow). */ - if (firstcuflags >= REQ_NONE) - firstcu = find_firstassertedcu(codestart, &firstcuflags, 0); + if (firstcuflags >= REQ_NONE) { + uint32_t assertedcuflags = 0; + uint32_t assertedcu = find_firstassertedcu(codestart, &assertedcuflags, 0); + /* It would be wrong to use the asserted first code unit as `firstcu` for + * regexes which are able to match a 1-character string (e.g. /(?=a)b?a/) + * For that example, if we set both firstcu and reqcu to 'a', it would mean + * the subject string needs to be at least 2 characters long, which is wrong. + * With more analysis, we would be able to set firstcu in more cases. 
*/ + if (assertedcuflags < REQ_NONE && assertedcu != reqcu) { + firstcu = assertedcu; + firstcuflags = assertedcuflags; + } + } /* Save the data for a first code unit. The existence of one means the minimum length must be at least 1. */ @@ -10846,8 +10930,8 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) } /* The first code unit is > 128 in UTF or UCP mode, or > 255 otherwise. - In 8-bit UTF mode, codepoints in the range 128-255 are introductory code - points and cannot have another case, but if UCP is set they may do. */ + In 8-bit UTF mode, code units in the range 128-255 are introductory code + units and cannot have another case, but if UCP is set they may do. */ #ifdef SUPPORT_UNICODE #if PCRE2_CODE_UNIT_WIDTH == 8 @@ -10868,9 +10952,12 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) non-DOTALL matches when *PRUNE and SKIP are not present. (There is an option that disables this case.) */ - else if ((re->overall_options & PCRE2_ANCHORED) == 0 && - is_startline(codestart, 0, &cb, 0, FALSE)) - re->flags |= PCRE2_STARTLINE; + else if ((re->overall_options & PCRE2_ANCHORED) == 0) + { + BOOL dotstar_anchor = ((optim_flags & PCRE2_OPTIM_DOTSTAR_ANCHOR) != 0); + if (is_startline(codestart, 0, &cb, 0, FALSE, dotstar_anchor)) + re->flags |= PCRE2_STARTLINE; + } /* Handle the "required code unit", if one is set. In the UTF case we can increment the minimum minimum length only if we are sure this really is a @@ -10930,6 +11017,7 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) if (PRIV(study)(re) != 0) { + PCRE2_DEBUG_UNREACHABLE(); errorcode = ERR31; goto HAD_CB_ERROR; } @@ -10952,6 +11040,10 @@ version of the pattern, free it before returning. Also free the list of named groups if a larger one had to be obtained, and likewise the group information vector. 
*/ +#ifdef SUPPORT_UNICODE +PCRE2_ASSERT(cb.cranges == NULL); +#endif + EXIT: #ifdef SUPPORT_VALGRIND if (zero_terminated) VALGRIND_MAKE_MEM_DEFINED(pattern + patlen, CU2BYTES(1)); @@ -10962,6 +11054,7 @@ if (cb.named_group_list_size > NAMED_GROUP_LIST_SIZE) ccontext->memctl.free((void *)cb.named_groups, ccontext->memctl.memory_data); if (cb.groupinfo != stack_groupinfo) ccontext->memctl.free((void *)cb.groupinfo, ccontext->memctl.memory_data); + return re; /* Will be NULL after an error */ /* Errors discovered in parse_regex() set the offset value in the compile @@ -10974,12 +11067,28 @@ HAD_CB_ERROR: ptr = pattern + cb.erroroffset; HAD_EARLY_ERROR: +PCRE2_ASSERT(ptr >= pattern); /* Ensure we don't return invalid erroroffset */ +PCRE2_ASSERT(ptr <= (pattern + patlen)); *erroroffset = ptr - pattern; HAD_ERROR: *errorptr = errorcode; pcre2_code_free(re); re = NULL; + +#ifdef SUPPORT_WIDE_CHARS +if (cb.cranges != NULL) + { + class_ranges* cranges = cb.cranges; + do + { + class_ranges* next_cranges = cranges->next; + cb.cx->memctl.free(cranges, cb.cx->memctl.memory_data); + cranges = next_cranges; + } + while (cranges != NULL); + } +#endif goto EXIT; } diff --git a/libpcre/src/pcre2_compile.h b/libpcre/src/pcre2_compile.h new file mode 100644 index 000000000..c8bf610be --- /dev/null +++ b/libpcre/src/pcre2_compile.h @@ -0,0 +1,280 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE2 is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifndef PCRE2_COMPILE_H_IDEMPOTENT_GUARD +#define PCRE2_COMPILE_H_IDEMPOTENT_GUARD + +#include "pcre2_internal.h" + +/* Compile time error code numbers. 
They are given names so that they can more +easily be tracked. When a new number is added, the tables called eint1 and +eint2 in pcre2posix.c may need to be updated, and a new error text must be +added to compile_error_texts in pcre2_error.c. Also, the error codes in +pcre2.h.in must be updated - their values are exactly 100 greater than these +values. */ + +enum { ERR0 = COMPILE_ERROR_BASE, + ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10, + ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20, + ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30, + ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40, + ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, + ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, + ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, + ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80, + ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90, + ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100, + ERR101,ERR102,ERR103,ERR104,ERR105,ERR106,ERR107,ERR108,ERR109,ERR110, + ERR111,ERR112,ERR113,ERR114,ERR115,ERR116 }; + +/* Code values for parsed patterns, which are stored in a vector of 32-bit +unsigned ints. Values less than META_END are literal data values. The coding +for identifying the item is in the top 16-bits, leaving 16 bits for the +additional data that some of them need. The META_CODE, META_DATA, and META_DIFF +macros are used to manipulate parsed pattern elements. + +NOTE: When these definitions are changed, the table of extra lengths for each +code (meta_extra_lengths) must be updated to remain in step. 
*/ + +#define META_END 0x80000000u /* End of pattern */ + +#define META_ALT 0x80010000u /* alternation */ +#define META_ATOMIC 0x80020000u /* atomic group */ +#define META_BACKREF 0x80030000u /* Back ref */ +#define META_BACKREF_BYNAME 0x80040000u /* \k'name' */ +#define META_BIGVALUE 0x80050000u /* Next is a literal > META_END */ +#define META_CALLOUT_NUMBER 0x80060000u /* (?C with numerical argument */ +#define META_CALLOUT_STRING 0x80070000u /* (?C with string argument */ +#define META_CAPTURE 0x80080000u /* Capturing parenthesis */ +#define META_CIRCUMFLEX 0x80090000u /* ^ metacharacter */ +#define META_CLASS 0x800a0000u /* start non-empty class */ +#define META_CLASS_EMPTY 0x800b0000u /* empty class */ +#define META_CLASS_EMPTY_NOT 0x800c0000u /* negative empty class */ +#define META_CLASS_END 0x800d0000u /* end of non-empty class */ +#define META_CLASS_NOT 0x800e0000u /* start non-empty negative class */ +#define META_COND_ASSERT 0x800f0000u /* (?(?assertion)... */ +#define META_COND_DEFINE 0x80100000u /* (?(DEFINE)... */ +#define META_COND_NAME 0x80110000u /* (?()... */ +#define META_COND_NUMBER 0x80120000u /* (?(digits)... */ +#define META_COND_RNAME 0x80130000u /* (?(R&name)... */ +#define META_COND_RNUMBER 0x80140000u /* (?(Rdigits)... */ +#define META_COND_VERSION 0x80150000u /* (?(VERSIONx.y)... */ +#define META_OFFSET 0x80160000u /* Setting offset for various + META codes (e.g. META_SCS_NAME) */ +#define META_SCS 0x80170000u /* (*scan_substring:... */ +#define META_SCS_NAME 0x80180000u /* Next of scan_substring */ +#define META_SCS_NUMBER 0x80190000u /* Next digits of scan_substring */ +#define META_DOLLAR 0x801a0000u /* $ metacharacter */ +#define META_DOT 0x801b0000u /* . 
metacharacter */ +#define META_ESCAPE 0x801c0000u /* \d and friends */ +#define META_KET 0x801d0000u /* closing parenthesis */ +#define META_NOCAPTURE 0x801e0000u /* no capture parens */ +#define META_OPTIONS 0x801f0000u /* (?i) and friends */ +#define META_POSIX 0x80200000u /* POSIX class item */ +#define META_POSIX_NEG 0x80210000u /* negative POSIX class item */ +#define META_RANGE_ESCAPED 0x80220000u /* range with at least one escape */ +#define META_RANGE_LITERAL 0x80230000u /* range defined literally */ +#define META_RECURSE 0x80240000u /* Recursion */ +#define META_RECURSE_BYNAME 0x80250000u /* (?&name) */ +#define META_SCRIPT_RUN 0x80260000u /* (*script_run:...) */ + +/* These must be kept together to make it easy to check that an assertion +is present where expected in a conditional group. */ + +#define META_LOOKAHEAD 0x80270000u /* (?= */ +#define META_LOOKAHEADNOT 0x80280000u /* (?! */ +#define META_LOOKBEHIND 0x80290000u /* (?<= */ +#define META_LOOKBEHINDNOT 0x802a0000u /* (?>16) + +/* Extended class management flags. */ + +#define CLASS_IS_ECLASS 0x1 + +/* Macro for the highest character value. */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define MAX_UCHAR_VALUE 0xffu +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#define MAX_UCHAR_VALUE 0xffffu +#else +#define MAX_UCHAR_VALUE 0xffffffffu +#endif + +#define GET_MAX_CHAR_VALUE(utf) \ + ((utf) ? MAX_UTF_CODE_POINT : MAX_UCHAR_VALUE) + +/* Macro for setting individual bits in class bitmaps. */ + +#define SETBIT(a,b) a[(b) >> 3] |= (uint8_t)(1u << ((b) & 0x7)) + +/* Macro for 8 bit specific checks. */ +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define SELECT_VALUE8(value8, value) (value8) +#else +#define SELECT_VALUE8(value8, value) (value) +#endif + +/* Macro for aligning data. */ +#define CLIST_ALIGN_TO(base, align) \ + ((base + ((size_t)(align) - 1)) & ~((size_t)(align) - 1)) + +/* Structure for holding information about an OP_ECLASS internal operand. 
+An "operand" here could be just a single OP_[X]CLASS, or it could be some +complex expression; but it's some sequence of ECL_* codes which pushes one +value to the stack. */ +typedef struct { + /* The position of the operand - or NULL if (lengthptr != NULL). */ + PCRE2_UCHAR *code_start; + PCRE2_SIZE length; + /* The operand's type if it is a single code (ECL_XCLASS, ECL_ANY, ECL_NONE); + otherwise zero if the operand is not atomic. */ + uint8_t op_single_type; + /* Regardless of whether it's a single code or not, we fully constant-fold + the bitmap for code points < 256. */ + class_bits_storage bits; +} eclass_op_info; + +/* Macros for the definitions below, to prevent name collisions. */ + +#define _pcre2_posix_class_maps PCRE2_SUFFIX(_pcre2_posix_class_maps) +#define _pcre2_update_classbits PCRE2_SUFFIX(_pcre2_update_classbits_) +#define _pcre2_compile_class_nested PCRE2_SUFFIX(_pcre2_compile_class_nested_) +#define _pcre2_compile_class_not_nested PCRE2_SUFFIX(_pcre2_compile_class_not_nested_) + + +/* Indices of the POSIX classes in posix_names, posix_name_lengths, +posix_class_maps, and posix_substitutes. They must be kept in sync. */ + +#define PC_DIGIT 7 +#define PC_GRAPH 8 +#define PC_PRINT 9 +#define PC_PUNCT 10 +#define PC_XDIGIT 13 + +extern const int PRIV(posix_class_maps)[]; + + +/* Set bits in classbits according to the property type */ + +void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated, + uint8_t *classbits); + +/* Compile the META codes from start_ptr...end_ptr, writing a single +OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY into pcode. */ + +uint32_t *PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions, + uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap, + int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr); + +/* Compile the META codes in pptr into opcodes written to pcode. The pptr must +start at a META_CLASS or META_CLASS_NOT.
+ +The pptr will be left pointing at the matching META_CLASS_END. */ + +BOOL PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions, + uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr, + compile_block *cb, PCRE2_SIZE *lengthptr); + +#endif /* PCRE2_COMPILE_H_IDEMPOTENT_GUARD */ + +/* End of pcre2_compile.h */ diff --git a/libpcre/src/pcre2_compile_class.c b/libpcre/src/pcre2_compile_class.c new file mode 100644 index 000000000..6a73bb9a7 --- /dev/null +++ b/libpcre/src/pcre2_compile_class.c @@ -0,0 +1,2737 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_compile.h" + +typedef struct { + /* Option bits for eclass. */ + uint32_t options; + uint32_t xoptions; + /* Rarely used members. */ + int *errorcodeptr; + compile_block *cb; + /* Bitmap is needed. */ + BOOL needs_bitmap; +} eclass_context; + +/* Checks the allowed tokens at the end of a class structure in debug mode. +When a new token is not processed by all loops, and the token is equal to +a) one of the cases here: + the compiler will complain about a duplicated case value. +b) none of the cases here: + the loop without the handler will stop with an assertion failure.
*/ + +#ifdef PCRE2_DEBUG +#define CLASS_END_CASES(meta) \ + default: \ + PCRE2_ASSERT((meta) <= META_END); \ + /* Fall through */ \ + case META_CLASS: \ + case META_CLASS_NOT: \ + case META_CLASS_EMPTY: \ + case META_CLASS_EMPTY_NOT: \ + case META_CLASS_END: \ + case META_ECLASS_AND: \ + case META_ECLASS_OR: \ + case META_ECLASS_SUB: \ + case META_ECLASS_XOR: \ + case META_ECLASS_NOT: +#else +#define CLASS_END_CASES(meta) \ + default: +#endif + +#ifdef SUPPORT_WIDE_CHARS + +/* Heapsort algorithm. */ + +static void do_heapify(uint32_t *buffer, size_t size, size_t i) +{ +size_t max; +size_t left; +size_t right; +uint32_t tmp1, tmp2; + +while (TRUE) + { + max = i; + left = (i << 1) + 2; + right = left + 2; + + if (left < size && buffer[left] > buffer[max]) max = left; + if (right < size && buffer[right] > buffer[max]) max = right; + if (i == max) return; + + /* Swap items. */ + tmp1 = buffer[i]; + tmp2 = buffer[i + 1]; + buffer[i] = buffer[max]; + buffer[i + 1] = buffer[max + 1]; + buffer[max] = tmp1; + buffer[max + 1] = tmp2; + i = max; + } +} + +#ifdef SUPPORT_UNICODE + +#define PARSE_CLASS_UTF 0x1 +#define PARSE_CLASS_CASELESS_UTF 0x2 +#define PARSE_CLASS_RESTRICTED_UTF 0x4 +#define PARSE_CLASS_TURKISH_UTF 0x8 + +/* Get the range of nocase characters which includes the +'c' character passed as argument, or directly follows 'c'. */ + +static const uint32_t* +get_nocase_range(uint32_t c) +{ +uint32_t left = 0; +uint32_t right = PRIV(ucd_nocase_ranges_size); +uint32_t middle; + +if (c > MAX_UTF_CODE_POINT) return PRIV(ucd_nocase_ranges) + right; + +while (TRUE) + { + /* Range end of the middle element. */ + middle = ((left + right) >> 1) | 0x1; + + if (PRIV(ucd_nocase_ranges)[middle] <= c) + left = middle + 1; + else if (middle > 1 && PRIV(ucd_nocase_ranges)[middle - 2] > c) + right = middle - 1; + else + return PRIV(ucd_nocase_ranges) + (middle - 1); + } +} + +/* Get the list of othercase characters, which belongs to the passed range. 
+Create ranges from these characters, and append them to the buffer argument. */ + +static size_t +utf_caseless_extend(uint32_t start, uint32_t end, uint32_t options, + uint32_t *buffer) +{ +uint32_t new_start = start; +uint32_t new_end = end; +uint32_t c = start; +const uint32_t *list; +uint32_t tmp[3]; +size_t result = 2; +const uint32_t *skip_range = get_nocase_range(c); +uint32_t skip_start = skip_range[0]; + +#if PCRE2_CODE_UNIT_WIDTH == 8 +PCRE2_ASSERT(options & PARSE_CLASS_UTF); +#endif + +#if PCRE2_CODE_UNIT_WIDTH == 32 +if (end > MAX_UTF_CODE_POINT) end = MAX_UTF_CODE_POINT; +#endif + +while (c <= end) + { + uint32_t co; + + if (c > skip_start) + { + c = skip_range[1]; + skip_range += 2; + skip_start = skip_range[0]; + continue; + } + + /* Compute caseless set. */ + + if ((options & (PARSE_CLASS_TURKISH_UTF|PARSE_CLASS_RESTRICTED_UTF)) == + PARSE_CLASS_TURKISH_UTF && + UCD_ANY_I(c)) + { + co = PRIV(ucd_turkish_dotted_i_caseset) + (UCD_DOTTED_I(c)? 0 : 3); + } + else if ((co = UCD_CASESET(c)) != 0 && + (options & PARSE_CLASS_RESTRICTED_UTF) != 0 && + PRIV(ucd_caseless_sets)[co] < 128) + { + co = 0; /* Ignore the caseless set if it's restricted. */ + } + + if (co != 0) + list = PRIV(ucd_caseless_sets) + co; + else + { + co = UCD_OTHERCASE(c); + list = tmp; + tmp[0] = c; + tmp[1] = NOTACHAR; + + if (co != c) + { + tmp[1] = co; + tmp[2] = NOTACHAR; + } + } + c++; + + /* Add characters. 
*/ + do + { +#if PCRE2_CODE_UNIT_WIDTH == 16 + if (!(options & PARSE_CLASS_UTF) && *list > 0xffff) continue; +#endif + + if (*list < new_start) + { + if (*list + 1 == new_start) + { + new_start--; + continue; + } + } + else if (*list > new_end) + { + if (*list - 1 == new_end) + { + new_end++; + continue; + } + } + else continue; + + result += 2; + if (buffer != NULL) + { + buffer[0] = *list; + buffer[1] = *list; + buffer += 2; + } + } + while (*(++list) != NOTACHAR); + } + + if (buffer != NULL) + { + buffer[0] = new_start; + buffer[1] = new_end; + buffer += 2; + (void)buffer; + } + return result; +} + +#endif + +/* Add a character list to a buffer. */ + +static size_t +append_char_list(const uint32_t *p, uint32_t *buffer) +{ +const uint32_t *n; +size_t result = 0; + +while (*p != NOTACHAR) + { + n = p; + while (n[0] == n[1] - 1) n++; + + PCRE2_ASSERT(*p < 0xffff); + + if (buffer != NULL) + { + buffer[0] = *p; + buffer[1] = *n; + buffer += 2; + } + + result += 2; + p = n + 1; + } + + return result; +} + +static uint32_t +get_highest_char(uint32_t options) +{ +(void)options; /* Avoid compiler warning. */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 +return MAX_UTF_CODE_POINT; +#else +#ifdef SUPPORT_UNICODE +return GET_MAX_CHAR_VALUE((options & PARSE_CLASS_UTF) != 0); +#else +return MAX_UCHAR_VALUE; +#endif +#endif +} + +/* Add a negated character list to a buffer. 
*/ +static size_t +append_negated_char_list(const uint32_t *p, uint32_t options, uint32_t *buffer) +{ +const uint32_t *n; +uint32_t start = 0; +size_t result = 2; + +PCRE2_ASSERT(*p > 0); + +while (*p != NOTACHAR) + { + n = p; + while (n[0] == n[1] - 1) n++; + + PCRE2_ASSERT(*p < 0xffff); + + if (buffer != NULL) + { + buffer[0] = start; + buffer[1] = *p - 1; + buffer += 2; + } + + result += 2; + start = *n + 1; + p = n + 1; + } + + if (buffer != NULL) + { + buffer[0] = start; + buffer[1] = get_highest_char(options); + buffer += 2; + (void)buffer; + } + + return result; +} + +static uint32_t * +append_non_ascii_range(uint32_t options, uint32_t *buffer) +{ + if (buffer == NULL) return NULL; + + buffer[0] = 0x100; + buffer[1] = get_highest_char(options); + return buffer + 2; +} + +static size_t +parse_class(uint32_t *ptr, uint32_t options, uint32_t *buffer) +{ +size_t total_size = 0; +size_t size; +uint32_t meta_arg; +uint32_t start_char; + +while (TRUE) + { + switch (META_CODE(*ptr)) + { + case META_ESCAPE: + meta_arg = META_DATA(*ptr); + switch (meta_arg) + { + case ESC_D: + case ESC_W: + case ESC_S: + buffer = append_non_ascii_range(options, buffer); + total_size += 2; + break; + + case ESC_h: + size = append_char_list(PRIV(hspace_list), buffer); + total_size += size; + if (buffer != NULL) buffer += size; + break; + + case ESC_H: + size = append_negated_char_list(PRIV(hspace_list), options, buffer); + total_size += size; + if (buffer != NULL) buffer += size; + break; + + case ESC_v: + size = append_char_list(PRIV(vspace_list), buffer); + total_size += size; + if (buffer != NULL) buffer += size; + break; + + case ESC_V: + size = append_negated_char_list(PRIV(vspace_list), options, buffer); + total_size += size; + if (buffer != NULL) buffer += size; + break; + + case ESC_p: + case ESC_P: + ptr++; + if (meta_arg == ESC_p && (*ptr >> 16) == PT_ANY) + { + if (buffer != NULL) + { + buffer[0] = 0; + buffer[1] = get_highest_char(options); + buffer += 2; + } + total_size += 
2; + } + break; + } + ptr++; + continue; + case META_POSIX_NEG: + buffer = append_non_ascii_range(options, buffer); + total_size += 2; + ptr += 2; + continue; + case META_POSIX: + ptr += 2; + continue; + case META_BIGVALUE: + /* Character literal */ + ptr++; + break; + CLASS_END_CASES(*ptr) + if (*ptr >= META_END) return total_size; + break; + } + + start_char = *ptr; + + if (ptr[1] == META_RANGE_LITERAL || ptr[1] == META_RANGE_ESCAPED) + { + ptr += 2; + PCRE2_ASSERT(*ptr < META_END || *ptr == META_BIGVALUE); + + if (*ptr == META_BIGVALUE) ptr++; + +#ifdef EBCDIC +#error "Missing EBCDIC support" +#endif + } + +#ifdef SUPPORT_UNICODE + if (options & PARSE_CLASS_CASELESS_UTF) + { + size = utf_caseless_extend(start_char, *ptr++, options, buffer); + if (buffer != NULL) buffer += size; + total_size += size; + continue; + } +#endif + + if (buffer != NULL) + { + buffer[0] = start_char; + buffer[1] = *ptr; + buffer += 2; + } + + ptr++; + total_size += 2; + } + + return total_size; +} + +/* Extra uint32_t values for storing the lengths of range lists in +the worst case. Two uint32_t lengths and a range end for a range +starting before 255 */ +#define CHAR_LIST_EXTRA_SIZE 3 + +/* Starting character values for each character list. */ + +static const uint32_t char_list_starts[] = { +#if PCRE2_CODE_UNIT_WIDTH == 32 + XCL_CHAR_LIST_HIGH_32_START, +#endif +#if PCRE2_CODE_UNIT_WIDTH == 32 || defined SUPPORT_UNICODE + XCL_CHAR_LIST_LOW_32_START, +#endif + XCL_CHAR_LIST_HIGH_16_START, + /* Must be terminated by XCL_CHAR_LIST_LOW_16_START, + which also represents the end of the bitset. 
*/ + XCL_CHAR_LIST_LOW_16_START, +}; + +static class_ranges * +compile_optimize_class(uint32_t *start_ptr, uint32_t options, + uint32_t xoptions, compile_block *cb) +{ +class_ranges* cranges; +uint32_t *ptr; +uint32_t *buffer; +uint32_t *dst; +uint32_t class_options = 0; +size_t range_list_size = 0, total_size, i; +uint32_t tmp1, tmp2; +const uint32_t *char_list_next; +uint16_t *next_char; +uint32_t char_list_start, char_list_end; +uint32_t range_start, range_end; + +#ifdef SUPPORT_UNICODE +if (options & PCRE2_UTF) + class_options |= PARSE_CLASS_UTF; + +if ((options & PCRE2_CASELESS) && (options & (PCRE2_UTF|PCRE2_UCP))) + class_options |= PARSE_CLASS_CASELESS_UTF; + +if (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) + class_options |= PARSE_CLASS_RESTRICTED_UTF; + +if (xoptions & PCRE2_EXTRA_TURKISH_CASING) + class_options |= PARSE_CLASS_TURKISH_UTF; +#endif + +/* Compute required space for the range. */ + +range_list_size = parse_class(start_ptr, class_options, NULL); +PCRE2_ASSERT((range_list_size & 0x1) == 0); + +/* Allocate buffer. The total_size also represents the end of the buffer. */ + +total_size = range_list_size + + ((range_list_size >= 2) ? CHAR_LIST_EXTRA_SIZE : 0); + +cranges = cb->cx->memctl.malloc( + sizeof(class_ranges) + total_size * sizeof(uint32_t), + cb->cx->memctl.memory_data); + +if (cranges == NULL) return NULL; + +cranges->next = NULL; +cranges->range_list_size = (uint16_t)range_list_size; +cranges->char_lists_types = 0; +cranges->char_lists_size = 0; +cranges->char_lists_start = 0; + +if (range_list_size == 0) return cranges; + +buffer = (uint32_t*)(cranges + 1); +parse_class(start_ptr, class_options, buffer); + +/* Using <= instead of == to help static analysis. */ +if (range_list_size <= 2) return cranges; + +/* In-place sorting of ranges. 
*/ + +i = (((range_list_size >> 2) - 1) << 1); +while (TRUE) + { + do_heapify(buffer, range_list_size, i); + if (i == 0) break; + i -= 2; + } + +i = range_list_size - 2; +while (TRUE) + { + tmp1 = buffer[i]; + tmp2 = buffer[i + 1]; + buffer[i] = buffer[0]; + buffer[i + 1] = buffer[1]; + buffer[0] = tmp1; + buffer[1] = tmp2; + + do_heapify(buffer, i, 0); + if (i == 0) break; + i -= 2; + } + +/* Merge ranges whenever possible. */ +dst = buffer; +ptr = buffer + 2; +range_list_size -= 2; + +/* The second condition is a very rare corner case, where the end of the last +range is the maximum character. This range cannot be extended further. */ + +while (range_list_size > 0 && dst[1] != ~(uint32_t)0) + { + if (dst[1] + 1 < ptr[0]) + { + dst += 2; + dst[0] = ptr[0]; + dst[1] = ptr[1]; + } + else if (dst[1] < ptr[1]) dst[1] = ptr[1]; + + ptr += 2; + range_list_size -= 2; + } + +PCRE2_ASSERT(dst[1] <= get_highest_char(class_options)); + +/* When the number of ranges is less than six, +they are not converted to range lists. */ + +ptr = buffer; +while (ptr < dst && ptr[1] < 0x100) ptr += 2; +if (dst - ptr < (2 * (6 - 1))) + { + cranges->range_list_size = (uint16_t)(dst + 2 - buffer); + return cranges; + } + +/* Compute character lists structures.
*/ + +char_list_next = char_list_starts; +char_list_start = *char_list_next++; +#if PCRE2_CODE_UNIT_WIDTH == 32 +char_list_end = XCL_CHAR_LIST_HIGH_32_END; +#elif defined SUPPORT_UNICODE +char_list_end = XCL_CHAR_LIST_LOW_32_END; +#else +char_list_end = XCL_CHAR_LIST_HIGH_16_END; +#endif +next_char = (uint16_t*)(buffer + total_size); + +tmp1 = 0; +tmp2 = ((sizeof(char_list_starts) / sizeof(uint32_t)) - 1) * XCL_TYPE_BIT_LEN; +PCRE2_ASSERT(tmp2 <= 3 * XCL_TYPE_BIT_LEN && tmp2 >= XCL_TYPE_BIT_LEN); +range_start = dst[0]; +range_end = dst[1]; + +while (TRUE) + { + if (range_start >= char_list_start) + { + if (range_start == range_end || range_end < char_list_end) + { + tmp1++; + next_char--; + + if (char_list_start < XCL_CHAR_LIST_LOW_32_START) + *next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END); + else + *(uint32_t*)(--next_char) = + (range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END; + } + + if (range_start < range_end) + { + if (range_start > char_list_start) + { + tmp1++; + next_char--; + + if (char_list_start < XCL_CHAR_LIST_LOW_32_START) + *next_char = (uint16_t)(range_start << XCL_CHAR_SHIFT); + else + *(uint32_t*)(--next_char) = (range_start << XCL_CHAR_SHIFT); + } + else + cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2; + } + + PCRE2_ASSERT((uint32_t*)next_char >= dst + 2); + + if (dst > buffer) + { + dst -= 2; + range_start = dst[0]; + range_end = dst[1]; + continue; + } + + range_start = 0; + range_end = 0; + } + + if (range_end >= char_list_start) + { + PCRE2_ASSERT(range_start < char_list_start); + + if (range_end < char_list_end) + { + tmp1++; + next_char--; + + if (char_list_start < XCL_CHAR_LIST_LOW_32_START) + *next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END); + else + *(uint32_t*)(--next_char) = + (range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END; + + PCRE2_ASSERT((uint32_t*)next_char >= dst + 2); + } + + cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2; + } + + if (tmp1 >= XCL_ITEM_COUNT_MASK) + { + 
cranges->char_lists_types |= XCL_ITEM_COUNT_MASK << tmp2; + next_char--; + + if (char_list_start < XCL_CHAR_LIST_LOW_32_START) + *next_char = (uint16_t)tmp1; + else + *(uint32_t*)(--next_char) = tmp1; + } + else + cranges->char_lists_types |= tmp1 << tmp2; + + if (range_start < XCL_CHAR_LIST_LOW_16_START) break; + + PCRE2_ASSERT(tmp2 >= XCL_TYPE_BIT_LEN); + char_list_end = char_list_start - 1; + char_list_start = *char_list_next++; + tmp1 = 0; + tmp2 -= XCL_TYPE_BIT_LEN; + } + +if (dst[0] < XCL_CHAR_LIST_LOW_16_START) dst += 2; +PCRE2_ASSERT((uint16_t*)dst <= next_char); + +cranges->char_lists_size = + (size_t)((uint8_t*)(buffer + total_size) - (uint8_t*)next_char); +cranges->char_lists_start = (size_t)((uint8_t*)next_char - (uint8_t*)buffer); +cranges->range_list_size = (uint16_t)(dst - buffer); +return cranges; +} + +#endif /* SUPPORT_WIDE_CHARS */ + +#ifdef SUPPORT_UNICODE + +void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated, + uint8_t *classbits) +{ +/* Update PRIV(xclass) when this function is changed. 
*/ +int c, chartype; +const ucd_record *prop; +uint32_t gentype; +BOOL set_bit; + +if (ptype == PT_ANY) + { + if (!negated) memset(classbits, 0xff, 32); + return; + } + +for (c = 0; c < 256; c++) + { + prop = GET_UCD(c); + set_bit = FALSE; + (void)set_bit; + + switch (ptype) + { + case PT_LAMP: + chartype = prop->chartype; + set_bit = (chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt); + break; + + case PT_GC: + set_bit = (PRIV(ucp_gentype)[prop->chartype] == pdata); + break; + + case PT_PC: + set_bit = (prop->chartype == pdata); + break; + + case PT_SC: + set_bit = (prop->script == pdata); + break; + + case PT_SCX: + set_bit = (prop->script == pdata || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0); + break; + + case PT_ALNUM: + gentype = PRIV(ucp_gentype)[prop->chartype]; + set_bit = (gentype == ucp_L || gentype == ucp_N); + break; + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + switch(c) + { + HSPACE_BYTE_CASES: + VSPACE_BYTE_CASES: + set_bit = TRUE; + break; + + default: + set_bit = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z); + break; + } + break; + + case PT_WORD: + chartype = prop->chartype; + gentype = PRIV(ucp_gentype)[chartype]; + set_bit = (gentype == ucp_L || gentype == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc); + break; + + case PT_UCNC: + set_bit = (c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || + c == CHAR_GRAVE_ACCENT || c >= 0xa0); + break; + + case PT_BIDICL: + set_bit = (UCD_BIDICLASS_PROP(prop) == pdata); + break; + + case PT_BOOL: + set_bit = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), pdata) != 0; + break; + + case PT_PXGRAPH: + chartype = prop->chartype; + gentype = PRIV(ucp_gentype)[chartype]; + set_bit = (gentype != ucp_Z && (gentype != ucp_C || chartype == ucp_Cf)); + break; + + case PT_PXPRINT: + chartype = prop->chartype; + set_bit = (chartype != ucp_Zl && chartype != ucp_Zp && + (PRIV(ucp_gentype)[chartype] != ucp_C || chartype == ucp_Cf)); + 
break; + + case PT_PXPUNCT: + gentype = PRIV(ucp_gentype)[prop->chartype]; + set_bit = (gentype == ucp_P || (c < 128 && gentype == ucp_S)); + break; + + default: + PCRE2_ASSERT(ptype == PT_PXXDIGIT); + set_bit = (c >= CHAR_0 && c <= CHAR_9) || + (c >= CHAR_A && c <= CHAR_F) || + (c >= CHAR_a && c <= CHAR_f); + break; + } + + if (negated) set_bit = !set_bit; + if (set_bit) *classbits |= (uint8_t)(1 << (c & 0x7)); + if ((c & 0x7) == 0x7) classbits++; + } +} + +#endif /* SUPPORT_UNICODE */ + + + +#ifdef SUPPORT_WIDE_CHARS + +/************************************************* +* XClass related properties * +*************************************************/ + +/* XClass needs to be generated. */ +#define XCLASS_REQUIRED 0x1 +/* XClass has 8 bit character. */ +#define XCLASS_HAS_8BIT_CHARS 0x2 +/* XClass has properties. */ +#define XCLASS_HAS_PROPS 0x4 +/* XClass has character lists. */ +#define XCLASS_HAS_CHAR_LISTS 0x8 +/* XClass matches to all >= 256 characters. */ +#define XCLASS_HIGH_ANY 0x10 + +#endif + + +/************************************************* +* Internal entry point for add range to class * +*************************************************/ + +/* This function sets the overall range for characters < 256. +It also handles non-utf case folding. + +Arguments: + options the options bits + xoptions the extra options bits + cb compile data + start start of range character + end end of range character + +Returns: cb->classbits is updated +*/ + +static void +add_to_class(uint32_t options, uint32_t xoptions, compile_block *cb, + uint32_t start, uint32_t end) +{ +uint8_t *classbits = cb->classbits.classbits; +uint32_t c, byte_start, byte_end; +uint32_t classbits_end = (end <= 0xff ? end : 0xff); + +/* If caseless matching is required, scan the range and process alternate +cases. In Unicode, there are 8-bit characters that have alternate cases that +are greater than 255 and vice-versa (though these may be ignored if caseless +restriction is in force). 
Sometimes we can just extend the original range. */ + +if ((options & PCRE2_CASELESS) != 0) + { +#ifdef SUPPORT_UNICODE + /* UTF mode. This branch is taken if we don't support wide characters (e.g. + 8-bit library, without UTF), but we do treat those characters as Unicode + (if UCP flag is set). In this case, we only need to expand the character class + set to include the case pairs which are in the 0-255 codepoint range. */ + if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0) + { + BOOL turkish_i = (xoptions & (PCRE2_EXTRA_TURKISH_CASING|PCRE2_EXTRA_CASELESS_RESTRICT)) == + PCRE2_EXTRA_TURKISH_CASING; + if (start < 128) + { + uint32_t lo_end = (classbits_end < 127 ? classbits_end : 127); + for (c = start; c <= lo_end; c++) + { + if (turkish_i && UCD_ANY_I(c)) continue; + SETBIT(classbits, cb->fcc[c]); + } + } + if (classbits_end >= 128) + { + uint32_t hi_start = (start > 128 ? start : 128); + for (c = hi_start; c <= classbits_end; c++) + { + uint32_t co = UCD_OTHERCASE(c); + if (co <= 0xff) SETBIT(classbits, co); + } + } + } + + else +#endif /* SUPPORT_UNICODE */ + + /* Not UTF mode */ + { + for (c = start; c <= classbits_end; c++) + SETBIT(classbits, cb->fcc[c]); + } + } + +/* Use the bitmap for characters < 256. Otherwise use extra data. */ + +byte_start = (start + 7) >> 3; +byte_end = (classbits_end + 1) >> 3; + +if (byte_start >= byte_end) + { + for (c = start; c <= classbits_end; c++) + /* Regardless of start, c will always be <= 255. 
*/ + SETBIT(classbits, c); + return; + } + +for (c = byte_start; c < byte_end; c++) + classbits[c] = 0xff; + +byte_start <<= 3; +byte_end <<= 3; + +for (c = start; c < byte_start; c++) + SETBIT(classbits, c); + +for (c = byte_end; c <= classbits_end; c++) + SETBIT(classbits, c); +} + + +#if PCRE2_CODE_UNIT_WIDTH == 8 +/************************************************* +* Internal entry point for add list to class * +*************************************************/ + +/* This function is used for adding a list of horizontal or vertical whitespace +characters to a class. The list must be in order so that ranges of characters +can be detected and handled appropriately. This function sets the overall range +so that the internal functions can try to avoid duplication when handling +case-independence. + +Arguments: + options the options bits + xoptions the extra options bits + cb contains pointers to tables etc. + p points to row of 32-bit values, terminated by NOTACHAR + +Returns: cb->classbits is updated +*/ + +static void +add_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb, + const uint32_t *p) +{ +while (p[0] < 256) + { + unsigned int n = 0; + + while(p[n+1] == p[0] + n + 1) n++; + add_to_class(options, xoptions, cb, p[0], p[n]); + + p += n + 1; + } +} + + + +/************************************************* +* Add characters not in a list to a class * +*************************************************/ + +/* This function is used for adding the complement of a list of horizontal or +vertical whitespace to a class. The list must be in order. + +Arguments: + options the options bits + xoptions the extra options bits + cb contains pointers to tables etc. 
+ p points to row of 32-bit values, terminated by NOTACHAR + +Returns: cb->classbits is updated +*/ + +static void +add_not_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb, + const uint32_t *p) +{ +if (p[0] > 0) + add_to_class(options, xoptions, cb, 0, p[0] - 1); +while (p[0] < 256) + { + while (p[1] == p[0] + 1) p++; + add_to_class(options, xoptions, cb, p[0] + 1, (p[1] > 255) ? 255 : p[1] - 1); + p++; + } +} +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + + + +/************************************************* +* Main entry-point to compile a character class * +*************************************************/ + +/* This function consumes a "leaf", which is a set of characters that will +become a single OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY. */ + +uint32_t * +PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions, + uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap, + int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr) +{ +uint32_t *pptr = start_ptr; +PCRE2_UCHAR *code = *pcode; +BOOL should_flip_negation; +const uint8_t *cbits = cb->cbits; +/* Some functions such as add_to_class() or eclass processing +expect that the bitset is stored in cb->classbits.classbits. */ +uint8_t *const classbits = cb->classbits.classbits; + +#ifdef SUPPORT_UNICODE +BOOL utf = (options & PCRE2_UTF) != 0; +#else /* No Unicode support */ +BOOL utf = FALSE; +#endif + +/* Helper variables for OP_XCLASS opcode (for characters > 255). */ + +#ifdef SUPPORT_WIDE_CHARS +uint32_t xclass_props; +PCRE2_UCHAR *class_uchardata; +class_ranges* cranges; +#endif + +/* If an XClass contains a negative special such as \S, we need to flip the +negation flag at the end, so that support for characters > 255 works correctly +(they are all included in the class). An XClass may need to insert specific +matching or non-matching code for wide characters.
+*/ + +should_flip_negation = FALSE; + +/* XClass will be used when characters > 255 might match. */ + +#ifdef SUPPORT_WIDE_CHARS +xclass_props = 0; + +#if PCRE2_CODE_UNIT_WIDTH == 8 +cranges = NULL; + +if (utf) +#endif + { + if (lengthptr != NULL) + { + cranges = compile_optimize_class(pptr, options, xoptions, cb); + + if (cranges == NULL) + { + *errorcodeptr = ERR21; + return NULL; + } + + /* Caching the pre-processed character ranges. */ + if (cb->next_cranges != NULL) + cb->next_cranges->next = cranges; + else + cb->cranges = cranges; + + cb->next_cranges = cranges; + } + else + { + /* Reuse the pre-processed character ranges. */ + cranges = cb->cranges; + PCRE2_ASSERT(cranges != NULL); + cb->cranges = cranges->next; + } + + if (cranges->range_list_size > 0) + { + const uint32_t *ranges = (const uint32_t*)(cranges + 1); + + if (ranges[0] <= 255) + xclass_props |= XCLASS_HAS_8BIT_CHARS; + + if (ranges[cranges->range_list_size - 1] == GET_MAX_CHAR_VALUE(utf) && + ranges[cranges->range_list_size - 2] <= 256) + xclass_props |= XCLASS_HIGH_ANY; + } + } + +class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ +#endif /* SUPPORT_WIDE_CHARS */ + +/* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map +in a temporary bit of memory, in case the class contains fewer than two +8-bit characters because in that case the compiled code doesn't use the bit +map. */ + +memset(classbits, 0, 32); + +/* Process items until end_ptr is reached. */ + +while (TRUE) + { + uint32_t meta = *(pptr++); + BOOL local_negate; + int posix_class; + int taboffset, tabopt; + class_bits_storage pbits; + uint32_t escape, c; + + /* Handle POSIX classes such as [:alpha:] etc. */ + switch (META_CODE(meta)) + { + case META_POSIX: + case META_POSIX_NEG: + + local_negate = (meta == META_POSIX_NEG); + posix_class = *(pptr++); + + if (local_negate) should_flip_negation = TRUE; /* Note negative special */ + + /* If matching is caseless, upper and lower are converted to alpha. 
+ This relies on the fact that the class table starts with alpha, + lower, upper as the first 3 entries. */ + + if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2) + posix_class = 0; + + /* When PCRE2_UCP is set, some of the POSIX classes are converted to + different escape sequences that use Unicode properties \p or \P. + Others that are not available via \p or \P have to generate + XCL_PROP/XCL_NOTPROP directly, which is done here. */ + +#ifdef SUPPORT_UNICODE + /* TODO This entire block of code here appears to be unreachable!? I simply + can't see how it can be hit, given that the frontend parser doesn't emit + META_POSIX for GRAPH/PRINT/PUNCT when UCP is set. */ + if ((options & PCRE2_UCP) != 0 && + (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0) + { + uint32_t ptype; + + switch(posix_class) + { + case PC_GRAPH: + case PC_PRINT: + case PC_PUNCT: + ptype = (posix_class == PC_GRAPH)? PT_PXGRAPH : + (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT; + + PRIV(update_classbits)(ptype, 0, local_negate, classbits); + + if ((xclass_props & XCLASS_HIGH_ANY) == 0) + { + if (lengthptr != NULL) + *lengthptr += 3; + else + { + *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP; + *class_uchardata++ = (PCRE2_UCHAR)ptype; + *class_uchardata++ = 0; + } + xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS; + } + continue; + + /* For the other POSIX classes (ex: ascii) we are going to + fall through to the non-UCP case and build a bit map for + characters with code points less than 256. However, if we are in + a negated POSIX class, characters with code points greater than + 255 must either all match or all not match, depending on whether + the whole class is not or is negated. For example, for + [[:^ascii:]... they must all match, whereas for [^[:^ascii:]... + they must not. + + In the special case where there are no xclass items, this is + automatically handled by the use of OP_CLASS or OP_NCLASS, but an + explicit range is needed for OP_XCLASS. 
Setting a flag here + causes the range to be generated later when it is known that + OP_XCLASS is required. In the 8-bit library this is relevant only in + utf mode, since no wide characters can exist otherwise. */ + + default: + break; + } + } +#endif /* SUPPORT_UNICODE */ + + /* In the non-UCP case, or when UCP makes no difference, we build the + bit map for the POSIX class in a chunk of local store because we may + be adding and subtracting from it, and we don't want to subtract bits + that may be in the main map already. At the end we or the result into + the bit map that is being built. */ + + posix_class *= 3; + + /* Copy in the first table (always present) */ + + memcpy(pbits.classbits, cbits + PRIV(posix_class_maps)[posix_class], 32); + + /* If there is a second table, add or remove it as required. */ + + taboffset = PRIV(posix_class_maps)[posix_class + 1]; + tabopt = PRIV(posix_class_maps)[posix_class + 2]; + + if (taboffset >= 0) + { + if (tabopt >= 0) + for (int i = 0; i < 32; i++) + pbits.classbits[i] |= cbits[i + taboffset]; + else + for (int i = 0; i < 32; i++) + pbits.classbits[i] &= (uint8_t)(~cbits[i + taboffset]); + } + + /* Now see if we need to remove any special characters. An option + value of 1 removes vertical space and 2 removes underscore. */ + + if (tabopt < 0) tabopt = -tabopt; + if (tabopt == 1) pbits.classbits[1] &= ~0x3c; + else if (tabopt == 2) pbits.classbits[11] &= 0x7f; + + /* Add the POSIX table or its complement into the main table that is + being built and we are done. */ + + { + uint32_t *classwords = cb->classbits.classwords; + + if (local_negate) + for (int i = 0; i < 8; i++) + classwords[i] |= (uint32_t)(~pbits.classwords[i]); + else + for (int i = 0; i < 8; i++) + classwords[i] |= pbits.classwords[i]; + } + +#ifdef SUPPORT_WIDE_CHARS + /* Every class contains at least one < 256 character. 
*/ + xclass_props |= XCLASS_HAS_8BIT_CHARS; +#endif + continue; /* End of POSIX handling */ + + /* Other than POSIX classes, the only items we should encounter are + \d-type escapes and literal characters (possibly as ranges). */ + case META_BIGVALUE: + meta = *(pptr++); + break; + + case META_ESCAPE: + escape = META_DATA(meta); + + switch(escape) + { + case ESC_d: + for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit]; + break; + + case ESC_D: + should_flip_negation = TRUE; + for (int i = 0; i < 32; i++) + classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]); + break; + + case ESC_w: + for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word]; + break; + + case ESC_W: + should_flip_negation = TRUE; + for (int i = 0; i < 32; i++) + classbits[i] |= (uint8_t)(~cbits[i+cbit_word]); + break; + + /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl + 5.18. Before PCRE 8.34, we had to preserve the VT bit if it was + previously set by something earlier in the character class. + Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so + we could just adjust the appropriate bit. From PCRE 8.34 we no + longer treat \s and \S specially. */ + + case ESC_s: + for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space]; + break; + + case ESC_S: + should_flip_negation = TRUE; + for (int i = 0; i < 32; i++) + classbits[i] |= (uint8_t)(~cbits[i+cbit_space]); + break; + + /* When adding the horizontal or vertical space lists to a class, or + their complements, disable PCRE2_CASELESS, because it justs wastes + time, and in the "not-x" UTF cases can create unwanted duplicates in + the XCLASS list (provoked by characters that have more than one other + case and by both cases being in the same "not-x" sublist). 
*/ + + case ESC_h: +#if PCRE2_CODE_UNIT_WIDTH == 8 +#ifdef SUPPORT_UNICODE + if (cranges != NULL) break; +#endif + add_list_to_class(options & ~PCRE2_CASELESS, xoptions, + cb, PRIV(hspace_list)); +#else + PCRE2_ASSERT(cranges != NULL); +#endif + break; + + case ESC_H: +#if PCRE2_CODE_UNIT_WIDTH == 8 +#ifdef SUPPORT_UNICODE + if (cranges != NULL) break; +#endif + add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions, + cb, PRIV(hspace_list)); +#else + PCRE2_ASSERT(cranges != NULL); +#endif + break; + + case ESC_v: +#if PCRE2_CODE_UNIT_WIDTH == 8 +#ifdef SUPPORT_UNICODE + if (cranges != NULL) break; +#endif + add_list_to_class(options & ~PCRE2_CASELESS, xoptions, + cb, PRIV(vspace_list)); +#else + PCRE2_ASSERT(cranges != NULL); +#endif + break; + + case ESC_V: +#if PCRE2_CODE_UNIT_WIDTH == 8 +#ifdef SUPPORT_UNICODE + if (cranges != NULL) break; +#endif + add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions, + cb, PRIV(vspace_list)); +#else + PCRE2_ASSERT(cranges != NULL); +#endif + break; + + /* If Unicode is not supported, \P and \p are not allowed and are + faulted at parse time, so will never appear here. */ + +#ifdef SUPPORT_UNICODE + case ESC_p: + case ESC_P: + { + uint32_t ptype = *pptr >> 16; + uint32_t pdata = *(pptr++) & 0xffff; + + /* The "Any" is processed by PRIV(update_classbits)(). */ + if (ptype == PT_ANY) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (!utf && escape == ESC_p) memset(classbits, 0xff, 32); +#endif + continue; + } + + PRIV(update_classbits)(ptype, pdata, (escape == ESC_P), classbits); + + if ((xclass_props & XCLASS_HIGH_ANY) == 0) + { + if (lengthptr != NULL) + *lengthptr += 3; + else + { + *class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP; + *class_uchardata++ = ptype; + *class_uchardata++ = pdata; + } + xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS; + } + } + continue; +#endif + } + +#ifdef SUPPORT_WIDE_CHARS + /* Every non-property class contains at least one < 256 character. 
*/ + xclass_props |= XCLASS_HAS_8BIT_CHARS; +#endif + /* End handling \d-type escapes */ + continue; + + CLASS_END_CASES(meta) + /* Literals. */ + if (meta < META_END) break; + /* Non-literals: end of class contents. */ + goto END_PROCESSING; + } + + /* A literal character may be followed by a range meta. At parse time + there are checks for out-of-order characters, for ranges where the two + characters are equal, and for hyphens that cannot indicate a range. At + this point, therefore, no checking is needed. */ + + c = meta; + + /* Remember if \r or \n were explicitly used */ + + if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF; + + /* Process a character range */ + + if (*pptr == META_RANGE_LITERAL || *pptr == META_RANGE_ESCAPED) + { + uint32_t d; + +#ifdef EBCDIC + BOOL range_is_literal = (*pptr == META_RANGE_LITERAL); +#endif + ++pptr; + d = *(pptr++); + if (d == META_BIGVALUE) d = *(pptr++); + + /* Remember an explicit \r or \n, and add the range to the class. */ + + if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF; + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#ifdef SUPPORT_UNICODE + if (cranges != NULL) continue; + xclass_props |= XCLASS_HAS_8BIT_CHARS; +#endif + + /* In an EBCDIC environment, Perl treats alphabetic ranges specially + because there are holes in the encoding, and simply using the range + A-Z (for example) would include the characters in the holes. This + applies only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */ + +#ifdef EBCDIC + if (range_is_literal && + (cb->ctypes[c] & ctype_letter) != 0 && + (cb->ctypes[d] & ctype_letter) != 0 && + (c <= CHAR_z) == (d <= CHAR_z)) + { + uint32_t uc = (d <= CHAR_z)? 0 : 64; + uint32_t C = c - uc; + uint32_t D = d - uc; + + if (C <= CHAR_i) + { + add_to_class(options, xoptions, cb, C + uc, + ((D < CHAR_i)? D : CHAR_i) + uc); + C = CHAR_j; + } + + if (C <= D && C <= CHAR_r) + { + add_to_class(options, xoptions, cb, C + uc, + ((D < CHAR_r)? 
D : CHAR_r) + uc); + C = CHAR_s; + } + + if (C <= D) + add_to_class(options, xoptions, cb, C + uc, D + uc); + } + else +#endif + /* Not an EBCDIC special range */ + + add_to_class(options, xoptions, cb, c, d); +#else + PCRE2_ASSERT(cranges != NULL); +#endif + continue; + } /* End of range handling */ + + /* Character ranges are ignored when class_ranges is present. */ +#if PCRE2_CODE_UNIT_WIDTH == 8 +#ifdef SUPPORT_UNICODE + if (cranges != NULL) continue; + xclass_props |= XCLASS_HAS_8BIT_CHARS; +#endif + /* Handle a single character. */ + + add_to_class(options, xoptions, cb, meta, meta); +#else + PCRE2_ASSERT(cranges != NULL); +#endif + } /* End of main class-processing loop */ + +END_PROCESSING: + +#ifdef SUPPORT_WIDE_CHARS +PCRE2_ASSERT((xclass_props & XCLASS_HAS_PROPS) == 0 || + (xclass_props & XCLASS_HIGH_ANY) == 0); + +if (cranges != NULL) + { + uint32_t *range = (uint32_t*)(cranges + 1); + uint32_t *end = range + cranges->range_list_size; + + while (range < end && range[0] < 256) + { + PCRE2_ASSERT((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0); + /* Add range to bitset. If we are in UTF or UCP mode, then clear the + caseless bit, because the cranges handle caselessness (only) in this + condition; see the condition for PARSE_CLASS_CASELESS_UTF in + compile_optimize_class(). */ + add_to_class(((options & (PCRE2_UTF|PCRE2_UCP)) != 0)? + (options & ~PCRE2_CASELESS) : options, xoptions, cb, range[0], range[1]); + + if (range[1] > 255) break; + range += 2; + } + + if (cranges->char_lists_size > 0) + { + /* The cranges structure is still used and freed later. 
*/ + PCRE2_ASSERT((xclass_props & XCLASS_HIGH_ANY) == 0); + xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_CHAR_LISTS; + } + else + { + if ((xclass_props & XCLASS_HIGH_ANY) != 0) + { + PCRE2_ASSERT(range + 2 == end && range[0] <= 256 && + range[1] >= GET_MAX_CHAR_VALUE(utf)); + should_flip_negation = TRUE; + range = end; + } + + while (range < end) + { + uint32_t range_start = range[0]; + uint32_t range_end = range[1]; + + range += 2; + xclass_props |= XCLASS_REQUIRED; + + if (range_start < 256) range_start = 256; + + if (lengthptr != NULL) + { +#ifdef SUPPORT_UNICODE + if (utf) + { + *lengthptr += 1; + + if (range_start < range_end) + *lengthptr += PRIV(ord2utf)(range_start, class_uchardata); + + *lengthptr += PRIV(ord2utf)(range_end, class_uchardata); + continue; + } +#endif /* SUPPORT_UNICODE */ + + *lengthptr += range_start < range_end ? 3 : 2; + continue; + } + +#ifdef SUPPORT_UNICODE + if (utf) + { + if (range_start < range_end) + { + *class_uchardata++ = XCL_RANGE; + class_uchardata += PRIV(ord2utf)(range_start, class_uchardata); + } + else + *class_uchardata++ = XCL_SINGLE; + + class_uchardata += PRIV(ord2utf)(range_end, class_uchardata); + continue; + } +#endif /* SUPPORT_UNICODE */ + + /* Without UTF support, character values are constrained + by the bit length, and can only be > 256 for 16-bit and + 32-bit libraries. 
*/ +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (range_start < range_end) + { + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = range_start; + } + else + *class_uchardata++ = XCL_SINGLE; + + *class_uchardata++ = range_end; +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + } + + if (lengthptr == NULL) + cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data); + } + } +#endif /* SUPPORT_WIDE_CHARS */ + +/* If there are characters with values > 255, or Unicode property settings +(\p or \P), we have to compile an extended class, with its own opcode, +unless there were no property settings and there was a negated special such +as \S in the class, and PCRE2_UCP is not set, because in that case all +characters > 255 are in or not in the class, so any that were explicitly +given as well can be ignored. + +In the UCP case, if certain negated POSIX classes (ex: [:^ascii:]) were +were present in a class, we either have to match or not match all wide +characters (depending on whether the whole class is or is not negated). +This requirement is indicated by match_all_or_no_wide_chars being true. +We do this by including an explicit range, which works in both cases. +This applies only in UTF and 16-bit and 32-bit non-UTF modes, since there +cannot be any wide characters in 8-bit non-UTF mode. + +When there *are* properties in a positive UTF-8 or any 16-bit or 32_bit +class where \S etc is present without PCRE2_UCP, causing an extended class +to be compiled, we make sure that all characters > 255 are included by +forcing match_all_or_no_wide_chars to be true. + +If, when generating an xclass, there are no characters < 256, we can omit +the bitmap in the actual compiled code. 
*/ + +#ifdef SUPPORT_WIDE_CHARS /* Defined for 16/32 bits, or 8-bit with Unicode */ +if ((xclass_props & XCLASS_REQUIRED) != 0) + { + PCRE2_UCHAR *previous = code; + + if ((xclass_props & XCLASS_HAS_CHAR_LISTS) == 0) + *class_uchardata++ = XCL_END; /* Marks the end of extra data */ + *code++ = OP_XCLASS; + code += LINK_SIZE; + *code = negate_class? XCL_NOT:0; + if ((xclass_props & XCLASS_HAS_PROPS) != 0) *code |= XCL_HASPROP; + + /* If the map is required, move up the extra data to make room for it; + otherwise just move the code pointer to the end of the extra data. */ + + if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0 || has_bitmap != NULL) + { + if (negate_class) + { + uint32_t *classwords = cb->classbits.classwords; + for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i]; + } + + if (has_bitmap == NULL) + { + *code++ |= XCL_MAP; + (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, + CU2BYTES(class_uchardata - code)); + memcpy(code, classbits, 32); + code = class_uchardata + (32 / sizeof(PCRE2_UCHAR)); + } + else + { + code = class_uchardata; + if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0) + *has_bitmap = TRUE; + } + } + else code = class_uchardata; + + if ((xclass_props & XCLASS_HAS_CHAR_LISTS) != 0) + { + /* Char lists size is an even number, because all items are 16 or 32 + bit values. The character list data is always aligned to 32 bits. */ + size_t char_lists_size = cranges->char_lists_size; + PCRE2_ASSERT((char_lists_size & 0x1) == 0 && + (cb->char_lists_size & 0x3) == 0); + + if (lengthptr != NULL) + { + char_lists_size = CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t)); + +#if PCRE2_CODE_UNIT_WIDTH == 8 + *lengthptr += 2 + LINK_SIZE; +#else + *lengthptr += 1 + LINK_SIZE; +#endif + + cb->char_lists_size += char_lists_size; + + char_lists_size /= sizeof(PCRE2_UCHAR); + + /* Storage space for character lists is included + in the maximum pattern size. 
*/ + if (*lengthptr > MAX_PATTERN_SIZE || + MAX_PATTERN_SIZE - *lengthptr < char_lists_size) + { + *errorcodeptr = ERR20; /* Pattern is too large */ + return NULL; + } + } + else + { + uint8_t *data; + + PCRE2_ASSERT(cranges->char_lists_types <= XCL_TYPE_MASK); +#if PCRE2_CODE_UNIT_WIDTH == 8 + /* Encode as high / low bytes. */ + code[0] = (uint8_t)(XCL_LIST | + (cranges->char_lists_types >> 8)); + code[1] = (uint8_t)cranges->char_lists_types; + code += 2; +#else + *code++ = (PCRE2_UCHAR)(XCL_LIST | cranges->char_lists_types); +#endif + + /* Character lists are stored in backwards direction from + byte code start. The non-dfa/dfa matchers can access these + lists using the byte code start stored in match blocks. + Each list is aligned to 32 bit with an optional unused + 16 bit value at the beginning of the character list. */ + + cb->char_lists_size += char_lists_size; + data = (uint8_t*)cb->start_code - cb->char_lists_size; + + memcpy(data, (uint8_t*)(cranges + 1) + cranges->char_lists_start, + char_lists_size); + + /* Since character lists total size is less than MAX_PATTERN_SIZE, + their starting offset fits into a value which size is LINK_SIZE. */ + + char_lists_size = cb->char_lists_size; + PUT(code, 0, (uint32_t)(char_lists_size >> 1)); + code += LINK_SIZE; + +#if defined PCRE2_DEBUG || defined SUPPORT_VALGRIND + if ((char_lists_size & 0x2) != 0) + { + /* In debug the unused 16 bit value is set + to a fixed value and marked unused. 
*/ + ((uint16_t*)data)[-1] = 0x5555; +#ifdef SUPPORT_VALGRIND + VALGRIND_MAKE_MEM_NOACCESS(data - 2, 2); +#endif + } +#endif + + cb->char_lists_size = + CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t)); + + cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data); + } + } + + /* Now fill in the complete length of the item */ + + PUT(previous, 1, (int)(code - previous)); + goto DONE; /* End of class handling */ + } +#endif /* SUPPORT_WIDE_CHARS */ + +/* If there are no characters > 255, or they are all to be included or +excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the +whole class was negated and whether there were negative specials such as \S +(non-UCP) in the class. Then copy the 32-byte map into the code vector, +negating it if necessary. */ + +if (negate_class) + { + uint32_t *classwords = cb->classbits.classwords; + + for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i]; + } + +if ((SELECT_VALUE8(!utf, 0) || negate_class != should_flip_negation) && + cb->classbits.classwords[0] == ~(uint32_t)0) + { + const uint32_t *classwords = cb->classbits.classwords; + int i; + + for (i = 0; i < 8; i++) + if (classwords[i] != ~(uint32_t)0) break; + + if (i == 8) + { + *code++ = OP_ALLANY; + goto DONE; /* End of class handling */ + } + } + +*code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; +memcpy(code, classbits, 32); +code += 32 / sizeof(PCRE2_UCHAR); + +DONE: +*pcode = code; +return pptr - 1; +} + + + +/* ===================================================================*/ +/* Here follows a block of ECLASS-compiling functions. You may well want to +read them from top to bottom; they are ordered from leafmost (at the top) to +outermost parser (at the bottom of the file). */ + +/* This function folds one operand using the negation operator. +The new, combined chunk of stack code is written out to *pop_info. 
*/ + +static void +fold_negation(eclass_op_info *pop_info, PCRE2_SIZE *lengthptr, + BOOL preserve_classbits) +{ +/* If the chunk of stack code is already composed of multiple ops, we won't +descend in and try and propagate the negation down the tree. (That would lead +to O(n^2) compile-time, which could be exploitable with a malicious regex - +although maybe that's not really too much of a worry in a library that offers +an exponential-time matching function!) */ + +if (pop_info->op_single_type == 0) + { + if (lengthptr != NULL) + *lengthptr += 1; + else + pop_info->code_start[pop_info->length] = ECL_NOT; + pop_info->length += 1; + } + +/* Otherwise, it's a nice single-op item, so we can easily fold in the negation +without needing to produce an ECL_NOT. */ + +else if (pop_info->op_single_type == ECL_ANY || + pop_info->op_single_type == ECL_NONE) + { + pop_info->op_single_type = (pop_info->op_single_type == ECL_NONE)? + ECL_ANY : ECL_NONE; + if (lengthptr == NULL) + *(pop_info->code_start) = pop_info->op_single_type; + } +else + { + PCRE2_ASSERT(pop_info->op_single_type == ECL_XCLASS && + pop_info->length >= 1 + LINK_SIZE + 1); + if (lengthptr == NULL) + pop_info->code_start[1 + LINK_SIZE] ^= XCL_NOT; + } + +if (!preserve_classbits) + { + for (int i = 0; i < 8; i++) + pop_info->bits.classwords[i] = ~pop_info->bits.classwords[i]; + } +} + + + +/* This function folds together two operands using a binary operator. +The new, combined chunk of stack code is written out to *lhs_op_info. 
*/ + +static void +fold_binary(int op, eclass_op_info *lhs_op_info, eclass_op_info *rhs_op_info, + PCRE2_SIZE *lengthptr) +{ +switch (op) + { + /* ECL_AND truth table: + + LHS RHS RESULT + ---------------- + ANY * RHS + * ANY LHS + NONE * NONE + * NONE NONE + X Y X & Y + */ + + case ECL_AND: + if (rhs_op_info->op_single_type == ECL_ANY) + { + /* no-op: drop the RHS */ + } + else if (lhs_op_info->op_single_type == ECL_ANY) + { + /* no-op: drop the LHS, and memmove the RHS into its place */ + if (lengthptr == NULL) + memmove(lhs_op_info->code_start, rhs_op_info->code_start, + CU2BYTES(rhs_op_info->length)); + lhs_op_info->length = rhs_op_info->length; + lhs_op_info->op_single_type = rhs_op_info->op_single_type; + } + else if (rhs_op_info->op_single_type == ECL_NONE) + { + /* the result is ECL_NONE: write into the LHS */ + if (lengthptr == NULL) + lhs_op_info->code_start[0] = ECL_NONE; + lhs_op_info->length = 1; + lhs_op_info->op_single_type = ECL_NONE; + } + else if (lhs_op_info->op_single_type == ECL_NONE) + { + /* the result is ECL_NONE: drop the RHS */ + } + else + { + /* Both of LHS & RHS are either ECL_XCLASS, or compound operations. 
*/ + if (lengthptr != NULL) + *lengthptr += 1; + else + { + PCRE2_ASSERT(rhs_op_info->code_start == + lhs_op_info->code_start + lhs_op_info->length); + rhs_op_info->code_start[rhs_op_info->length] = ECL_AND; + } + lhs_op_info->length += rhs_op_info->length + 1; + lhs_op_info->op_single_type = 0; + } + + for (int i = 0; i < 8; i++) + lhs_op_info->bits.classwords[i] &= rhs_op_info->bits.classwords[i]; + break; + + /* ECL_OR truth table: + + LHS RHS RESULT + ---------------- + ANY * ANY + * ANY ANY + NONE * RHS + * NONE LHS + X Y X | Y + */ + + case ECL_OR: + if (rhs_op_info->op_single_type == ECL_NONE) + { + /* no-op: drop the RHS */ + } + else if (lhs_op_info->op_single_type == ECL_NONE) + { + /* no-op: drop the LHS, and memmove the RHS into its place */ + if (lengthptr == NULL) + memmove(lhs_op_info->code_start, rhs_op_info->code_start, + CU2BYTES(rhs_op_info->length)); + lhs_op_info->length = rhs_op_info->length; + lhs_op_info->op_single_type = rhs_op_info->op_single_type; + } + else if (rhs_op_info->op_single_type == ECL_ANY) + { + /* the result is ECL_ANY: write into the LHS */ + if (lengthptr == NULL) + lhs_op_info->code_start[0] = ECL_ANY; + lhs_op_info->length = 1; + lhs_op_info->op_single_type = ECL_ANY; + } + else if (lhs_op_info->op_single_type == ECL_ANY) + { + /* the result is ECL_ANY: drop the RHS */ + } + else + { + /* Both of LHS & RHS are either ECL_XCLASS, or compound operations. 
*/ + if (lengthptr != NULL) + *lengthptr += 1; + else + { + PCRE2_ASSERT(rhs_op_info->code_start == + lhs_op_info->code_start + lhs_op_info->length); + rhs_op_info->code_start[rhs_op_info->length] = ECL_OR; + } + lhs_op_info->length += rhs_op_info->length + 1; + lhs_op_info->op_single_type = 0; + } + + for (int i = 0; i < 8; i++) + lhs_op_info->bits.classwords[i] |= rhs_op_info->bits.classwords[i]; + break; + + /* ECL_XOR truth table: + + LHS RHS RESULT + ---------------- + ANY * !RHS + * ANY !LHS + NONE * RHS + * NONE LHS + X Y X ^ Y + */ + + case ECL_XOR: + if (rhs_op_info->op_single_type == ECL_NONE) + { + /* no-op: drop the RHS */ + } + else if (lhs_op_info->op_single_type == ECL_NONE) + { + /* no-op: drop the LHS, and memmove the RHS into its place */ + if (lengthptr == NULL) + memmove(lhs_op_info->code_start, rhs_op_info->code_start, + CU2BYTES(rhs_op_info->length)); + lhs_op_info->length = rhs_op_info->length; + lhs_op_info->op_single_type = rhs_op_info->op_single_type; + } + else if (rhs_op_info->op_single_type == ECL_ANY) + { + /* the result is !LHS: fold in the negation, and drop the RHS */ + /* Preserve the classbits, because we promise to deal with them later. */ + fold_negation(lhs_op_info, lengthptr, TRUE); + } + else if (lhs_op_info->op_single_type == ECL_ANY) + { + /* the result is !RHS: drop the LHS, memmove the RHS into its place, and + fold in the negation */ + if (lengthptr == NULL) + memmove(lhs_op_info->code_start, rhs_op_info->code_start, + CU2BYTES(rhs_op_info->length)); + lhs_op_info->length = rhs_op_info->length; + lhs_op_info->op_single_type = rhs_op_info->op_single_type; + + /* Preserve the classbits, because we promise to deal with them later. */ + fold_negation(lhs_op_info, lengthptr, TRUE); + } + else + { + /* Both of LHS & RHS are either ECL_XCLASS, or compound operations. 
*/ + if (lengthptr != NULL) + *lengthptr += 1; + else + { + PCRE2_ASSERT(rhs_op_info->code_start == + lhs_op_info->code_start + lhs_op_info->length); + rhs_op_info->code_start[rhs_op_info->length] = ECL_XOR; + } + lhs_op_info->length += rhs_op_info->length + 1; + lhs_op_info->op_single_type = 0; + } + + for (int i = 0; i < 8; i++) + lhs_op_info->bits.classwords[i] ^= rhs_op_info->bits.classwords[i]; + break; + + default: + PCRE2_DEBUG_UNREACHABLE(); + break; + } +} + + + +static BOOL +compile_eclass_nested(eclass_context *context, BOOL negated, + uint32_t **pptr, PCRE2_UCHAR **pcode, + eclass_op_info *pop_info, PCRE2_SIZE *lengthptr); + +/* This function consumes a group of implicitly-unioned class elements. +These can be characters, ranges, properties, or nested classes, as long +as they are all joined by being placed adjacently. */ + +static BOOL +compile_class_operand(eclass_context *context, BOOL negated, + uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info, + PCRE2_SIZE *lengthptr) +{ +uint32_t *ptr = *pptr; +uint32_t *prev_ptr; +PCRE2_UCHAR *code = *pcode; +PCRE2_UCHAR *code_start = code; +PCRE2_SIZE prev_length = (lengthptr != NULL)? *lengthptr : 0; +PCRE2_SIZE extra_length; +uint32_t meta = META_CODE(*ptr); + +switch (meta) + { + case META_CLASS_EMPTY_NOT: + case META_CLASS_EMPTY: + ++ptr; + pop_info->length = 1; + if ((meta == META_CLASS_EMPTY) == negated) + { + *code++ = pop_info->op_single_type = ECL_ANY; + memset(pop_info->bits.classbits, 0xff, 32); + } + else + { + *code++ = pop_info->op_single_type = ECL_NONE; + memset(pop_info->bits.classbits, 0, 32); + } + break; + + case META_CLASS: + case META_CLASS_NOT: + if ((*ptr & CLASS_IS_ECLASS) != 0) + { + if (!compile_eclass_nested(context, negated, &ptr, &code, + pop_info, lengthptr)) + return FALSE; + + PCRE2_ASSERT(*ptr == META_CLASS_END); + ptr++; + goto DONE; + } + + ptr++; + /* Fall through */ + + default: + /* Scan forward characters, ranges, and properties. 
+ For example: inside [a-z_ -- m] we don't have brackets around "a-z_" but + we still need to collect that fragment up into a "leaf" OP_CLASS. */ + + prev_ptr = ptr; + ptr = PRIV(compile_class_not_nested)( + context->options, context->xoptions, ptr, &code, + (meta != META_CLASS_NOT) == negated, &context->needs_bitmap, + context->errorcodeptr, context->cb, lengthptr); + if (ptr == NULL) return FALSE; + + /* We must have a 100% guarantee that ptr increases when + compile_class_operand() returns, even on Release builds, so that we can + statically prove our loops terminate. */ + if (ptr <= prev_ptr) + { + PCRE2_DEBUG_UNREACHABLE(); + return FALSE; + } + + /* If we fell through above, consume the closing ']'. */ + if (meta == META_CLASS || meta == META_CLASS_NOT) + { + PCRE2_ASSERT(*ptr == META_CLASS_END); + ptr++; + } + + /* Regardless of whether (lengthptr == NULL), some data will still be written + out to *pcode, which we need: we have to peek at it, to transform the opcode + into the ECLASS version (since we need to hoist up the bitmaps). */ + PCRE2_ASSERT(code > code_start); + extra_length = (lengthptr != NULL)? *lengthptr - prev_length : 0; + + /* Easiest case: convert OP_ALLANY to ECL_ANY */ + + if (*code_start == OP_ALLANY) + { + PCRE2_ASSERT(code - code_start == 1 && extra_length == 0); + pop_info->length = 1; + *code_start = pop_info->op_single_type = ECL_ANY; + memset(pop_info->bits.classbits, 0xff, 32); + } + + /* For OP_CLASS and OP_NCLASS, we hoist out the bitmap and convert to + ECL_NONE / ECL_ANY respectively. */ + + else if (*code_start == OP_CLASS || *code_start == OP_NCLASS) + { + PCRE2_ASSERT(code - code_start == 1 + 32 / sizeof(PCRE2_UCHAR) && + extra_length == 0); + pop_info->length = 1; + *code_start = pop_info->op_single_type = + (*code_start == OP_CLASS)? 
ECL_NONE : ECL_ANY; + memcpy(pop_info->bits.classbits, code_start + 1, 32); + /* Rewind the code pointer, but make sure we adjust *lengthptr, because we + do need to reserve that space (even though we only use it temporarily). */ + if (lengthptr != NULL) + *lengthptr += code - (code_start + 1); + code = code_start + 1; + + if (!context->needs_bitmap && *code_start == ECL_NONE) + { + uint32_t *classwords = pop_info->bits.classwords; + + for (int i = 0; i < 8; i++) + if (classwords[i] != 0) + { + context->needs_bitmap = TRUE; + break; + } + } + else + context->needs_bitmap = TRUE; + } + + /* Finally, for OP_XCLASS we hoist out the bitmap (if any), and convert to + ECL_XCLASS. */ + + else + { + PCRE2_ASSERT(*code_start == OP_XCLASS); + *code_start = pop_info->op_single_type = ECL_XCLASS; + + PCRE2_ASSERT(code - code_start >= 1 + LINK_SIZE + 1); + + memcpy(pop_info->bits.classbits, context->cb->classbits.classbits, 32); + pop_info->length = (code - code_start) + extra_length; + } + + break; + } /* End of switch(meta) */ + +pop_info->code_start = (lengthptr == NULL)? code_start : NULL; + +if (lengthptr != NULL) + { + *lengthptr += code - code_start; + code = code_start; + } + +DONE: +PCRE2_ASSERT(lengthptr == NULL || (code == code_start)); + +*pptr = ptr; +*pcode = code; +return TRUE; +} + + + +/* This function consumes a group of implicitly-unioned class elements. +These can be characters, ranges, properties, or nested classes, as long +as they are all joined by being placed adjacently. */ + +static BOOL +compile_class_juxtaposition(eclass_context *context, BOOL negated, + uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info, + PCRE2_SIZE *lengthptr) +{ +uint32_t *ptr = *pptr; +PCRE2_UCHAR *code = *pcode; +#ifdef PCRE2_DEBUG +PCRE2_UCHAR *start_code = *pcode; +#endif + +/* See compile_class_binary_loose() for comments on compile-time folding of +the "negated" flag. */ + +/* Because it's a non-empty class, there must be an operand at the start. 
*/ +if (!compile_class_operand(context, negated, &ptr, &code, pop_info, lengthptr)) + return FALSE; + +while (*ptr != META_CLASS_END && + !(*ptr >= META_ECLASS_AND && *ptr <= META_ECLASS_NOT)) + { + uint32_t op; + BOOL rhs_negated; + eclass_op_info rhs_op_info; + + if (negated) + { + /* !(A juxtapose B) -> !A && !B */ + op = ECL_AND; + rhs_negated = TRUE; + } + else + { + /* A juxtapose B -> A || B */ + op = ECL_OR; + rhs_negated = FALSE; + } + + /* An operand must follow the operator. */ + if (!compile_class_operand(context, rhs_negated, &ptr, &code, + &rhs_op_info, lengthptr)) + return FALSE; + + /* Convert infix to postfix (RPN). */ + fold_binary(op, pop_info, &rhs_op_info, lengthptr); + if (lengthptr == NULL) + code = pop_info->code_start + pop_info->length; + } + +PCRE2_ASSERT(lengthptr == NULL || code == start_code); + +*pptr = ptr; +*pcode = code; +return TRUE; +} + + + +/* This function consumes unary prefix operators. */ + +static BOOL +compile_class_unary(eclass_context *context, BOOL negated, + uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info, + PCRE2_SIZE *lengthptr) +{ +uint32_t *ptr = *pptr; +#ifdef PCRE2_DEBUG +PCRE2_UCHAR *start_code = *pcode; +#endif + +while (*ptr == META_ECLASS_NOT) + { + ++ptr; + negated = !negated; + } + +*pptr = ptr; +/* Because it's a non-empty class, there must be an operand. */ +if (!compile_class_juxtaposition(context, negated, pptr, pcode, + pop_info, lengthptr)) + return FALSE; + +PCRE2_ASSERT(lengthptr == NULL || *pcode == start_code); +return TRUE; +} + + + +/* This function consumes tightly-binding binary operators. */ + +static BOOL +compile_class_binary_tight(eclass_context *context, BOOL negated, + uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info, + PCRE2_SIZE *lengthptr) +{ +uint32_t *ptr = *pptr; +PCRE2_UCHAR *code = *pcode; +#ifdef PCRE2_DEBUG +PCRE2_UCHAR *start_code = *pcode; +#endif + +/* See compile_class_binary_loose() for comments on compile-time folding of +the "negated" flag. 
*/ + +/* Because it's a non-empty class, there must be an operand at the start. */ +if (!compile_class_unary(context, negated, &ptr, &code, pop_info, lengthptr)) + return FALSE; + +while (*ptr == META_ECLASS_AND) + { + uint32_t op; + BOOL rhs_negated; + eclass_op_info rhs_op_info; + + if (negated) + { + /* !(A && B) -> !A || !B */ + op = ECL_OR; + rhs_negated = TRUE; + } + else + { + /* A && B -> A && B */ + op = ECL_AND; + rhs_negated = FALSE; + } + + ++ptr; + + /* An operand must follow the operator. */ + if (!compile_class_unary(context, rhs_negated, &ptr, &code, + &rhs_op_info, lengthptr)) + return FALSE; + + /* Convert infix to postfix (RPN). */ + fold_binary(op, pop_info, &rhs_op_info, lengthptr); + if (lengthptr == NULL) + code = pop_info->code_start + pop_info->length; + } + +PCRE2_ASSERT(lengthptr == NULL || code == start_code); + +*pptr = ptr; +*pcode = code; +return TRUE; +} + + + +/* This function consumes loosely-binding binary operators. */ + +static BOOL +compile_class_binary_loose(eclass_context *context, BOOL negated, + uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info, + PCRE2_SIZE *lengthptr) +{ +uint32_t *ptr = *pptr; +PCRE2_UCHAR *code = *pcode; +#ifdef PCRE2_DEBUG +PCRE2_UCHAR *start_code = *pcode; +#endif + +/* We really want to fold the negation operator, if at all possible, so that +simple cases can be reduced down. In particular, in 8-bit no-UTF mode, we want +to produce a fully-folded expression, so that we can guarantee not to emit any +OP_ECLASS codes (in the same way that we never emit OP_XCLASS in this mode). + +This has the consequence that with a little ingenuity, we can in fact avoid +emitting (nearly...) all cases of the "NOT" operator. Imagine that we have: + !(A ... +We have parsed the preceding "!", and we are about to parse the "A" operand. We +don't know yet whether there will even be a following binary operand! 
Both of +these are possibilities for what follows: + !(A && B) + !(A) +However, we can still fold the "!" into the "A" operand, because no matter what +the following binary operator will be, we can produce an expression which is +equivalent. */ + +/* Because it's a non-empty class, there must be an operand at the start. */ +if (!compile_class_binary_tight(context, negated, &ptr, &code, + pop_info, lengthptr)) + return FALSE; + +while (*ptr >= META_ECLASS_OR && *ptr <= META_ECLASS_XOR) + { + uint32_t op; + BOOL op_neg; + BOOL rhs_negated; + eclass_op_info rhs_op_info; + + if (negated) + { + /* The whole expression is being negated; we respond by unconditionally + negating the LHS A, before seeing what follows. And hooray! We can recover, + no matter what follows. */ + /* !(A || B) -> !A && !B */ + /* !(A -- B) -> !(A && !B) -> !A || B */ + /* !(A XOR B) -> !(!A XOR !B) -> !A XNOR !B */ + op = (*ptr == META_ECLASS_OR )? ECL_AND : + (*ptr == META_ECLASS_SUB)? ECL_OR : + /*ptr == META_ECLASS_XOR*/ ECL_XOR; + op_neg = (*ptr == META_ECLASS_XOR); + rhs_negated = *ptr != META_ECLASS_SUB; + } + else + { + /* A || B -> A || B */ + /* A -- B -> A && !B */ + /* A XOR B -> A XOR B */ + op = (*ptr == META_ECLASS_OR )? ECL_OR : + (*ptr == META_ECLASS_SUB)? ECL_AND : + /*ptr == META_ECLASS_XOR*/ ECL_XOR; + op_neg = FALSE; + rhs_negated = *ptr == META_ECLASS_SUB; + } + + ++ptr; + + /* An operand must follow the operator. */ + if (!compile_class_binary_tight(context, rhs_negated, &ptr, &code, + &rhs_op_info, lengthptr)) + return FALSE; + + /* Convert infix to postfix (RPN). */ + fold_binary(op, pop_info, &rhs_op_info, lengthptr); + if (op_neg) fold_negation(pop_info, lengthptr, FALSE); + if (lengthptr == NULL) + code = pop_info->code_start + pop_info->length; + } + +PCRE2_ASSERT(lengthptr == NULL || code == start_code); + +*pptr = ptr; +*pcode = code; +return TRUE; +} + + + +/* This function converts the META codes in pptr into opcodes written to +pcode. 
The pptr must start at a META_CLASS or META_CLASS_NOT. + +The class is compiled as a left-associative sequence of operator +applications. + +The pptr will be left pointing at the matching META_CLASS_END. */ + +static BOOL +compile_eclass_nested(eclass_context *context, BOOL negated, + uint32_t **pptr, PCRE2_UCHAR **pcode, + eclass_op_info *pop_info, PCRE2_SIZE *lengthptr) +{ +uint32_t *ptr = *pptr; +#ifdef PCRE2_DEBUG +PCRE2_UCHAR *start_code = *pcode; +#endif + +/* The CLASS_IS_ECLASS bit must be set since it is a nested class. */ +PCRE2_ASSERT(*ptr == (META_CLASS | CLASS_IS_ECLASS) || + *ptr == (META_CLASS_NOT | CLASS_IS_ECLASS)); + +if (*ptr++ == (META_CLASS_NOT | CLASS_IS_ECLASS)) + negated = !negated; + +(*pptr)++; + +/* Because it's a non-empty class, there must be an operand at the start. */ +if (!compile_class_binary_loose(context, negated, pptr, pcode, + pop_info, lengthptr)) + return FALSE; + +PCRE2_ASSERT(**pptr == META_CLASS_END); +PCRE2_ASSERT(lengthptr == NULL || *pcode == start_code); +return TRUE; +} + +BOOL +PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions, + uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr, + compile_block *cb, PCRE2_SIZE *lengthptr) +{ +eclass_context context; +eclass_op_info op_info; +PCRE2_SIZE previous_length = (lengthptr != NULL)? *lengthptr : 0; +PCRE2_UCHAR *code = *pcode; +PCRE2_UCHAR *previous; +BOOL allbitsone = TRUE; + +context.needs_bitmap = FALSE; +context.options = options; +context.xoptions = xoptions; +context.errorcodeptr = errorcodeptr; +context.cb = cb; + +previous = code; +*code++ = OP_ECLASS; +code += LINK_SIZE; +*code++ = 0; /* Flags, currently zero. 
*/ +if (!compile_eclass_nested(&context, FALSE, pptr, &code, &op_info, lengthptr)) + return FALSE; + +if (lengthptr != NULL) + { + *lengthptr += code - previous; + code = previous; + /* (*lengthptr - previous_length) now holds the amount of buffer that + we require to make the call to compile_class_nested() with + lengthptr = NULL, and including the (1+LINK_SIZE+1) that we write out + before that call. */ + } + +/* Do some useful counting of what's in the bitmap. */ +for (int i = 0; i < 8; i++) + if (op_info.bits.classwords[i] != 0xffffffff) + { + allbitsone = FALSE; + break; + } + +/* After constant-folding the extended class syntax, it may turn out to be +a simple class after all. In that case, we can unwrap it from the +OP_ECLASS container - and in fact, we must do so, because in 8-bit +no-Unicode mode the matcher is compiled without support for OP_ECLASS. */ + +#ifndef SUPPORT_WIDE_CHARS +PCRE2_ASSERT(op_info.op_single_type != 0); +#else +if (op_info.op_single_type != 0) +#endif + { + /* Rewind back over the OP_ECLASS. */ + code = previous; + + /* If the bits are all ones, and the "high characters" are all matched + too, we use a special-cased encoding of OP_ALLANY. */ + + if (op_info.op_single_type == ECL_ANY && allbitsone) + { + /* Advancing code means rewinding lengthptr, at this point. */ + if (lengthptr != NULL) *lengthptr -= 1; + *code++ = OP_ALLANY; + } + + /* If the high bits are all matched / all not-matched, then we emit an + OP_NCLASS/OP_CLASS respectively. */ + + else if (op_info.op_single_type == ECL_ANY || + op_info.op_single_type == ECL_NONE) + { + PCRE2_SIZE required_len = 1 + (32 / sizeof(PCRE2_UCHAR)); + + if (lengthptr != NULL) + { + if (required_len > (*lengthptr - previous_length)) + *lengthptr = previous_length + required_len; + } + + /* Advancing code means rewinding lengthptr, at this point. */ + if (lengthptr != NULL) *lengthptr -= required_len; + *code++ = (op_info.op_single_type == ECL_ANY)? 
OP_NCLASS : OP_CLASS; + memcpy(code, op_info.bits.classbits, 32); + code += 32 / sizeof(PCRE2_UCHAR); + } + + /* Otherwise, we have an ECL_XCLASS, so we have the OP_XCLASS data + there, but, we pulled out its bitmap into op_info, so now we have to + put that back into the OP_XCLASS. */ + + else + { +#ifndef SUPPORT_WIDE_CHARS + PCRE2_DEBUG_UNREACHABLE(); +#else + BOOL need_map = context.needs_bitmap; + PCRE2_SIZE required_len; + + PCRE2_ASSERT(op_info.op_single_type == ECL_XCLASS); + required_len = op_info.length + (need_map? 32/sizeof(PCRE2_UCHAR) : 0); + + if (lengthptr != NULL) + { + /* Don't unconditionally request all the space we need - we may + already have asked for more during processing of the ECLASS. */ + if (required_len > (*lengthptr - previous_length)) + *lengthptr = previous_length + required_len; + + /* The code we write out here won't be ignored, even during the + (lengthptr != NULL) phase, because if there's a following quantifier + it will peek backwards. So we do have to write out a (truncated) + OP_XCLASS, even on this branch. */ + *lengthptr -= 1 + LINK_SIZE + 1; + *code++ = OP_XCLASS; + PUT(code, 0, 1 + LINK_SIZE + 1); + code += LINK_SIZE; + *code++ = 0; + } + else + { + PCRE2_UCHAR *rest; + PCRE2_SIZE rest_len; + PCRE2_UCHAR flags; + + /* 1 unit: OP_XCLASS | LINK_SIZE units | 1 unit: flags | ...rest */ + PCRE2_ASSERT(op_info.length >= 1 + LINK_SIZE + 1); + rest = op_info.code_start + 1 + LINK_SIZE + 1; + rest_len = (op_info.code_start + op_info.length) - rest; + + /* First read any data we use, before memmove splats it. */ + flags = op_info.code_start[1 + LINK_SIZE]; + PCRE2_ASSERT((flags & XCL_MAP) == 0); + + /* Next do the memmove before any writes. */ + memmove(code + 1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0), + rest, CU2BYTES(rest_len)); + + /* Finally write the header data. */ + *code++ = OP_XCLASS; + PUT(code, 0, (int)required_len); + code += LINK_SIZE; + *code++ = flags | (need_map? 
XCL_MAP : 0); + if (need_map) + { + memcpy(code, op_info.bits.classbits, 32); + code += 32 / sizeof(PCRE2_UCHAR); + } + code += rest_len; + } +#endif /* SUPPORT_WIDE_CHARS */ + } + } + +/* Otherwise, we're going to keep the OP_ECLASS. However, again we need +to do some adjustment to insert the bitmap if we have one. */ + +#ifdef SUPPORT_WIDE_CHARS +else + { + BOOL need_map = context.needs_bitmap; + PCRE2_SIZE required_len = + 1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0) + op_info.length; + + if (lengthptr != NULL) + { + if (required_len > (*lengthptr - previous_length)) + *lengthptr = previous_length + required_len; + + /* As for the XCLASS branch above, we do have to write out a dummy + OP_ECLASS, because of the backwards peek by the quantifier code. Write + out a (truncated) OP_ECLASS, even on this branch. */ + *lengthptr -= 1 + LINK_SIZE + 1; + *code++ = OP_ECLASS; + PUT(code, 0, 1 + LINK_SIZE + 1); + code += LINK_SIZE; + *code++ = 0; + } + else + { + if (need_map) + { + PCRE2_UCHAR *map_start = previous + 1 + LINK_SIZE + 1; + previous[1 + LINK_SIZE] |= ECL_MAP; + memmove(map_start + 32/sizeof(PCRE2_UCHAR), map_start, + CU2BYTES(code - map_start)); + memcpy(map_start, op_info.bits.classbits, 32); + code += 32 / sizeof(PCRE2_UCHAR); + } + PUT(previous, 1, (int)(code - previous)); + } + } +#endif /* SUPPORT_WIDE_CHARS */ + +*pcode = code; +return TRUE; +} + +/* End of pcre2_compile_class.c */ diff --git a/libpcre/src/pcre2_config.c b/libpcre/src/pcre2_config.c index 5ef103caf..031981b09 100644 --- a/libpcre/src/pcre2_config.c +++ b/libpcre/src/pcre2_config.c @@ -224,8 +224,8 @@ switch (what) XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted. There are problems using an "obvious" approach like this: - XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE_MINOR) - XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE_DATE) + XSTRING(PCRE2_MAJOR) "." 
XSTRING(PCRE2_MINOR) + XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE2_DATE) because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion of STRING(). The C standard states: "If (before argument substitution) any diff --git a/libpcre/src/pcre2_context.c b/libpcre/src/pcre2_context.c index 0bc2ea0b0..2345145d3 100644 --- a/libpcre/src/pcre2_context.c +++ b/libpcre/src/pcre2_context.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -130,17 +130,19 @@ return gcontext; /* A default compile context is set up to save having to initialize at run time when no context is supplied to the compile function. */ -const pcre2_compile_context PRIV(default_compile_context) = { +pcre2_compile_context PRIV(default_compile_context) = { { default_malloc, default_free, NULL }, /* Default memory handling */ NULL, /* Stack guard */ NULL, /* Stack guard data */ PRIV(default_tables), /* Character tables */ PCRE2_UNSET, /* Max pattern length */ + PCRE2_UNSET, /* Max pattern compiled length */ BSR_DEFAULT, /* Backslash R default */ NEWLINE_DEFAULT, /* Newline convention */ PARENS_NEST_LIMIT, /* As it says */ 0, /* Extra options */ - MAX_VARLOOKBEHIND /* As it says */ + MAX_VARLOOKBEHIND, /* As it says */ + PCRE2_OPTIMIZATION_ALL /* All optimizations enabled */ }; /* The create function copies the default into the new memory, but must @@ -162,7 +164,7 @@ return ccontext; /* A default match context is set up to save having to initialize at run time when no context is supplied to a match function. 
*/ -const pcre2_match_context PRIV(default_match_context) = { +pcre2_match_context PRIV(default_match_context) = { { default_malloc, default_free, NULL }, #ifdef SUPPORT_JIT NULL, /* JIT callback */ @@ -172,6 +174,8 @@ const pcre2_match_context PRIV(default_match_context) = { NULL, /* Callout data */ NULL, /* Substitute callout function */ NULL, /* Substitute callout data */ + NULL, /* Substitute case callout function */ + NULL, /* Substitute case callout data */ PCRE2_UNSET, /* Offset limit */ HEAP_LIMIT, MATCH_LIMIT, @@ -196,7 +200,7 @@ return mcontext; /* A default convert context is set up to save having to initialize at run time when no context is supplied to the convert function. */ -const pcre2_convert_context PRIV(default_convert_context) = { +pcre2_convert_context PRIV(default_convert_context) = { { default_malloc, default_free, NULL }, /* Default memory handling */ #ifdef _WIN32 CHAR_BACKSLASH, /* Default path separator */ @@ -352,6 +356,13 @@ ccontext->max_pattern_length = length; return 0; } +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_max_pattern_compiled_length(pcre2_compile_context *ccontext, PCRE2_SIZE length) +{ +ccontext->max_pattern_compiled_length = length; +return 0; +} + PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline) { @@ -401,6 +412,38 @@ ccontext->stack_guard_data = user_data; return 0; } +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_optimize(pcre2_compile_context *ccontext, uint32_t directive) +{ +if (ccontext == NULL) + return PCRE2_ERROR_NULL; + +switch (directive) + { + case PCRE2_OPTIMIZATION_NONE: + ccontext->optimization_flags = 0; + break; + + case PCRE2_OPTIMIZATION_FULL: + ccontext->optimization_flags = PCRE2_OPTIMIZATION_ALL; + break; + + default: + if (directive >= PCRE2_AUTO_POSSESS && directive <= PCRE2_START_OPTIMIZE_OFF) + { + /* Even directive numbers starting from 64 switch a bit on; + * Odd directive numbers starting from 65 switch a bit off */ + 
if ((directive & 1) != 0) + ccontext->optimization_flags &= ~(1u << ((directive >> 1) - 32)); + else + ccontext->optimization_flags |= 1u << ((directive >> 1) - 32); + return 0; + } + return PCRE2_ERROR_BADOPTION; + } + +return 0; +} /* ------------ Match context ------------ */ @@ -416,13 +459,24 @@ return 0; PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_substitute_callout(pcre2_match_context *mcontext, int (*substitute_callout)(pcre2_substitute_callout_block *, void *), - void *substitute_callout_data) + void *substitute_callout_data) { mcontext->substitute_callout = substitute_callout; mcontext->substitute_callout_data = substitute_callout_data; return 0; } +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, + PCRE2_SIZE, int, void *), + void *substitute_case_callout_data) +{ +mcontext->substitute_case_callout = substitute_case_callout; +mcontext->substitute_case_callout_data = substitute_case_callout_data; +return 0; +} + PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit) { diff --git a/libpcre/src/pcre2_convert.c b/libpcre/src/pcre2_convert.c index fe396ae4f..d2b238ca4 100644 --- a/libpcre/src/pcre2_convert.c +++ b/libpcre/src/pcre2_convert.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2022 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -74,7 +74,7 @@ enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET, #define PUTCHARS(string) \ { \ - for (s = (char *)(string); *s != 0; s++) \ + for (const char *s = string; *s != 0; s++) \ { \ if (p >= endp) return PCRE2_ERROR_NOMEMORY; \ *p++ = *s; \ @@ -125,7 +125,6 @@ convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength, BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length, PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext) { -char *s; PCRE2_SPTR posix = pattern; PCRE2_UCHAR *p = use_buffer; PCRE2_UCHAR *pp = p; @@ -1065,7 +1064,7 @@ pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options, PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr, pcre2_convert_context *ccontext) { -int i, rc; +int rc; PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE]; PCRE2_UCHAR *use_buffer = dummy_buffer; PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE; @@ -1119,7 +1118,7 @@ if (buffptr != NULL && *buffptr != NULL) /* Call an individual converter, either just once (if a buffer was provided or just the length is needed), or twice (if a memory allocation is required). */ -for (i = 0; i < 2; i++) +for (int i = 0; i < 2; i++) { PCRE2_UCHAR *allocated; BOOL dummyrun = buffptr == NULL || *buffptr == NULL; @@ -1138,8 +1137,7 @@ for (i = 0; i < 2; i++) break; default: - *bufflenptr = 0; /* Error offset */ - return PCRE2_ERROR_INTERNAL; + goto EXIT; } if (rc != 0 || /* Error */ @@ -1159,8 +1157,12 @@ for (i = 0; i < 2; i++) use_length = *bufflenptr + 1; } -/* Control should never get here. */ +/* Something went terribly wrong. 
Trigger an assert and return an error */ +PCRE2_DEBUG_UNREACHABLE(); +EXIT: + +*bufflenptr = 0; /* Error offset */ return PCRE2_ERROR_INTERNAL; } diff --git a/libpcre/src/pcre2_dfa_match.c b/libpcre/src/pcre2_dfa_match.c index caae65248..ebf31d284 100644 --- a/libpcre/src/pcre2_dfa_match.c +++ b/libpcre/src/pcre2_dfa_match.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -156,6 +156,7 @@ static const uint8_t coptable[] = { 0, /* CLASS */ 0, /* NCLASS */ 0, /* XCLASS - variable length */ + 0, /* ECLASS - variable length */ 0, /* REF */ 0, /* REFI */ 0, /* DNREF */ @@ -175,6 +176,7 @@ static const uint8_t coptable[] = { 0, /* Assert behind not */ 0, /* NA assert */ 0, /* NA assert behind */ + 0, /* Assert scan substring */ 0, /* ONCE */ 0, /* SCRIPT_RUN */ 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */ @@ -188,7 +190,7 @@ static const uint8_t coptable[] = { 0, 0, /* COMMIT, COMMIT_ARG */ 0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */ 0, 0, 0, /* CLOSE, SKIPZERO, DEFINE */ - 0, 0 /* \B and \b in UCP mode */ + 0, 0, /* \B and \b in UCP mode */ }; /* This table identifies those opcodes that inspect a character. 
It is used to @@ -234,6 +236,7 @@ static const uint8_t poptable[] = { 1, /* CLASS */ 1, /* NCLASS */ 1, /* XCLASS - variable length */ + 1, /* ECLASS - variable length */ 0, /* REF */ 0, /* REFI */ 0, /* DNREF */ @@ -253,6 +256,7 @@ static const uint8_t poptable[] = { 0, /* Assert behind not */ 0, /* NA assert */ 0, /* NA assert behind */ + 0, /* Assert scan substring */ 0, /* ONCE */ 0, /* SCRIPT_RUN */ 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */ @@ -266,9 +270,13 @@ static const uint8_t poptable[] = { 0, 0, /* COMMIT, COMMIT_ARG */ 0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */ 0, 0, 0, /* CLOSE, SKIPZERO, DEFINE */ - 1, 1 /* \B and \b in UCP mode */ + 1, 1, /* \B and \b in UCP mode */ }; +/* Compile-time check that these tables have the correct size. */ +STATIC_ASSERT(sizeof(coptable) == OP_TABLE_LENGTH, coptable); +STATIC_ASSERT(sizeof(poptable) == OP_TABLE_LENGTH, poptable); + /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, and \w */ @@ -695,7 +703,6 @@ for (;;) int i, j; int clen, dlen; uint32_t c, d; - int forced_fail = 0; BOOL partial_newline = FALSE; BOOL could_continue = reset_could_continue; reset_could_continue = FALSE; @@ -841,19 +848,6 @@ for (;;) switch (codevalue) { -/* ========================================================================== */ - /* These cases are never obeyed. This is a fudge that causes a compile- - time error if the vectors coptable or poptable, which are indexed by - opcode, are not the correct length. It seems to be the only way to do - such a check at compile time, as the sizeof() operator does not work - in the C preprocessor. */ - - case OP_TABLE_LENGTH: - case OP_TABLE_LENGTH + - ((sizeof(coptable) == OP_TABLE_LENGTH) && - (sizeof(poptable) == OP_TABLE_LENGTH)): - return 0; - /* ========================================================================== */ /* Reached a closing bracket. If not at the end of the pattern, carry on with the next opcode. 
For repeating opcodes, also add the repeat @@ -1179,10 +1173,6 @@ for (;;) const ucd_record * prop = GET_UCD(c); switch(code[1]) { - case PT_ANY: - OK = TRUE; - break; - case PT_LAMP: chartype = prop->chartype; OK = chartype == ucp_Lu || chartype == ucp_Ll || @@ -1462,10 +1452,6 @@ for (;;) const ucd_record * prop = GET_UCD(c); switch(code[2]) { - case PT_ANY: - OK = TRUE; - break; - case PT_LAMP: chartype = prop->chartype; OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; @@ -1727,10 +1713,6 @@ for (;;) const ucd_record * prop = GET_UCD(c); switch(code[2]) { - case PT_ANY: - OK = TRUE; - break; - case PT_LAMP: chartype = prop->chartype; OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; @@ -2017,10 +1999,6 @@ for (;;) const ucd_record * prop = GET_UCD(c); switch(code[1 + IMM2_SIZE + 1]) { - case PT_ANY: - OK = TRUE; - break; - case PT_LAMP: chartype = prop->chartype; OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; @@ -2663,35 +2641,54 @@ for (;;) case OP_CLASS: case OP_NCLASS: +#ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: + case OP_ECLASS: +#endif { BOOL isinclass = FALSE; int next_state_offset; PCRE2_SPTR ecode; +#ifdef SUPPORT_WIDE_CHARS + /* An extended class may have a table or a list of single characters, + ranges, or both, and it may be positive or negative. There's a + function that sorts all this out. */ + + if (codevalue == OP_XCLASS) + { + ecode = code + GET(code, 1); + if (clen > 0) + isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, + (const uint8_t*)mb->start_code, utf); + } + + /* A nested set-based class has internal opcodes for performing + set operations. */ + + else if (codevalue == OP_ECLASS) + { + ecode = code + GET(code, 1); + if (clen > 0) + isinclass = PRIV(eclass)(c, code + 1 + LINK_SIZE, ecode, + (const uint8_t*)mb->start_code, utf); + } + + else +#endif /* SUPPORT_WIDE_CHARS */ + /* For a simple class, there is always just a 32-byte table, and we can set isinclass from it. 
*/ - if (codevalue != OP_XCLASS) { ecode = code + 1 + (32 / sizeof(PCRE2_UCHAR)); if (clen > 0) { isinclass = (c > 255)? (codevalue == OP_NCLASS) : - ((((uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0); + ((((const uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0); } } - /* An extended class may have a table or a list of single characters, - ranges, or both, and it may be positive or negative. There's a - function that sorts all this out. */ - - else - { - ecode = code + GET(code, 1); - if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf); - } - /* At this point, isinclass is set for all kinds of class, and ecode points to the byte after the end of the class. If there is a quantifier, this is where it will be. */ @@ -2784,7 +2781,6 @@ for (;;) though the other "backtracking verbs" are not supported. */ case OP_FAIL: - forced_fail++; /* Count FAILs for multiple states */ break; case OP_ASSERT: @@ -3058,7 +3054,7 @@ for (;;) if (codevalue == OP_BRAPOSZERO) { allow_zero = TRUE; - codevalue = *(++code); /* Codevalue will be one of above BRAs */ + ++code; /* The following opcode will be one of the above BRAs */ } else allow_zero = FALSE; @@ -3271,18 +3267,12 @@ for (;;) matches that we are going to find. If partial matching has been requested, check for appropriate conditions. - The "forced_ fail" variable counts the number of (*F) encountered for the - character. If it is equal to the original active_count (saved in - workspace[1]) it means that (*F) was found on every active state. In this - case we don't want to give a partial match. - The "could_continue" variable is true if a state could have continued but for the fact that the end of the subject was reached. */ if (new_count <= 0) { if (could_continue && /* Some could go on, and */ - forced_fail != workspace[1] && /* Not all forced fail & */ ( /* either... */ (mb->moptions & PCRE2_PARTIAL_HARD) != 0 /* Hard partial */ || /* or... 
*/ @@ -3438,7 +3428,7 @@ if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the options variable for this function. Users of PCRE2 who are not calling the function directly would like to have a way of setting these flags, in the same -way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with +way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and (*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be transferred to the options for this function. The bits are guaranteed to be @@ -3528,8 +3518,7 @@ if (mb->match_limit_depth > re->limit_depth) if (mb->heap_limit > re->limit_heap) mb->heap_limit = re->limit_heap; -mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + - re->name_count * re->name_entry_size; +mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start); mb->tables = re->tables; mb->start_subject = subject; mb->end_subject = end_subject; @@ -3576,7 +3565,9 @@ switch(re->newline_convention) mb->nltype = NLTYPE_ANYCRLF; break; - default: return PCRE2_ERROR_INTERNAL; + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; } /* Check a UTF string for validity if required. For 8-bit and 16-bit strings, @@ -3705,7 +3696,7 @@ for (;;) these, for testing and for ensuring that all callouts do actually occur. The optimizations must also be avoided when restarting a DFA match. 
*/ - if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && + if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 && (options & PCRE2_DFA_RESTART) == 0) { /* If firstline is TRUE, the start of the match is constrained to the first diff --git a/libpcre/src/pcre2_error.c b/libpcre/src/pcre2_error.c index 1569f6315..8b7423c6c 100644 --- a/libpcre/src/pcre2_error.c +++ b/libpcre/src/pcre2_error.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -96,7 +96,7 @@ static const unsigned char compile_error_texts[] = "length of lookbehind assertion is not limited\0" "a relative value of zero is not allowed\0" "conditional subpattern contains more than two branches\0" - "assertion expected after (?( or (?(?C)\0" + "atomic assertion expected after (?( or (?(?C)\0" "digit expected after (?+ or (?-\0" /* 30 */ "unknown POSIX class name\0" @@ -161,7 +161,7 @@ static const unsigned char compile_error_texts[] = "using UCP is disabled by the application\0" "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" "character code point value in \\u.... 
sequence is too large\0" - "digits missing in \\x{} or \\o{} or \\N{U+}\0" + "digits missing after \\x or in \\x{} or \\o{} or \\N{U+}\0" "syntax error or number too big in (?(VERSION condition\0" /* 80 */ "internal error: unknown opcode in auto_possessify()\0" @@ -185,10 +185,29 @@ static const unsigned char compile_error_texts[] = "(*alpha_assertion) not recognized\0" "script runs require Unicode support, which this version of PCRE2 does not have\0" "too many capturing groups (maximum 65535)\0" - "atomic assertion expected after (?( or (?(?C)\0" + "octal digit missing after \\0 (PCRE2_EXTRA_NO_BS0 is set)\0" "\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0" /* 100 */ "branch too long in variable-length lookbehind assertion\0" + "compiled pattern would be longer than the limit set by the application\0" + "octal value given by \\ddd is greater than \\377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL)\0" + "using callouts is disabled by the application\0" + "PCRE2_EXTRA_TURKISH_CASING require Unicode (UTF or UCP) mode\0" + /* 105 */ + "PCRE2_EXTRA_TURKISH_CASING requires UTF in 8-bit mode\0" + "PCRE2_EXTRA_TURKISH_CASING and PCRE2_EXTRA_CASELESS_RESTRICT are not compatible\0" + "extended character class nesting is too deep\0" + "invalid operator in extended character class\0" + "unexpected operator in extended character class (no preceding operand)\0" + /* 110 */ + "expected operand after operator in extended character class\0" + "square brackets needed to clarify operator precedence in extended character class\0" + "missing terminating ] for extended character class (note '[' must be escaped under PCRE2_ALT_EXTENDED_CLASS)\0" + "unexpected expression in extended character class (no preceding operator)\0" + "empty expression in extended character class\0" + /* 115 */ + "terminating ] with no following closing parenthesis in (?[...]\0" + "unexpected character in (?[...]) extended character class\0" ; /* Match-time and UTF error texts are in 
the same format. */ @@ -275,6 +294,10 @@ static const unsigned char match_error_texts[] = "internal error - duplicate substitution match\0" "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0" "INTERNAL ERROR: invalid substring offset\0" + "feature is not supported by the JIT compiler\0" + "error performing replacement case transformation\0" + /* 70 */ + "replacement too large (longer than PCRE2_SIZE)\0" ; @@ -317,7 +340,7 @@ else if (enumber < 0) /* Match or UTF error */ } else /* Invalid error number */ { - message = (unsigned char *)"\0"; /* Empty message list */ + message = (const unsigned char *)"\0"; /* Empty message list */ n = 1; } diff --git a/libpcre/src/pcre2_extuni.c b/libpcre/src/pcre2_extuni.c index b23946b0d..91d839e29 100644 --- a/libpcre/src/pcre2_extuni.c +++ b/libpcre/src/pcre2_extuni.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -40,7 +40,7 @@ POSSIBILITY OF SUCH DAMAGE. /* This module contains an internal function that is used to match a Unicode extended grapheme sequence. It is used by both pcre2_match() and -pcre2_def_match(). However, it is called only when Unicode support is being +pcre2_dfa_match(). However, it is called only when Unicode support is being compiled. Nevertheless, we provide a dummy function when there is no Unicode support, because some compilers do not like functionless source files. 
*/ @@ -75,7 +75,11 @@ return NULL; * Match an extended grapheme sequence * *************************************************/ -/* +/* NOTE: The logic contained in this function is replicated in three special- +purpose functions in the pcre2_jit_compile.c module. If the logic below is +changed, they must be kept in step so that the interpreter and the JIT have the +same behaviour. + Arguments: c the first character eptr pointer to next character @@ -92,6 +96,7 @@ PCRE2_SPTR PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject, PCRE2_SPTR end_subject, BOOL utf, int *xcount) { +BOOL was_ep_ZWJ = FALSE; int lgb = UCD_GRAPHBREAK(c); while (eptr < end_subject) @@ -102,6 +107,12 @@ while (eptr < end_subject) rgb = UCD_GRAPHBREAK(c); if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break; + /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was + preceded by Extended Pictographic. */ + + if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) + break; + /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ @@ -129,12 +140,15 @@ while (eptr < end_subject) if ((ricount & 1) != 0) break; /* Grapheme break required */ } - /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this - allows any number of them before a following Extended_Pictographic. */ + /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in + between; see next statement). */ - if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || - lgb != ucp_gbExtended_Pictographic) - lgb = rgb; + was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); + + /* If Extend follows Extended_Pictographic, do not update lgb; this allows + any number of them before a following ZWJ. 
*/ + + if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb; eptr += len; if (xcount != NULL) *xcount += 1; diff --git a/libpcre/src/pcre2_find_bracket.c b/libpcre/src/pcre2_find_bracket.c index 1290c5e9d..486f4539d 100644 --- a/libpcre/src/pcre2_find_bracket.c +++ b/libpcre/src/pcre2_find_bracket.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -76,18 +76,19 @@ for (;;) if (c == OP_END) return NULL; /* XCLASS is used for classes that cannot be represented just by a bit map. - This includes negated single high-valued characters. CALLOUT_STR is used for - callouts with string arguments. In both cases the length in the table is + This includes negated single high-valued characters. ECLASS is used for + classes that use set operations internally. CALLOUT_STR is used for + callouts with string arguments. In each case the length in the table is zero; the actual length is stored in the compiled code. 
*/ - if (c == OP_XCLASS) code += GET(code, 1); - else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); + if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1); + else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); /* Handle lookbehind */ else if (c == OP_REVERSE || c == OP_VREVERSE) { - if (number < 0) return (PCRE2_UCHAR *)code; + if (number < 0) return code; code += PRIV(OP_lengths)[c]; } @@ -97,7 +98,7 @@ for (;;) c == OP_CBRAPOS || c == OP_SCBRAPOS) { int n = (int)GET2(code, 1+LINK_SIZE); - if (n == number) return (PCRE2_UCHAR *)code; + if (n == number) return code; code += PRIV(OP_lengths)[c]; } diff --git a/libpcre/src/pcre2_internal.h b/libpcre/src/pcre2_internal.h index e5808182e..6e0a5e05d 100644 --- a/libpcre/src/pcre2_internal.h +++ b/libpcre/src/pcre2_internal.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -88,6 +88,12 @@ typedef int BOOL; #define TRUE 1 #endif +/* Helper macro for static (compile-time) assertions. Can be used inside +functions, or at the top-level of a file. */ +#define STATIC_ASSERT_JOIN(a,b) a ## b +#define STATIC_ASSERT(cond, msg) \ + typedef int STATIC_ASSERT_JOIN(static_assertion_,msg)[(cond)?1:-1] + /* Valgrind (memcheck) support */ #ifdef SUPPORT_VALGRIND @@ -523,29 +529,29 @@ start/end of string field names are. */ three must not be changed, because whichever is set is actually the number of bytes in a code unit in that mode. 
*/ -#define PCRE2_MODE8 0x00000001 /* compiled in 8 bit mode */ -#define PCRE2_MODE16 0x00000002 /* compiled in 16 bit mode */ -#define PCRE2_MODE32 0x00000004 /* compiled in 32 bit mode */ -#define PCRE2_FIRSTSET 0x00000010 /* first_code unit is set */ -#define PCRE2_FIRSTCASELESS 0x00000020 /* caseless first code unit */ -#define PCRE2_FIRSTMAPSET 0x00000040 /* bitmap of first code units is set */ -#define PCRE2_LASTSET 0x00000080 /* last code unit is set */ -#define PCRE2_LASTCASELESS 0x00000100 /* caseless last code unit */ -#define PCRE2_STARTLINE 0x00000200 /* start after \n for multiline */ -#define PCRE2_JCHANGED 0x00000400 /* j option used in pattern */ -#define PCRE2_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */ -#define PCRE2_HASTHEN 0x00001000 /* pattern contains (*THEN) */ -#define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */ -#define PCRE2_BSR_SET 0x00004000 /* BSR was set in the pattern */ -#define PCRE2_NL_SET 0x00008000 /* newline was set in the pattern */ -#define PCRE2_NOTEMPTY_SET 0x00010000 /* (*NOTEMPTY) used ) keep */ -#define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */ -#define PCRE2_DEREF_TABLES 0x00040000 /* release character tables */ -#define PCRE2_NOJIT 0x00080000 /* (*NOJIT) used */ -#define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */ -#define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */ -#define PCRE2_HASBKC 0x00400000 /* contains \C */ -#define PCRE2_HASACCEPT 0x00800000 /* contains (*ACCEPT) */ +#define PCRE2_MODE8 0x00000001u /* compiled in 8 bit mode */ +#define PCRE2_MODE16 0x00000002u /* compiled in 16 bit mode */ +#define PCRE2_MODE32 0x00000004u /* compiled in 32 bit mode */ +#define PCRE2_FIRSTSET 0x00000010u /* first_code unit is set */ +#define PCRE2_FIRSTCASELESS 0x00000020u /* caseless first code unit */ +#define PCRE2_FIRSTMAPSET 0x00000040u /* bitmap of first code units is set */ +#define PCRE2_LASTSET 0x00000080u /* last code unit is set */ +#define 
PCRE2_LASTCASELESS 0x00000100u /* caseless last code unit */ +#define PCRE2_STARTLINE 0x00000200u /* start after \n for multiline */ +#define PCRE2_JCHANGED 0x00000400u /* j option used in pattern */ +#define PCRE2_HASCRORLF 0x00000800u /* explicit \r or \n in pattern */ +#define PCRE2_HASTHEN 0x00001000u /* pattern contains (*THEN) */ +#define PCRE2_MATCH_EMPTY 0x00002000u /* pattern can match empty string */ +#define PCRE2_BSR_SET 0x00004000u /* BSR was set in the pattern */ +#define PCRE2_NL_SET 0x00008000u /* newline was set in the pattern */ +#define PCRE2_NOTEMPTY_SET 0x00010000u /* (*NOTEMPTY) used ) keep */ +#define PCRE2_NE_ATST_SET 0x00020000u /* (*NOTEMPTY_ATSTART) used) together */ +#define PCRE2_DEREF_TABLES 0x00040000u /* release character tables */ +#define PCRE2_NOJIT 0x00080000u /* (*NOJIT) used */ +#define PCRE2_HASBKPORX 0x00100000u /* contains \P, \p, or \X */ +#define PCRE2_DUPCAPUSED 0x00200000u /* contains (?| */ +#define PCRE2_HASBKC 0x00400000u /* contains \C */ +#define PCRE2_HASACCEPT 0x00800000u /* contains (*ACCEPT) */ #define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32) @@ -574,6 +580,16 @@ modes. */ #define REQ_CU_MAX 2000 #endif +/* The maximum nesting depth for Unicode character class sets. +Currently fixed. Warning: the interpreter relies on this so it can encode +the operand stack in a uint32_t. A nesting limit of 15 implies (15*2+1)=31 +stack operands required, due to the fact that we have two (and only two) +levels of operator precedence. In the UTS#18 syntax, you can write 'x&&y[z]' +and in Perl syntax you can write '(?[ x - y & (z) ])', both of which imply +pushing the match results for x & y to the stack. */ + +#define ECLASS_NEST_LIMIT 15 + /* Offsets for the bitmap tables in the cbits set of tables. Each table contains a set of bits for a class map. Some classes are built by combining these tables. */ @@ -609,6 +625,13 @@ total length of the tables. 
*/ #define ctypes_offset (cbits_offset + cbit_length) /* Character types */ #define TABLES_LENGTH (ctypes_offset + 256) +/* Private flags used in compile_context.optimization_flags */ + +#define PCRE2_OPTIM_AUTO_POSSESS 0x00000001u +#define PCRE2_OPTIM_DOTSTAR_ANCHOR 0x00000002u +#define PCRE2_OPTIM_START_OPTIMIZE 0x00000004u + +#define PCRE2_OPTIMIZATION_ALL 0x00000007u /* -------------------- Character and string names ------------------------ */ @@ -915,6 +938,7 @@ a positive value. */ #define STRING_naplb0 "naplb\0" #define STRING_nla0 "nla\0" #define STRING_nlb0 "nlb\0" +#define STRING_scs0 "scs\0" #define STRING_sr0 "sr\0" #define STRING_asr0 "asr\0" #define STRING_positive_lookahead0 "positive_lookahead\0" @@ -925,6 +949,7 @@ a positive value. */ #define STRING_negative_lookbehind0 "negative_lookbehind\0" #define STRING_script_run0 "script_run\0" #define STRING_atomic_script_run "atomic_script_run" +#define STRING_scan_substring0 "scan_substring\0" #define STRING_alpha0 "alpha\0" #define STRING_lower0 "lower\0" @@ -965,6 +990,8 @@ a positive value. */ #define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" #define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)" #define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)" +#define STRING_CASELESS_RESTRICT_RIGHTPAR "CASELESS_RESTRICT)" +#define STRING_TURKISH_CASING_RIGHTPAR "TURKISH_CASING)" #define STRING_LIMIT_HEAP_EQ "LIMIT_HEAP=" #define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" #define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH=" @@ -1216,6 +1243,7 @@ only. */ #define STRING_naplb0 STR_n STR_a STR_p STR_l STR_b "\0" #define STRING_nla0 STR_n STR_l STR_a "\0" #define STRING_nlb0 STR_n STR_l STR_b "\0" +#define STRING_scs0 STR_s STR_c STR_s "\0" #define STRING_sr0 STR_s STR_r "\0" #define STRING_asr0 STR_a STR_s STR_r "\0" #define STRING_positive_lookahead0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0" @@ -1226,6 +1254,7 @@ only. 
*/ #define STRING_negative_lookbehind0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0" #define STRING_script_run0 STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0" #define STRING_atomic_script_run STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n +#define STRING_scan_substring0 STR_s STR_c STR_a STR_n STR_UNDERSCORE STR_s STR_u STR_b STR_s STR_t STR_r STR_i STR_n STR_g "\0" #define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0" #define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0" @@ -1266,6 +1295,8 @@ only. */ #define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS #define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS #define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS +#define STRING_CASELESS_RESTRICT_RIGHTPAR STR_C STR_A STR_S STR_E STR_L STR_E STR_S STR_S STR_UNDERSCORE STR_R STR_E STR_S STR_T STR_R STR_I STR_C STR_T STR_RIGHT_PARENTHESIS +#define STRING_TURKISH_CASING_RIGHTPAR STR_T STR_U STR_R STR_K STR_I STR_S STR_H STR_UNDERSCORE STR_C STR_A STR_S STR_I STR_N STR_G STR_RIGHT_PARENTHESIS #define STRING_LIMIT_HEAP_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN #define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN #define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN @@ -1290,21 +1321,22 @@ only. */ changed, the autopossessifying table in pcre2_auto_possess.c must be updated to match. 
*/ -#define PT_ANY 0 /* Any property - matches all chars */ -#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */ -#define PT_GC 2 /* Specified general characteristic (e.g. L) */ -#define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */ -#define PT_SC 4 /* Script only (e.g. Han) */ -#define PT_SCX 5 /* Script extensions (includes SC) */ -#define PT_ALNUM 6 /* Alphanumeric - the union of L and N */ -#define PT_SPACE 7 /* Perl space - general category Z plus 9,10,12,13 */ -#define PT_PXSPACE 8 /* POSIX space - Z plus 9,10,11,12,13 */ -#define PT_WORD 9 /* Word - L, N, Mn, or Pc */ -#define PT_CLIST 10 /* Pseudo-property: match character list */ -#define PT_UCNC 11 /* Universal Character nameable character */ -#define PT_BIDICL 12 /* Specified bidi class */ -#define PT_BOOL 13 /* Boolean property */ -#define PT_TABSIZE 14 /* Size of square table for autopossessify tests */ +#define PT_LAMP 0 /* L& - the union of Lu, Ll, Lt */ +#define PT_GC 1 /* Specified general characteristic (e.g. L) */ +#define PT_PC 2 /* Specified particular characteristic (e.g. Lu) */ +#define PT_SC 3 /* Script only (e.g. Han) */ +#define PT_SCX 4 /* Script extensions (includes SC) */ +#define PT_ALNUM 5 /* Alphanumeric - the union of L and N */ +#define PT_SPACE 6 /* Perl space - general category Z plus 9,10,12,13 */ +#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */ +#define PT_WORD 8 /* Word - L, N, Mn, or Pc */ +#define PT_CLIST 9 /* Pseudo-property: match character list */ +#define PT_UCNC 10 /* Universal Character nameable character */ +#define PT_BIDICL 11 /* Specified bidi class */ +#define PT_BOOL 12 /* Boolean property */ +#define PT_ANY 13 /* Must be the last entry! 
+ Any property - matches all chars */ +#define PT_TABSIZE PT_ANY /* Size of square table for autopossessify tests */ /* The following special properties are used only in XCLASS items, when POSIX classes are specified and PCRE2_UCP is set - in other words, for Unicode @@ -1334,6 +1366,94 @@ contain characters with values greater than 255. */ #define XCL_RANGE 2 /* A range (two multibyte chars) follows */ #define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ #define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ +/* This value represents the beginning of character lists. The value +is 16 bits long, and stored as a high and low byte pair in 8 bit mode. +The lower 12 bits contain information about character lists (see later). */ +#define XCL_LIST (sizeof(PCRE2_UCHAR) == 1 ? 0x10 : 0x1000) + +/* When a character class contains many characters/ranges, +they are stored in character lists. There are four character +lists which contain characters/ranges within a given range. + +The name, character range and item size for each list: +Low16 [0x100 - 0x7fff] 16 bit items +High16 [0x8000 - 0xffff] 16 bit items +Low32 [0x10000 - 0x7fffffff] 32 bit items +High32 [0x80000000 - 0xffffffff] 32 bit items + +The Low32 character list is used only when utf encoding or 32 bit +character width is enabled, and the High32 character list is used only +when 32 bit character width is enabled. + +Each character list contains items. The lowest bit represents that +an item is the beginning of a range (bit is cleared), or not (bit +is set). The other bits represent the character shifted left by +one, so its highest bit is discarded. Due to the layout of character +lists, the highest bit of a character is always known: + +Low16 and Low32: the highest bit is always zero +High16 and High32: the highest bit is always one + +The items are ordered in increasing order, so binary search can be +used to find the lower bound of an input character.
The lower bound +is the highest item, whose value is less than or equal to the input +character. If the lower bit of the item is cleared, or the character +stored in the item equals the input character, the input +character is in the character list. */ + +/* Character list constants. */ +#define XCL_CHAR_LIST_LOW_16_START 0x100 +#define XCL_CHAR_LIST_LOW_16_END 0x7fff +#define XCL_CHAR_LIST_LOW_16_ADD 0x0 + +#define XCL_CHAR_LIST_HIGH_16_START 0x8000 +#define XCL_CHAR_LIST_HIGH_16_END 0xffff +#define XCL_CHAR_LIST_HIGH_16_ADD 0x8000 + +#define XCL_CHAR_LIST_LOW_32_START 0x10000 +#define XCL_CHAR_LIST_LOW_32_END 0x7fffffff +#define XCL_CHAR_LIST_LOW_32_ADD 0x0 + +#define XCL_CHAR_LIST_HIGH_32_START 0x80000000 +#define XCL_CHAR_LIST_HIGH_32_END 0xffffffff +#define XCL_CHAR_LIST_HIGH_32_ADD 0x80000000 + +/* Mask for getting the descriptors of character list ranges. +Each descriptor has XCL_TYPE_BIT_LEN bits, and can be processed +by XCL_BEGIN_WITH_RANGE and XCL_ITEM_COUNT_MASK macros. */ +#define XCL_TYPE_MASK 0xfff +#define XCL_TYPE_BIT_LEN 3 +/* If this bit is set, the first item of the character list is the +end of a range, which started before the starting character of the +character list. */ +#define XCL_BEGIN_WITH_RANGE 0x4 +/* Number of items in the character list: 0, 1, or 2. The value 3 +represents that the item count is stored at the beginning of the +character list. The item count has the same width as the items +in the character list (e.g. 16 bit for Low16 and High16 lists). */ +#define XCL_ITEM_COUNT_MASK 0x3 +/* Shift and flag for constructing character list items. The XCL_CHAR_END +is set, when the item is not the beginning of a range. The XCL_CHAR_SHIFT +can be used to encode / decode the character value stored in an item. */ +#define XCL_CHAR_END 0x1 +#define XCL_CHAR_SHIFT 1 + +/* Flag bits for an extended class (OP_ECLASS), which is used for complex +character matches such as [\p{Greek} && \p{Ll}].
*/ + +#define ECL_MAP 0x01 /* Flag: a 32-byte map is present */ + +/* Type tags for the items stored in an extended class (OP_ECLASS). These items +follow the OP_ECLASS's flag char and bitmap, and represent a Reverse Polish +Notation list of operands and operators manipulating a stack of bits. */ + +#define ECL_AND 1 /* Pop two from the stack, AND, and push result. */ +#define ECL_OR 2 /* Pop two from the stack, OR, and push result. */ +#define ECL_XOR 3 /* Pop two from the stack, XOR, and push result. */ +#define ECL_NOT 4 /* Pop one from the stack, NOT, and push result. */ +#define ECL_XCLASS 5 /* XCLASS nested within ECLASS; match and push result. */ +#define ECL_ANY 6 /* Temporary, only used during compilation. */ +#define ECL_NONE 7 /* Temporary, only used during compilation. */ /* These are escaped items that aren't just an encoding of a particular data value such as \n. They must have non-zero values, as check_escape() returns 0 @@ -1555,102 +1675,105 @@ enum { character > 255 is encountered. */ OP_XCLASS, /* 112 Extended class for handling > 255 chars within the class. This does both positive and negative. */ - OP_REF, /* 113 Match a back reference, casefully */ - OP_REFI, /* 114 Match a back reference, caselessly */ - OP_DNREF, /* 115 Match a duplicate name backref, casefully */ - OP_DNREFI, /* 116 Match a duplicate name backref, caselessly */ - OP_RECURSE, /* 117 Match a numbered subpattern (possibly recursive) */ - OP_CALLOUT, /* 118 Call out to external function if provided */ - OP_CALLOUT_STR, /* 119 Call out with string argument */ + OP_ECLASS, /* 113 Really-extended class, for handling logical + expressions computed over characters. 
*/ + OP_REF, /* 114 Match a back reference, casefully */ + OP_REFI, /* 115 Match a back reference, caselessly */ + OP_DNREF, /* 116 Match a duplicate name backref, casefully */ + OP_DNREFI, /* 117 Match a duplicate name backref, caselessly */ + OP_RECURSE, /* 118 Match a numbered subpattern (possibly recursive) */ + OP_CALLOUT, /* 119 Call out to external function if provided */ + OP_CALLOUT_STR, /* 120 Call out with string argument */ - OP_ALT, /* 120 Start of alternation */ - OP_KET, /* 121 End of group that doesn't have an unbounded repeat */ - OP_KETRMAX, /* 122 These two must remain together and in this */ - OP_KETRMIN, /* 123 order. They are for groups the repeat for ever. */ - OP_KETRPOS, /* 124 Possessive unlimited repeat. */ + OP_ALT, /* 121 Start of alternation */ + OP_KET, /* 122 End of group that doesn't have an unbounded repeat */ + OP_KETRMAX, /* 123 These two must remain together and in this */ + OP_KETRMIN, /* 124 order. They are for groups that repeat for ever. */ + OP_KETRPOS, /* 125 Possessive unlimited repeat. */ /* The assertions must come before BRA, CBRA, ONCE, and COND.
*/ - OP_REVERSE, /* 125 Move pointer back - used in lookbehind assertions */ - OP_VREVERSE, /* 126 Move pointer back - variable */ - OP_ASSERT, /* 127 Positive lookahead */ - OP_ASSERT_NOT, /* 128 Negative lookahead */ - OP_ASSERTBACK, /* 129 Positive lookbehind */ - OP_ASSERTBACK_NOT, /* 130 Negative lookbehind */ - OP_ASSERT_NA, /* 131 Positive non-atomic lookahead */ - OP_ASSERTBACK_NA, /* 132 Positive non-atomic lookbehind */ + OP_REVERSE, /* 126 Move pointer back - used in lookbehind assertions */ + OP_VREVERSE, /* 127 Move pointer back - variable */ + OP_ASSERT, /* 128 Positive lookahead */ + OP_ASSERT_NOT, /* 129 Negative lookahead */ + OP_ASSERTBACK, /* 130 Positive lookbehind */ + OP_ASSERTBACK_NOT, /* 131 Negative lookbehind */ + OP_ASSERT_NA, /* 132 Positive non-atomic lookahead */ + OP_ASSERTBACK_NA, /* 133 Positive non-atomic lookbehind */ + OP_ASSERT_SCS, /* 134 Scan substring */ /* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately after the assertions, with ONCE first, as there's a test for >= ONCE for a subpattern that isn't an assertion. The POS versions must immediately follow the non-POS versions in each case. 
*/ - OP_ONCE, /* 133 Atomic group, contains captures */ - OP_SCRIPT_RUN, /* 134 Non-capture, but check characters' scripts */ - OP_BRA, /* 135 Start of non-capturing bracket */ - OP_BRAPOS, /* 136 Ditto, with unlimited, possessive repeat */ - OP_CBRA, /* 137 Start of capturing bracket */ - OP_CBRAPOS, /* 138 Ditto, with unlimited, possessive repeat */ - OP_COND, /* 139 Conditional group */ + OP_ONCE, /* 135 Atomic group, contains captures */ + OP_SCRIPT_RUN, /* 136 Non-capture, but check characters' scripts */ + OP_BRA, /* 137 Start of non-capturing bracket */ + OP_BRAPOS, /* 138 Ditto, with unlimited, possessive repeat */ + OP_CBRA, /* 139 Start of capturing bracket */ + OP_CBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */ + OP_COND, /* 141 Conditional group */ /* These five must follow the previous five, in the same order. There's a check for >= SBRA to distinguish the two sets. */ - OP_SBRA, /* 140 Start of non-capturing bracket, check empty */ - OP_SBRAPOS, /* 141 Ditto, with unlimited, possessive repeat */ - OP_SCBRA, /* 142 Start of capturing bracket, check empty */ - OP_SCBRAPOS, /* 143 Ditto, with unlimited, possessive repeat */ - OP_SCOND, /* 144 Conditional group, check empty */ + OP_SBRA, /* 142 Start of non-capturing bracket, check empty */ + OP_SBRAPOS, /* 143 Ditto, with unlimited, possessive repeat */ + OP_SCBRA, /* 144 Start of capturing bracket, check empty */ + OP_SCBRAPOS, /* 145 Ditto, with unlimited, possessive repeat */ + OP_SCOND, /* 146 Conditional group, check empty */ /* The next two pairs must (respectively) be kept together. 
*/ - OP_CREF, /* 145 Used to hold a capture number as condition */ - OP_DNCREF, /* 146 Used to point to duplicate names as a condition */ - OP_RREF, /* 147 Used to hold a recursion number as condition */ - OP_DNRREF, /* 148 Used to point to duplicate names as a condition */ - OP_FALSE, /* 149 Always false (used by DEFINE and VERSION) */ - OP_TRUE, /* 150 Always true (used by VERSION) */ + OP_CREF, /* 147 Used to hold a capture number as condition */ + OP_DNCREF, /* 148 Used to point to duplicate names as a condition */ + OP_RREF, /* 149 Used to hold a recursion number as condition */ + OP_DNRREF, /* 150 Used to point to duplicate names as a condition */ + OP_FALSE, /* 151 Always false (used by DEFINE and VERSION) */ + OP_TRUE, /* 152 Always true (used by VERSION) */ - OP_BRAZERO, /* 151 These two must remain together and in this */ - OP_BRAMINZERO, /* 152 order. */ - OP_BRAPOSZERO, /* 153 */ + OP_BRAZERO, /* 153 These two must remain together and in this */ + OP_BRAMINZERO, /* 154 order. */ + OP_BRAPOSZERO, /* 155 */ /* These are backtracking control verbs */ - OP_MARK, /* 154 always has an argument */ - OP_PRUNE, /* 155 */ - OP_PRUNE_ARG, /* 156 same, but with argument */ - OP_SKIP, /* 157 */ - OP_SKIP_ARG, /* 158 same, but with argument */ - OP_THEN, /* 159 */ - OP_THEN_ARG, /* 160 same, but with argument */ - OP_COMMIT, /* 161 */ - OP_COMMIT_ARG, /* 162 same, but with argument */ + OP_MARK, /* 156 always has an argument */ + OP_PRUNE, /* 157 */ + OP_PRUNE_ARG, /* 158 same, but with argument */ + OP_SKIP, /* 159 */ + OP_SKIP_ARG, /* 160 same, but with argument */ + OP_THEN, /* 161 */ + OP_THEN_ARG, /* 162 same, but with argument */ + OP_COMMIT, /* 163 */ + OP_COMMIT_ARG, /* 164 same, but with argument */ /* These are forced failure and success verbs. FAIL and ACCEPT do accept an argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL) without the need for a special opcode. 
*/ - OP_FAIL, /* 163 */ - OP_ACCEPT, /* 164 */ - OP_ASSERT_ACCEPT, /* 165 Used inside assertions */ - OP_CLOSE, /* 166 Used before OP_ACCEPT to close open captures */ + OP_FAIL, /* 165 */ + OP_ACCEPT, /* 166 */ + OP_ASSERT_ACCEPT, /* 167 Used inside assertions */ + OP_CLOSE, /* 168 Used before OP_ACCEPT to close open captures */ /* This is used to skip a subpattern with a {0} quantifier */ - OP_SKIPZERO, /* 167 */ + OP_SKIPZERO, /* 169 */ /* This is used to identify a DEFINE group during compilation so that it can be checked for having only one branch. It is changed to OP_FALSE before compilation finishes. */ - OP_DEFINE, /* 168 */ + OP_DEFINE, /* 170 */ /* These opcodes replace their normal counterparts in UCP mode when PCRE2_EXTRA_ASCII_BSW is not set. */ - OP_NOT_UCP_WORD_BOUNDARY, /* 169 */ - OP_UCP_WORD_BOUNDARY, /* 170 */ + OP_NOT_UCP_WORD_BOUNDARY, /* 171 */ + OP_UCP_WORD_BOUNDARY, /* 172 */ /* This is not an opcode, but is used to check that tables indexed by opcode are the correct length, in order to catch updating errors - there have been @@ -1693,19 +1816,21 @@ some cases doesn't actually use these names at all). 
*/ "*+","++", "?+", "{", \ "*", "*?", "+", "+?", "?", "??", "{", "{", \ "*+","++", "?+", "{", \ - "class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \ + "class", "nclass", "xclass", "eclass", \ + "Ref", "Refi", "DnRef", "DnRefi", \ "Recurse", "Callout", "CalloutStr", \ "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \ "Reverse", "VReverse", "Assert", "Assert not", \ "Assert back", "Assert back not", \ "Non-atomic assert", "Non-atomic assert back", \ + "Scan substring", \ "Once", \ "Script run", \ "Bra", "BraPos", "CBra", "CBraPos", \ "Cond", \ "SBra", "SBraPos", "SCBra", "SCBraPos", \ "SCond", \ - "Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", \ + "Capture ref", "Capture dnref", "Cond rec", "Cond dnrec", \ "Cond false", "Cond true", \ "Brazero", "Braminzero", "Braposzero", \ "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \ @@ -1766,10 +1891,11 @@ in UTF-8 mode. The code that uses this table must know about such things. */ 1+(32/sizeof(PCRE2_UCHAR)), /* CLASS */ \ 1+(32/sizeof(PCRE2_UCHAR)), /* NCLASS */ \ 0, /* XCLASS - variable length */ \ + 0, /* ECLASS - variable length */ \ 1+IMM2_SIZE, /* REF */ \ - 1+IMM2_SIZE, /* REFI */ \ + 1+IMM2_SIZE+1, /* REFI */ \ 1+2*IMM2_SIZE, /* DNREF */ \ - 1+2*IMM2_SIZE, /* DNREFI */ \ + 1+2*IMM2_SIZE+1, /* DNREFI */ \ 1+LINK_SIZE, /* RECURSE */ \ 1+2*LINK_SIZE+1, /* CALLOUT */ \ 0, /* CALLOUT_STR - variable length */ \ @@ -1786,6 +1912,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */ 1+LINK_SIZE, /* Assert behind not */ \ 1+LINK_SIZE, /* NA Assert */ \ 1+LINK_SIZE, /* NA Assert behind */ \ + 1+LINK_SIZE, /* Scan substring */ \ 1+LINK_SIZE, /* ONCE */ \ 1+LINK_SIZE, /* SCRIPT_RUN */ \ 1+LINK_SIZE, /* BRA */ \ @@ -1815,6 +1942,11 @@ in UTF-8 mode. The code that uses this table must know about such things. */ #define RREF_ANY 0xffff +/* Constants used by OP_REFI and OP_DNREFI to control matching behaviour. 
*/ + +#define REFI_FLAG_CASELESS_RESTRICT 0x1 +#define REFI_FLAG_TURKISH_CASING 0x2 + /* ---------- Private structures that are mode-independent. ---------- */ @@ -1890,6 +2022,14 @@ typedef struct { #define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch)) #define UCD_BPROPS(ch) UCD_BPROPS_PROP(GET_UCD(ch)) #define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch)) +#define UCD_ANY_I(ch) \ + /* match any of the four characters 'i', 'I', U+0130, U+0131 */ \ + (((uint32_t)(ch) | 0x20u) == 0x69u || ((uint32_t)(ch) | 1u) == 0x0131u) +#define UCD_DOTTED_I(ch) \ + ((uint32_t)(ch) == 0x69u || (uint32_t)(ch) == 0x0130u) +#define UCD_FOLD_I_TURKISH(ch) \ + ((uint32_t)(ch) == 0x0130u ? 0x69u : \ + (uint32_t)(ch) == 0x49u ? 0x0131u : (uint32_t)(ch)) /* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words that form a bitmap representing a list of scripts or boolean properties. These @@ -1955,6 +2095,9 @@ extern const uint8_t PRIV(utf8_table4)[]; #define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_) #define _pcre2_ucd_boolprop_sets PCRE2_SUFFIX(_pcre2_ucd_boolprop_sets_) #define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_) +#define _pcre2_ucd_turkish_dotted_i_caseset PCRE2_SUFFIX(_pcre2_ucd_turkish_dotted_i_caseset_) +#define _pcre2_ucd_nocase_ranges PCRE2_SUFFIX(_pcre2_ucd_nocase_ranges_) +#define _pcre2_ucd_nocase_ranges_size PCRE2_SUFFIX(_pcre2_ucd_nocase_ranges_size_) #define _pcre2_ucd_digit_sets PCRE2_SUFFIX(_pcre2_ucd_digit_sets_) #define _pcre2_ucd_script_sets PCRE2_SUFFIX(_pcre2_ucd_script_sets_) #define _pcre2_ucd_records PCRE2_SUFFIX(_pcre2_ucd_records_) @@ -1971,14 +2114,17 @@ extern const uint8_t PRIV(utf8_table4)[]; extern const uint8_t PRIV(OP_lengths)[]; extern const uint32_t PRIV(callout_end_delims)[]; extern const uint32_t PRIV(callout_start_delims)[]; -extern const pcre2_compile_context PRIV(default_compile_context); -extern const pcre2_convert_context PRIV(default_convert_context); -extern const 
pcre2_match_context PRIV(default_match_context); +extern pcre2_compile_context PRIV(default_compile_context); +extern pcre2_convert_context PRIV(default_convert_context); +extern pcre2_match_context PRIV(default_match_context); extern const uint8_t PRIV(default_tables)[]; extern const uint32_t PRIV(hspace_list)[]; extern const uint32_t PRIV(vspace_list)[]; extern const uint32_t PRIV(ucd_boolprop_sets)[]; extern const uint32_t PRIV(ucd_caseless_sets)[]; +extern const uint32_t PRIV(ucd_turkish_dotted_i_caseset); +extern const uint32_t PRIV(ucd_nocase_ranges)[]; +extern const uint32_t PRIV(ucd_nocase_ranges_size); extern const uint32_t PRIV(ucd_digit_sets)[]; extern const uint32_t PRIV(ucd_script_sets)[]; extern const ucd_record PRIV(ucd_records)[]; @@ -2039,11 +2185,12 @@ is available. */ #define _pcre2_valid_utf PCRE2_SUFFIX(_pcre2_valid_utf_) #define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_) #define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_) +#define _pcre2_eclass PCRE2_SUFFIX(_pcre2_eclass_) extern int _pcre2_auto_possessify(PCRE2_UCHAR *, const compile_block *); extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *, - int *, uint32_t, uint32_t, BOOL, compile_block *); + int *, uint32_t, uint32_t, uint32_t, BOOL, compile_block *); extern PCRE2_SPTR _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR, BOOL, int *); extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int); @@ -2066,7 +2213,9 @@ extern int _pcre2_study(pcre2_real_code *); extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *); extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *, BOOL); -extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL); +extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, const uint8_t *, BOOL); +extern BOOL _pcre2_eclass(uint32_t, PCRE2_SPTR, PCRE2_SPTR, + const uint8_t *, BOOL); /* This function is needed only when memmove() is not available. 
*/ @@ -2079,6 +2228,8 @@ extern void * _pcre2_memmove(void *, const void *, size_t); extern BOOL PRIV(ckd_smul)(PCRE2_SIZE *, int, int); +#include "pcre2_util.h" + #endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */ /* End of pcre2_internal.h */ diff --git a/libpcre/src/pcre2_intmodedep.h b/libpcre/src/pcre2_intmodedep.h index 5fcddce5f..6b858139f 100644 --- a/libpcre/src/pcre2_intmodedep.h +++ b/libpcre/src/pcre2_intmodedep.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -47,7 +47,7 @@ to have access to the hidden structures at all supported widths. Some of the mode-dependent macros are required at different widths for different parts of the pcre2test code (in particular, the included -pcre_printint.c file). We undefine them here so that they can be re-defined for +pcre2_printint.c file). We undefine them here so that they can be re-defined for multiple inclusions. Not all of these are used in pcre2test, but it's easier just to undefine them all. */ @@ -435,7 +435,7 @@ UTF-16 mode. */ c = *eptr; \ if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len); -/* Get the next UTF-816character, testing for UTF-16 mode, not advancing the +/* Get the next UTF-16 character, testing for UTF-16 mode, not advancing the pointer, incrementing length if there is a low surrogate. This is called when we do not know if we are in UTF-16 mode. */ @@ -556,6 +556,11 @@ code that uses them is simpler because it assumes this. */ /* The real general context structure. At present it holds only data for custom memory control. 
*/ +/* WARNING: if this is ever changed, code in pcre2_substitute.c will have to be +changed because it builds a general context "by hand" in order to avoid the +malloc() call in pcre2_general_context)_create(). There is also code in +pcre2_match.c that makes the same assumption. */ + typedef struct pcre2_real_general_context { pcre2_memctl memctl; } pcre2_real_general_context; @@ -568,11 +573,13 @@ typedef struct pcre2_real_compile_context { void *stack_guard_data; const uint8_t *tables; PCRE2_SIZE max_pattern_length; + PCRE2_SIZE max_pattern_compiled_length; uint16_t bsr_convention; uint16_t newline_convention; uint32_t parens_nest_limit; uint32_t extra_options; uint32_t max_varlookbehind; + uint32_t optimization_flags; } pcre2_real_compile_context; /* The real match context structure. */ @@ -583,10 +590,13 @@ typedef struct pcre2_real_match_context { pcre2_jit_callback jit_callback; void *jit_callback_data; #endif - int (*callout)(pcre2_callout_block *, void *); - void *callout_data; - int (*substitute_callout)(pcre2_substitute_callout_block *, void *); - void *substitute_callout_data; + int (*callout)(pcre2_callout_block *, void *); + void *callout_data; + int (*substitute_callout)(pcre2_substitute_callout_block *, void *); + void *substitute_callout_data; + PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, + PCRE2_SIZE, int, void *); + void *substitute_case_callout_data; PCRE2_SIZE offset_limit; uint32_t heap_limit; uint32_t match_limit; @@ -622,6 +632,7 @@ typedef struct pcre2_real_code { void *executable_jit; /* Pointer to JIT code */ uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */ CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */ + CODE_BLOCKSIZE_TYPE code_start; /* Byte code start offset */ uint32_t magic_number; /* Paranoid and endianness check */ uint32_t compile_options; /* Options passed to pcre2_compile() */ uint32_t overall_options; /* Options after processing the pattern */ @@ -640,6 
+651,7 @@ typedef struct pcre2_real_code { uint16_t top_backref; /* Highest numbered back reference */ uint16_t name_entry_size; /* Size (code units) of table entries */ uint16_t name_count; /* Number of name entries in the table */ + uint32_t optimization_flags; /* Optimizations enabled at compile time */ } pcre2_real_code; /* The real match data structure. Define ovector as large as it can ever @@ -715,6 +727,23 @@ typedef struct named_group { uint16_t isdup; /* TRUE if a duplicate */ } named_group; +/* Structure for caching sorted ranges. This improves the performance +of translating META code to byte code. */ + +typedef struct class_ranges { + struct class_ranges *next; /* Next class ranges */ + size_t char_lists_size; /* Total size of encoded char lists */ + size_t char_lists_start; /* Start offset of encoded char lists */ + uint16_t range_list_size; /* Size of ranges array */ + uint16_t char_lists_types; /* The XCL_LIST header of char lists */ + /* Followed by the list of ranges (start/end pairs) */ +} class_ranges; + +typedef union class_bits_storage { + uint8_t classbits[32]; + uint32_t classwords[8]; +} class_bits_storage; + /* Structure for passing "static" information around between the functions doing the compiling, so that they are thread-safe. 
*/ @@ -724,14 +753,15 @@ typedef struct compile_block { const uint8_t *fcc; /* Points to case-flipping table */ const uint8_t *cbits; /* Points to character type table */ const uint8_t *ctypes; /* Points to table of type maps */ - PCRE2_SPTR start_workspace; /* The start of working space */ - PCRE2_SPTR start_code; /* The start of the compiled code */ + PCRE2_UCHAR *start_workspace; /* The start of working space */ + PCRE2_UCHAR *start_code; /* The start of the compiled code */ PCRE2_SPTR start_pattern; /* The start of the pattern */ PCRE2_SPTR end_pattern; /* The end of the pattern */ PCRE2_UCHAR *name_table; /* The name/number table */ PCRE2_SIZE workspace_size; /* Size of workspace */ PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */ PCRE2_SIZE erroroffset; /* Offset of error in pattern */ + class_bits_storage classbits; /* Temporary store for classbits */ uint16_t names_found; /* Number of entries so far */ uint16_t name_entry_size; /* Size of each entry */ uint16_t parens_depth; /* Depth of nested parentheses */ @@ -749,9 +779,9 @@ typedef struct compile_block { uint32_t backref_map; /* Bitmap of low back refs */ uint32_t nltype; /* Newline type */ uint32_t nllen; /* Newline string length */ - uint32_t class_range_start; /* Overall class range start */ - uint32_t class_range_end; /* Overall class range end */ PCRE2_UCHAR nl[4]; /* Newline string when fixed length */ + uint8_t class_op_used[ECLASS_NEST_LIMIT]; /* Operation used for + extended classes */ uint32_t req_varyopt; /* "After variable item" flag for reqbyte */ uint32_t max_varlookbehind; /* Limit for variable lookbehinds */ int max_lookbehind; /* Maximum lookbehind encountered (characters) */ @@ -759,6 +789,11 @@ typedef struct compile_block { BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ BOOL had_recurse; /* Had a pattern recursion or subroutine call */ BOOL dupnames; /* Duplicate names exist */ +#ifdef SUPPORT_WIDE_CHARS + class_ranges *cranges; /* First class range. 
*/ + class_ranges *next_cranges; /* Next class range. */ + size_t char_lists_size; /* Current size of character lists */ +#endif } compile_block; /* Structure for keeping the properties of the in-memory stack used @@ -792,7 +827,7 @@ typedef struct heapframe { to RRMATCH(), but which do not need to be copied to new frames. */ PCRE2_SPTR ecode; /* The current position in the pattern */ - PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE_SPTR values */ + PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE2_SPTR values */ PCRE2_SIZE length; /* Used for character, string, or code lengths */ PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */ PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */ @@ -840,11 +875,10 @@ typedef struct heapframe { PCRE2_SIZE ovector[131072]; /* Must be last in the structure */ } heapframe; -/* This typedef is a check that the size of the heapframe structure is a -multiple of PCRE2_SIZE. See various comments above. */ +/* Assert that the size of the heapframe structure is a multiple of PCRE2_SIZE. +See various comments above. */ -typedef char check_heapframe_size[ - ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)]; +STATIC_ASSERT((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0, heapframe_size); /* Structure for computing the alignment of heapframe. */ diff --git a/libpcre/src/pcre2_jit_compile.c b/libpcre/src/pcre2_jit_compile.c index bf8502919..2d3da6022 100644 --- a/libpcre/src/pcre2_jit_compile.c +++ b/libpcre/src/pcre2_jit_compile.c @@ -8,7 +8,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel This module by Zoltan Herczeg Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -82,7 +82,7 @@ pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data); allocator->free(ptr, allocator->memory_data); } -#include "sljit/sljitLir.c" +#include "../deps/sljit/sljit_src/sljitLir.c" #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED #error Unsupported architecture @@ -282,14 +282,14 @@ typedef struct bracket_backtrack { struct sljit_label *zero_matchingpath; /* Contains the branches of a failed condition. */ union { - /* Both for OP_COND, OP_SCOND. */ - jump_list *condfailed; + /* Both for OP_COND, OP_SCOND, OP_ASSERT_SCS. */ + jump_list *no_capture; assert_backtrack *assert; /* For OP_ONCE. Less than 0 if not needed. */ int framesize; - /* For brackets with >3 alternatives. */ - struct sljit_put_label *matching_put_label; } u; + /* For brackets with >3 alternatives. */ + struct sljit_jump *matching_mov_addr; /* Points to our private memory word on the stack. */ int private_data_ptr; } bracket_backtrack; @@ -313,14 +313,12 @@ typedef struct char_iterator_backtrack { backtrack_common common; /* Next iteration. */ struct sljit_label *matchingpath; - union { - jump_list *backtracks; - struct { - unsigned int othercasebit; - PCRE2_UCHAR chr; - BOOL enabled; - } charpos; - } u; + /* Creating a range based on the next character. */ + struct { + unsigned int othercasebit; + PCRE2_UCHAR chr; + BOOL charpos_enabled; + } charpos; } char_iterator_backtrack; typedef struct ref_iterator_backtrack { @@ -408,6 +406,10 @@ typedef struct compiler_common { then_trap_backtrack *then_trap; /* Starting offset of private data for capturing brackets. 
*/ sljit_s32 cbra_ptr; +#if defined SLJIT_DEBUG && SLJIT_DEBUG + /* End offset of locals for assertions. */ + sljit_s32 locals_size; +#endif /* Output vector starting point. Must be divisible by 2. */ sljit_s32 ovector_start; /* Points to the starting character of the current match. */ @@ -429,6 +431,11 @@ typedef struct compiler_common { Each item must have a previous offset and type (see control_types) values. See do_search_mark. */ sljit_s32 control_head_ptr; + /* The offset of the saved STR_END in the outermost + scan substring block. Since scan substring restores + STR_END after a match, it is enough to restore + STR_END inside a scan substring block. */ + sljit_s32 restore_end_ptr; /* Points to the last matched capture block index. */ sljit_s32 capture_last_ptr; /* Fast forward skipping byte code pointer. */ @@ -513,7 +520,6 @@ typedef struct compiler_common { BOOL invalid_utf; BOOL ucp; /* Points to saving area for iref. */ - sljit_s32 iref_ptr; jump_list *getucd; jump_list *getucdtype; #if PCRE2_CODE_UNIT_WIDTH == 8 @@ -603,14 +609,14 @@ typedef struct compare_context { #endif /* Local space layout. */ -/* These two locals can be used by the current opcode. */ -#define LOCALS0 (0 * sizeof(sljit_sw)) -#define LOCALS1 (1 * sizeof(sljit_sw)) -/* Two local variables for possessive quantifiers (char1 cannot use them). */ -#define POSSESSIVE0 (2 * sizeof(sljit_sw)) -#define POSSESSIVE1 (3 * sizeof(sljit_sw)) /* Max limit of recursions. */ -#define LIMIT_MATCH (4 * sizeof(sljit_sw)) +#define LIMIT_MATCH (0 * sizeof(sljit_sw)) +/* Local variables. Their number is computed by check_opcode_types. */ +#define LOCAL0 (1 * sizeof(sljit_sw)) +#define LOCAL1 (2 * sizeof(sljit_sw)) +#define LOCAL2 (3 * sizeof(sljit_sw)) +#define LOCAL3 (4 * sizeof(sljit_sw)) +#define LOCAL4 (5 * sizeof(sljit_sw)) /* The output vector is stored on the stack, and contains pointers to characters. 
The vector data is divided into two groups: the first group contains the start / end character pointers, and the second is @@ -667,7 +673,7 @@ the start pointers when the end of the capturing group has not yet reached. */ #define GET_LOCAL_BASE(dst, dstw, offset) \ sljit_get_local_base(compiler, (dst), (dstw), (offset)) -#define READ_CHAR_MAX 0x7fffffff +#define READ_CHAR_MAX ((sljit_u32)0xffffffff) #define INVALID_UTF_CHAR -1 #define UNASSIGNED_UTF_CHAR 888 @@ -862,7 +868,7 @@ the start pointers when the end of the capturing group has not yet reached. */ static PCRE2_SPTR bracketend(PCRE2_SPTR cc) { -SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); +SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); do cc += GET(cc, 1); while (*cc == OP_ALT); SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); cc += 1 + LINK_SIZE; @@ -872,7 +878,7 @@ return cc; static int no_alternatives(PCRE2_SPTR cc) { int count = 0; -SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); +SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); do { cc += GET(cc, 1); @@ -975,6 +981,7 @@ switch(*cc) case OP_ASSERTBACK_NOT: case OP_ASSERT_NA: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRA: @@ -1097,7 +1104,9 @@ switch(*cc) return cc + GET(cc, 1 + 2*LINK_SIZE); #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_ECLASS: case OP_XCLASS: + SLJIT_COMPILE_ASSERT(OP_XCLASS + 1 == OP_ECLASS && OP_CLASS + 1 == OP_NCLASS && OP_NCLASS < OP_XCLASS, class_byte_code_order); return cc + GET(cc, 1); #endif @@ -1114,12 +1123,36 @@ switch(*cc) } } +static sljit_s32 ref_update_local_size(compiler_common *common, PCRE2_SPTR cc, sljit_s32 current_locals_size) +{ +/* Depends on do_casefulcmp(), do_caselesscmp(), and compile_ref_matchingpath() */ +int locals_size = 2 * SSIZE_OF(sw); 
+SLJIT_UNUSED_ARG(common); + +#ifdef SUPPORT_UNICODE +if ((*cc == OP_REFI || *cc == OP_DNREFI) && (common->utf || common->ucp)) + locals_size = 3 * SSIZE_OF(sw); +#endif + +cc += PRIV(OP_lengths)[*cc]; +/* Although do_casefulcmp() uses only one local, the allocate_stack() +calls during the repeat destroys LOCAL1 variables. */ +if (*cc >= OP_CRSTAR && *cc <= OP_CRPOSRANGE) + locals_size += 2 * SSIZE_OF(sw); + +return (current_locals_size >= locals_size) ? current_locals_size : locals_size; +} + static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend) { int count; PCRE2_SPTR slot; PCRE2_SPTR assert_back_end = cc - 1; PCRE2_SPTR assert_na_end = cc - 1; +sljit_s32 locals_size = 2 * SSIZE_OF(sw); +BOOL set_recursive_head = FALSE; +BOOL set_capture_last = FALSE; +BOOL set_mark = FALSE; /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */ while (cc < ccend) @@ -1132,22 +1165,41 @@ while (cc < ccend) cc += 1; break; + case OP_TYPEUPTO: + case OP_TYPEEXACT: + if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw)) + locals_size = 3 * SSIZE_OF(sw); + cc += (2 + IMM2_SIZE) - 1; + break; + + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + if (cc[1] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw)) + locals_size = 3 * SSIZE_OF(sw); + cc += 2 - 1; + break; + + case OP_TYPEPOSUPTO: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf && locals_size <= 3 * SSIZE_OF(sw)) + locals_size = 3 * SSIZE_OF(sw); +#endif + if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw)) + locals_size = 3 * SSIZE_OF(sw); + cc += (2 + IMM2_SIZE) - 1; + break; + case OP_REFI: -#ifdef SUPPORT_UNICODE - if (common->iref_ptr == 0) - { - common->iref_ptr = common->ovector_start; - common->ovector_start += 3 * sizeof(sljit_sw); - } -#endif /* SUPPORT_UNICODE */ - /* Fall through. 
*/ case OP_REF: + locals_size = ref_update_local_size(common, cc, locals_size); common->optimized_cbracket[GET2(cc, 1)] = 0; - cc += 1 + IMM2_SIZE; + cc += PRIV(OP_lengths)[*cc]; break; case OP_ASSERT_NA: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: slot = bracketend(cc); if (slot > assert_na_end) assert_na_end = slot; @@ -1174,8 +1226,10 @@ while (cc < ccend) cc += 1 + IMM2_SIZE; break; - case OP_DNREF: case OP_DNREFI: + case OP_DNREF: + locals_size = ref_update_local_size(common, cc, locals_size); + /* Fall through */ case OP_DNCREF: count = GET2(cc, 1 + IMM2_SIZE); slot = common->name_table + GET2(cc, 1) * common->name_entry_size; @@ -1184,26 +1238,18 @@ while (cc < ccend) common->optimized_cbracket[GET2(slot, 0)] = 0; slot += common->name_entry_size; } - cc += 1 + 2 * IMM2_SIZE; + cc += PRIV(OP_lengths)[*cc]; break; case OP_RECURSE: /* Set its value only once. */ - if (common->recursive_head_ptr == 0) - { - common->recursive_head_ptr = common->ovector_start; - common->ovector_start += sizeof(sljit_sw); - } + set_recursive_head = TRUE; cc += 1 + LINK_SIZE; break; case OP_CALLOUT: case OP_CALLOUT_STR: - if (common->capture_last_ptr == 0) - { - common->capture_last_ptr = common->ovector_start; - common->ovector_start += sizeof(sljit_sw); - } + set_capture_last = TRUE; cc += (*cc == OP_CALLOUT) ? 
PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE); break; @@ -1221,15 +1267,8 @@ while (cc < ccend) case OP_COMMIT_ARG: case OP_PRUNE_ARG: - if (cc < assert_na_end) - return FALSE; - /* Fall through */ case OP_MARK: - if (common->mark_ptr == 0) - { - common->mark_ptr = common->ovector_start; - common->ovector_start += sizeof(sljit_sw); - } + set_mark = TRUE; cc += 1 + 2 + cc[1]; break; @@ -1242,8 +1281,6 @@ while (cc < ccend) case OP_SKIP: if (cc < assert_back_end) common->has_skip_in_assert_back = TRUE; - if (cc < assert_na_end) - return FALSE; cc += 1; break; @@ -1252,19 +1289,31 @@ while (cc < ccend) common->has_skip_arg = TRUE; if (cc < assert_back_end) common->has_skip_in_assert_back = TRUE; - if (cc < assert_na_end) - return FALSE; cc += 1 + 2 + cc[1]; break; - case OP_PRUNE: - case OP_COMMIT: case OP_ASSERT_ACCEPT: if (cc < assert_na_end) return FALSE; cc++; break; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + case OP_CRPOSRANGE: + /* The second value can be 0 for infinite repeats. 
*/ + if (common->utf && GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE) && locals_size <= 3 * SSIZE_OF(sw)) + locals_size = 3 * SSIZE_OF(sw); + cc += 1 + 2 * IMM2_SIZE; + break; + + case OP_POSUPTO: + case OP_POSUPTOI: + case OP_NOTPOSUPTO: + case OP_NOTPOSUPTOI: + if (common->utf && locals_size <= 3 * SSIZE_OF(sw)) + locals_size = 3 * SSIZE_OF(sw); +#endif + /* Fall through */ default: cc = next_opcode(common, cc); if (cc == NULL) @@ -1272,6 +1321,36 @@ while (cc < ccend) break; } } + +SLJIT_ASSERT((locals_size & (SSIZE_OF(sw) - 1)) == 0); +#if defined SLJIT_DEBUG && SLJIT_DEBUG +common->locals_size = locals_size; +#endif + +if (locals_size > 0) + common->ovector_start += locals_size; + +if (set_mark) + { + SLJIT_ASSERT(common->mark_ptr == 0); + common->mark_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + +if (set_recursive_head) + { + SLJIT_ASSERT(common->recursive_head_ptr == 0); + common->recursive_head_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + +if (set_capture_last) + { + SLJIT_ASSERT(common->capture_last_ptr == 0); + common->capture_last_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + return TRUE; } @@ -1512,8 +1591,9 @@ do case OP_NCLASS: #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: + case OP_ECLASS: accelerated_start = cc; - cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR)))); + cc += (*cc >= OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))); #else accelerated_start = cc; cc += (1 + (32 / sizeof(PCRE2_UCHAR))); @@ -1687,7 +1767,7 @@ switch(*cc) if (max == 0) return (*cc == OP_CRRANGE) ? 2 : 1; max -= min; - if (max > 2) + if (max > (sljit_u32)(*cc == OP_CRRANGE ? 
0 : 1)) max = 2; return max; @@ -1905,6 +1985,12 @@ while (cc < ccend) bracketlen = 1 + LINK_SIZE; break; + case OP_ASSERT_SCS: + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += 2 * sizeof(sljit_sw); + bracketlen = 1 + LINK_SIZE; + break; + case OP_CBRAPOS: case OP_SCBRAPOS: common->private_data_ptrs[cc - common->start] = private_data_ptr; @@ -1962,13 +2048,13 @@ while (cc < ccend) CASE_ITERATOR_TYPE_PRIVATE_DATA_2A size = 1; - if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI) + if (cc[1] != OP_EXTUNI) space = 2; break; case OP_TYPEUPTO: size = 1 + IMM2_SIZE; - if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI) + if (cc[1 + IMM2_SIZE] != OP_EXTUNI) space = 2; break; @@ -1985,6 +2071,7 @@ while (cc < ccend) #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: + case OP_ECLASS: size = GET(cc, 1); space = get_class_iterator_size(cc + size); break; @@ -2200,6 +2287,7 @@ while (cc < ccend) case OP_CLASS: case OP_NCLASS: case OP_XCLASS: + case OP_ECLASS: case OP_CALLOUT: case OP_CALLOUT_STR: @@ -2242,6 +2330,7 @@ if (ccend == NULL) cc = next_opcode(common, cc); } +/* The data is restored by do_revertframes(). */ SLJIT_ASSERT(cc != NULL); while (cc < ccend) switch(*cc) @@ -2516,6 +2605,13 @@ while (cc < ccend) cc += 1 + LINK_SIZE; break; + case OP_ASSERT_SCS: + SLJIT_ASSERT(PRIVATE_DATA(cc) != 0); + if (recurse_check_bit(common, PRIVATE_DATA(cc))) + length += 2; + cc += 1 + LINK_SIZE; + break; + case OP_CBRA: case OP_SCBRA: offset = GET2(cc, 1 + LINK_SIZE); @@ -2623,7 +2719,8 @@ while (cc < ccend) case OP_NCLASS: #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: - size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR); + case OP_ECLASS: + size = (*cc >= OP_XCLASS) ? 
GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR); #else size = 1 + 32 / (int)sizeof(PCRE2_UCHAR); #endif @@ -2865,6 +2962,14 @@ while (cc < ccend) cc += 1 + LINK_SIZE; break; + case OP_ASSERT_SCS: + private_srcw[0] = PRIVATE_DATA(cc); + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 2; + cc += 1 + LINK_SIZE; + break; + case OP_CBRA: case OP_SCBRA: offset = GET2(cc, 1 + LINK_SIZE); @@ -3005,7 +3110,8 @@ while (cc < ccend) case OP_NCLASS: #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: - i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR); + case OP_ECLASS: + i = (*cc >= OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR); #else i = 1 + 32 / (int)sizeof(PCRE2_UCHAR); #endif @@ -3140,50 +3246,66 @@ static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_S PCRE2_SPTR end = bracketend(cc); BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT; -/* Assert captures then. */ -if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) +/* Assert captures *THEN verb even if it has no alternatives. */ +if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) current_offset = NULL; -/* Conditional block does not. */ -if (*cc == OP_COND || *cc == OP_SCOND) +else if (*cc >= OP_ASSERT_NA && *cc <= OP_ASSERT_SCS) + has_alternatives = TRUE; +/* Conditional block does never capture. 
*/ +else if (*cc == OP_COND || *cc == OP_SCOND) has_alternatives = FALSE; cc = next_opcode(common, cc); if (has_alternatives) { - if (*cc == OP_REVERSE) - cc += 1 + IMM2_SIZE; - else if (*cc == OP_VREVERSE) - cc += 1 + 2 * IMM2_SIZE; + switch (*cc) + { + case OP_REVERSE: + case OP_CREF: + cc += 1 + IMM2_SIZE; + break; + case OP_VREVERSE: + case OP_DNCREF: + cc += 1 + 2 * IMM2_SIZE; + break; + } current_offset = common->then_offsets + (cc - common->start); } while (cc < end) { - if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)) - cc = set_then_offsets(common, cc, current_offset); - else + if (*cc >= OP_ASSERT && *cc <= OP_SCOND) { - if (*cc == OP_ALT && has_alternatives) - { - cc += 1 + LINK_SIZE; - - if (*cc == OP_REVERSE) - cc += 1 + IMM2_SIZE; - else if (*cc == OP_VREVERSE) - cc += 1 + 2 * IMM2_SIZE; - - current_offset = common->then_offsets + (cc - common->start); - continue; - } - - if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL) - *current_offset = 1; - cc = next_opcode(common, cc); + cc = set_then_offsets(common, cc, current_offset); + continue; } + + if (*cc == OP_ALT && has_alternatives) + { + cc += 1 + LINK_SIZE; + + if (*cc == OP_REVERSE) + cc += 1 + IMM2_SIZE; + else if (*cc == OP_VREVERSE) + cc += 1 + 2 * IMM2_SIZE; + + current_offset = common->then_offsets + (cc - common->start); + continue; + } + + if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL) + *current_offset = 1; + cc = next_opcode(common, cc); } +cc = end - 1 - LINK_SIZE; + +/* Ignore repeats. 
*/ +if (*cc == OP_KET && PRIVATE_DATA(cc) != 0) + end += PRIVATE_DATA(cc + 1); + return end; } @@ -3269,8 +3391,12 @@ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345); OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0); +#if defined SLJIT_DEBUG && SLJIT_DEBUG +SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw)); +/* These two are also used by the stackalloc calls. */ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, TMP1, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, TMP1, 0); +#endif #endif add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0)); } @@ -5589,11 +5715,38 @@ if (last) chars->last_count++; } -static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count) +/* Value can be increased if needed. Patterns +such as /(a|){33}b/ can exhaust the stack. + +Note: /(a|){29}b/ already stops scan_prefix() +because it reaches the maximum step_count. */ +#define SCAN_PREFIX_STACK_END 32 + +/* +Scan prefix stores the prefix string in the chars array. +The elements of the chars array is either small character +sets or "any" (count is set to 255). + +Examples (the chars array is represented by a simple regex): + +/(abc|xbyd)/ prefix: /[ax]b[cy]/ (length: 3) +/a[a-z]b+c/ prefix: a.b (length: 3) +/ab?cd/ prefix: a[bc][cd] (length: 3) +/(ab|cd)|(ef|gh)/ prefix: [aceg][bdfh] (length: 2) + +The length is returned by scan_prefix(). The length is +less than or equal than the minimum length of the pattern. +*/ + +static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars) { -/* Recursive function, which scans prefix literals. 
*/ +fast_forward_char_data *chars_start = chars; +fast_forward_char_data *chars_end = chars + MAX_N_CHARS; +PCRE2_SPTR cc_stack[SCAN_PREFIX_STACK_END]; +fast_forward_char_data *chars_stack[SCAN_PREFIX_STACK_END]; +sljit_u8 next_alternative_stack[SCAN_PREFIX_STACK_END]; BOOL last, any, class, caseless; -int len, repeat, len_save, consumed = 0; +int stack_ptr, step_count, repeat, len, len_save; sljit_u32 chr; /* Any unicode character. */ sljit_u8 *bytes, *bytes_end, byte; PCRE2_SPTR alternative, cc_save, oc; @@ -5606,11 +5759,44 @@ PCRE2_UCHAR othercase[1]; #endif repeat = 1; +stack_ptr = 0; +step_count = 10000; while (TRUE) { - if (*rec_count == 0) + if (--step_count == 0) return 0; - (*rec_count)--; + + SLJIT_ASSERT(chars <= chars_start + MAX_N_CHARS); + + if (chars >= chars_end) + { + if (stack_ptr == 0) + return (int)(chars_end - chars_start); + + --stack_ptr; + cc = cc_stack[stack_ptr]; + chars = chars_stack[stack_ptr]; + + if (chars >= chars_end) + continue; + + if (next_alternative_stack[stack_ptr] != 0) + { + /* When an alternative is processed, the + next alternative is pushed onto the stack. 
*/ + SLJIT_ASSERT(*cc == OP_ALT); + alternative = cc + GET(cc, 1); + if (*alternative == OP_ALT) + { + SLJIT_ASSERT(stack_ptr < SCAN_PREFIX_STACK_END); + SLJIT_ASSERT(chars_stack[stack_ptr] == chars); + SLJIT_ASSERT(next_alternative_stack[stack_ptr] == 1); + cc_stack[stack_ptr] = alternative; + stack_ptr++; + } + cc += 1 + LINK_SIZE; + } + } last = TRUE; any = FALSE; @@ -5650,6 +5836,7 @@ while (TRUE) case OP_ASSERTBACK_NOT: case OP_ASSERT_NA: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: cc = bracketend(cc); continue; @@ -5686,9 +5873,17 @@ while (TRUE) #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc); #endif - max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count); - if (max_chars == 0) - return consumed; + if (stack_ptr >= SCAN_PREFIX_STACK_END) + { + chars_end = chars; + continue; + } + + cc_stack[stack_ptr] = cc + len; + chars_stack[stack_ptr] = chars; + next_alternative_stack[stack_ptr] = 0; + stack_ptr++; + last = FALSE; break; @@ -5706,12 +5901,18 @@ while (TRUE) case OP_CBRA: case OP_CBRAPOS: alternative = cc + GET(cc, 1); - while (*alternative == OP_ALT) + if (*alternative == OP_ALT) { - max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count); - if (max_chars == 0) - return consumed; - alternative += GET(alternative, 1); + if (stack_ptr >= SCAN_PREFIX_STACK_END) + { + chars_end = chars; + continue; + } + + cc_stack[stack_ptr] = alternative; + chars_stack[stack_ptr] = chars; + next_alternative_stack[stack_ptr] = 1; + stack_ptr++; } if (*cc == OP_CBRA || *cc == OP_CBRAPOS) @@ -5722,22 +5923,34 @@ while (TRUE) case OP_CLASS: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE)) - return consumed; + { + chars_end = chars; + continue; + } #endif class = TRUE; break; case OP_NCLASS: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) return consumed; + if (common->utf) + { + 
chars_end = chars; + continue; + } #endif class = TRUE; break; #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: + case OP_ECLASS: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) return consumed; + if (common->utf) + { + chars_end = chars; + continue; + } #endif any = TRUE; cc += GET(cc, 1); @@ -5747,7 +5960,10 @@ while (TRUE) case OP_DIGIT: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE)) - return consumed; + { + chars_end = chars; + continue; + } #endif any = TRUE; cc++; @@ -5756,7 +5972,10 @@ while (TRUE) case OP_WHITESPACE: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE)) - return consumed; + { + chars_end = chars; + continue; + } #endif any = TRUE; cc++; @@ -5765,7 +5984,10 @@ while (TRUE) case OP_WORDCHAR: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE)) - return consumed; + { + chars_end = chars; + continue; + } #endif any = TRUE; cc++; @@ -5781,7 +6003,11 @@ while (TRUE) case OP_ANY: case OP_ALLANY: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) return consumed; + if (common->utf) + { + chars_end = chars; + continue; + } #endif any = TRUE; cc++; @@ -5791,7 +6017,11 @@ while (TRUE) case OP_NOTPROP: case OP_PROP: #if PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) return consumed; + if (common->utf) + { + chars_end = chars; + continue; + } #endif any = TRUE; cc += 1 + 2; @@ -5806,7 +6036,11 @@ while (TRUE) case OP_NOTEXACT: case OP_NOTEXACTI: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) return consumed; + if (common->utf) + { + chars_end = chars; + continue; + } #endif any = TRUE; repeat = GET2(cc, 1); @@ 
-5814,21 +6048,20 @@ while (TRUE) break; default: - return consumed; + chars_end = chars; + continue; } + SLJIT_ASSERT(chars < chars_end); + if (any) { do { chars->count = 255; - - consumed++; - if (--max_chars == 0) - return consumed; chars++; } - while (--repeat > 0); + while (--repeat > 0 && chars < chars_end); repeat = 1; continue; @@ -5839,17 +6072,27 @@ while (TRUE) bytes = (sljit_u8*) (cc + 1); cc += 1 + 32 / sizeof(PCRE2_UCHAR); + SLJIT_ASSERT(last == TRUE && repeat == 1); switch (*cc) { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPOSSTAR: case OP_CRQUERY: case OP_CRMINQUERY: case OP_CRPOSQUERY: - max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count); - if (max_chars == 0) - return consumed; + last = FALSE; + /* Fall through */ + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPOSSTAR: + if (stack_ptr >= SCAN_PREFIX_STACK_END) + { + chars_end = chars; + continue; + } + + cc_stack[stack_ptr] = ++cc; + chars_stack[stack_ptr] = chars; + next_alternative_stack[stack_ptr] = 0; + stack_ptr++; break; default: @@ -5863,7 +6106,13 @@ while (TRUE) case OP_CRPOSRANGE: repeat = GET2(cc, 1); if (repeat <= 0) - return consumed; + { + chars_end = chars; + continue; + } + + last = (repeat != (int)GET2(cc, 1 + IMM2_SIZE)); + cc += 1 + 2 * IMM2_SIZE; break; } @@ -5891,43 +6140,20 @@ while (TRUE) chr++; } while (byte != 0); - chr = (chr + 7) & ~7; + chr = (chr + 7) & (sljit_u32)(~7); } } while (chars->count != 255 && bytes < bytes_end); bytes = bytes_end - 32; } - consumed++; - if (--max_chars == 0) - return consumed; chars++; } - while (--repeat > 0); - - switch (*cc) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPOSSTAR: - return consumed; - - case OP_CRQUERY: - case OP_CRMINQUERY: - case OP_CRPOSQUERY: - cc++; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - case OP_CRPOSRANGE: - if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE)) - return consumed; - cc += 1 + 2 * IMM2_SIZE; - break; - } + while (--repeat > 0 && chars < chars_end); repeat = 1; 
+ if (last) + chars_end = chars; continue; } @@ -5943,7 +6169,10 @@ while (TRUE) { GETCHAR(chr, cc); if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len) - return consumed; + { + chars_end = chars; + continue; + } } else #endif @@ -5951,7 +6180,10 @@ while (TRUE) chr = *cc; #ifdef SUPPORT_UNICODE if (common->ucp && chr > 127) - othercase[0] = UCD_OTHERCASE(chr); + { + chr = UCD_OTHERCASE(chr); + othercase[0] = (chr == (PCRE2_UCHAR)chr) ? chr : *cc; + } else #endif othercase[0] = TABLE_GET(chr, common->fcc, chr); @@ -5971,7 +6203,6 @@ while (TRUE) do { len--; - consumed++; chr = *cc; add_prefix_char(*cc, chars, len == 0); @@ -5979,15 +6210,13 @@ while (TRUE) if (caseless) add_prefix_char(*oc, chars, len == 0); - if (--max_chars == 0) - return consumed; chars++; cc++; oc++; } - while (len > 0); + while (len > 0 && chars < chars_end); - if (--repeat == 0) + if (--repeat == 0 || chars >= chars_end) break; len = len_save; @@ -5996,7 +6225,7 @@ while (TRUE) repeat = 1; if (last) - return consumed; + chars_end = chars; } } @@ -6166,7 +6395,6 @@ int i, max, from; int range_right = -1, range_len; sljit_u8 *update_table = NULL; BOOL in_range; -sljit_u32 rec_count; for (i = 0; i < MAX_N_CHARS; i++) { @@ -6174,8 +6402,7 @@ for (i = 0; i < MAX_N_CHARS; i++) chars[i].last_count = 0; } -rec_count = 10000; -max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count); +max = scan_prefix(common, common->start, chars); if (max < 1) return FALSE; @@ -6183,25 +6410,34 @@ if (max < 1) /* Convert last_count to priority. */ for (i = 0; i < max; i++) { - SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count); + SLJIT_ASSERT(chars[i].last_count <= chars[i].count); - if (chars[i].count == 1) + switch (chars[i].count) { + case 0: + chars[i].count = 255; + chars[i].last_count = 0; + break; + + case 1: chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5; /* Simplifies algorithms later. 
*/ chars[i].chars[1] = chars[i].chars[0]; - } - else if (chars[i].count == 2) - { + break; + + case 2: SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]); if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1])) chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4; else chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2; - } - else + break; + + default: chars[i].last_count = (chars[i].count == 255) ? 0 : 1; + break; + } } #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD @@ -6756,8 +6992,7 @@ jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */); OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(jump); -OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0); -OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); +OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, TMP2, 0); if (HAS_VIRTUAL_REGISTERS) { OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); @@ -6799,7 +7034,8 @@ struct sljit_jump *jump; SLJIT_UNUSED_ARG(ucp); SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16); -sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); +SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw)); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0); /* Get type of the previous char, and put it to TMP3. */ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); @@ -6868,7 +7104,7 @@ JUMPHERE(skipread); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); check_str_end(common, &skipread_list); -peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2); +peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, &invalid_utf2); /* Testing char type. This is a code duplication. 
*/ #ifdef SUPPORT_UNICODE @@ -6907,7 +7143,7 @@ else } set_jumps(skipread_list, LABEL()); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0); OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); @@ -6916,15 +7152,15 @@ if (common->invalid_utf) { set_jumps(invalid_utf1, LABEL()); - peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL); + peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, NULL); CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1); OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); set_jumps(invalid_utf2, LABEL()); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); OP1(SLJIT_MOV, TMP2, 0, TMP3, 0); OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); } @@ -6941,7 +7177,7 @@ int i, byte, length = 0; bit = bits[0] & 0x1; /* All bits will be zero or one (since bit is zero or one). */ -all = -bit; +all = (sljit_u8)-bit; for (i = 0; i < 256; ) { @@ -6958,7 +7194,7 @@ for (i = 0; i < 256; ) ranges[length] = i; length++; bit = cbit; - all = -cbit; + all = (sljit_u8)-cbit; /* sign extend bit into byte */ } i++; } @@ -7102,7 +7338,7 @@ for (i = 0; i < 32; i++) byte = bits[i]; if (nclass) - byte = ~byte; + byte = (sljit_u8)~byte; j = 0; while (byte != 0) @@ -7305,7 +7541,9 @@ else char2_reg = RETURN_ADDR; } -sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); +/* Update ref_update_local_size() when this changes. 
*/ +SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw)); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); if (char1_reg == STR_END) @@ -7324,7 +7562,7 @@ if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, JUMPTO(SLJIT_NOT_ZERO, label); JUMPHERE(jump); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); } else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) { @@ -7339,7 +7577,7 @@ else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_ JUMPTO(SLJIT_NOT_ZERO, label); JUMPHERE(jump); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } else @@ -7354,7 +7592,7 @@ else JUMPTO(SLJIT_NOT_ZERO, label); JUMPHERE(jump); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); } if (char1_reg == STR_END) @@ -7392,10 +7630,12 @@ if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) opt_type = 2; -sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); +/* Update ref_update_local_size() when this changes. 
*/ +SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw)); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, char1_reg, 0); if (char2_reg == STACK_TOP) { @@ -7449,7 +7689,7 @@ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); JUMPTO(SLJIT_NOT_ZERO, label); JUMPHERE(jump); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); if (opt_type == 2) OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); @@ -7460,983 +7700,11 @@ if (char2_reg == STACK_TOP) OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0); } -OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); +OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1); OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); } -static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc, - compare_context *context, jump_list **backtracks) -{ -DEFINE_COMPILER; -unsigned int othercasebit = 0; -PCRE2_SPTR othercasechar = NULL; -#ifdef SUPPORT_UNICODE -int utflength; -#endif - -if (caseless && char_has_othercase(common, cc)) - { - othercasebit = char_get_othercase_bit(common, cc); - SLJIT_ASSERT(othercasebit); - /* Extracting bit difference info. */ -#if PCRE2_CODE_UNIT_WIDTH == 8 - othercasechar = cc + (othercasebit >> 8); - othercasebit &= 0xff; -#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 - /* Note that this code only handles characters in the BMP. If there - ever are characters outside the BMP whose othercase differs in only one - bit from itself (there currently are none), this code will need to be - revised for PCRE2_CODE_UNIT_WIDTH == 32. 
*/ - othercasechar = cc + (othercasebit >> 9); - if ((othercasebit & 0x100) != 0) - othercasebit = (othercasebit & 0xff) << 8; - else - othercasebit &= 0xff; -#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ - } - -if (context->sourcereg == -1) - { -#if PCRE2_CODE_UNIT_WIDTH == 8 -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - if (context->length >= 4) - OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); - else if (context->length >= 2) - OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); - else -#endif - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); -#elif PCRE2_CODE_UNIT_WIDTH == 16 -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - if (context->length >= 4) - OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); - else -#endif - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); -#elif PCRE2_CODE_UNIT_WIDTH == 32 - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); -#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ - context->sourcereg = TMP2; - } - -#ifdef SUPPORT_UNICODE -utflength = 1; -if (common->utf && HAS_EXTRALEN(*cc)) - utflength += GET_EXTRALEN(*cc); - -do - { -#endif - - context->length -= IN_UCHARS(1); -#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) - - /* Unaligned read is supported. 
*/ - if (othercasebit != 0 && othercasechar == cc) - { - context->c.asuchars[context->ucharptr] = *cc | othercasebit; - context->oc.asuchars[context->ucharptr] = othercasebit; - } - else - { - context->c.asuchars[context->ucharptr] = *cc; - context->oc.asuchars[context->ucharptr] = 0; - } - context->ucharptr++; - -#if PCRE2_CODE_UNIT_WIDTH == 8 - if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1)) -#else - if (context->ucharptr >= 2 || context->length == 0) -#endif - { - if (context->length >= 4) - OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); - else if (context->length >= 2) - OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); -#if PCRE2_CODE_UNIT_WIDTH == 8 - else if (context->length >= 1) - OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); -#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ - context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; - - switch(context->ucharptr) - { - case 4 / sizeof(PCRE2_UCHAR): - if (context->oc.asint != 0) - OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); - break; - - case 2 / sizeof(PCRE2_UCHAR): - if (context->oc.asushort != 0) - OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); - break; - -#if PCRE2_CODE_UNIT_WIDTH == 8 - case 1: - if (context->oc.asbyte != 0) - OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); - break; -#endif - - default: - SLJIT_UNREACHABLE(); - 
break; - } - context->ucharptr = 0; - } - -#else - - /* Unaligned read is unsupported or in 32 bit mode. */ - if (context->length >= 1) - OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); - - context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; - - if (othercasebit != 0 && othercasechar == cc) - { - OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); - } - else - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); - -#endif - - cc++; -#ifdef SUPPORT_UNICODE - utflength--; - } -while (utflength > 0); -#endif - -return cc; -} - -#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 - -#define SET_CHAR_OFFSET(value) \ - if ((value) != charoffset) \ - { \ - if ((value) < charoffset) \ - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \ - else \ - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \ - } \ - charoffset = (value); - -static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr); - -#ifdef SUPPORT_UNICODE -#define XCLASS_SAVE_CHAR 0x001 -#define XCLASS_CHAR_SAVED 0x002 -#define XCLASS_HAS_TYPE 0x004 -#define XCLASS_HAS_SCRIPT 0x008 -#define XCLASS_HAS_SCRIPT_EXTENSION 0x010 -#define XCLASS_HAS_BOOL 0x020 -#define XCLASS_HAS_BIDICL 0x040 -#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL) -#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080 -#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100 -#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200 -#endif /* SUPPORT_UNICODE */ - -static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks) -{ -DEFINE_COMPILER; -jump_list 
*found = NULL; -jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks; -sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX; -struct sljit_jump *jump = NULL; -PCRE2_SPTR ccbegin; -int compares, invertcmp, numberofcmps; -#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) -BOOL utf = common->utf; -#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */ - -#ifdef SUPPORT_UNICODE -sljit_u32 unicode_status = 0; -sljit_u32 category_list = 0; -sljit_u32 items; -int typereg = TMP1; -const sljit_u32 *other_cases; -#endif /* SUPPORT_UNICODE */ - -/* Scanning the necessary info. */ -cc++; -ccbegin = cc; -compares = 0; - -if (cc[-1] & XCL_MAP) - { - min = 0; - cc += 32 / sizeof(PCRE2_UCHAR); - } - -while (*cc != XCL_END) - { - compares++; - - if (*cc == XCL_SINGLE) - { - cc ++; - GETCHARINCTEST(c, cc); - if (c > max) max = c; - if (c < min) min = c; -#ifdef SUPPORT_UNICODE - unicode_status |= XCLASS_SAVE_CHAR; -#endif /* SUPPORT_UNICODE */ - } - else if (*cc == XCL_RANGE) - { - cc ++; - GETCHARINCTEST(c, cc); - if (c < min) min = c; - GETCHARINCTEST(c, cc); - if (c > max) max = c; -#ifdef SUPPORT_UNICODE - unicode_status |= XCLASS_SAVE_CHAR; -#endif /* SUPPORT_UNICODE */ - } -#ifdef SUPPORT_UNICODE - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - cc++; - - if (*cc == PT_CLIST && cc[-1] == XCL_PROP) - { - other_cases = PRIV(ucd_caseless_sets) + cc[1]; - while (*other_cases != NOTACHAR) - { - if (*other_cases > max) max = *other_cases; - if (*other_cases < min) min = *other_cases; - other_cases++; - } - } - else - { - max = READ_CHAR_MAX; - min = 0; - } - - items = 0; - - switch(*cc) - { - case PT_ANY: - /* Any either accepts everything or ignored. 
*/ - if (cc[-1] == XCL_PROP) - items = UCPCAT_ALL; - else - compares--; - break; - - case PT_LAMP: - items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt); - break; - - case PT_GC: - items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]); - break; - - case PT_PC: - items = UCPCAT(cc[1]); - break; - - case PT_WORD: - items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N; - break; - - case PT_ALNUM: - items = UCPCAT_L | UCPCAT_N; - break; - - case PT_SCX: - unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION; - if (cc[-1] == XCL_NOTPROP) - { - unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP; - break; - } - compares++; - /* Fall through */ - - case PT_SC: - unicode_status |= XCLASS_HAS_SCRIPT; - break; - - case PT_SPACE: - case PT_PXSPACE: - case PT_PXGRAPH: - case PT_PXPRINT: - case PT_PXPUNCT: - unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE; - break; - - case PT_CLIST: - case PT_UCNC: - case PT_PXXDIGIT: - unicode_status |= XCLASS_SAVE_CHAR; - break; - - case PT_BOOL: - unicode_status |= XCLASS_HAS_BOOL; - break; - - case PT_BIDICL: - unicode_status |= XCLASS_HAS_BIDICL; - break; - - default: - SLJIT_UNREACHABLE(); - break; - } - - if (items > 0) - { - if (cc[-1] == XCL_NOTPROP) - items ^= UCPCAT_ALL; - category_list |= items; - unicode_status |= XCLASS_HAS_TYPE; - compares--; - } - - cc += 2; - } -#endif /* SUPPORT_UNICODE */ - } - -#ifdef SUPPORT_UNICODE -if (category_list == UCPCAT_ALL) - { - /* All characters are accepted, same as dotall. */ - compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); - if (list == backtracks) - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - return; - } - -if (compares == 0 && category_list == 0) - { - /* No characters are accepted, same as (*F) or dotall. 
*/ - compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); - if (list != backtracks) - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - return; - } -#else /* !SUPPORT_UNICODE */ -SLJIT_ASSERT(compares > 0); -#endif /* SUPPORT_UNICODE */ - -/* We are not necessary in utf mode even in 8 bit mode. */ -cc = ccbegin; -if ((cc[-1] & XCL_NOT) != 0) - read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR); -else - { -#ifdef SUPPORT_UNICODE - read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0); -#else /* !SUPPORT_UNICODE */ - read_char(common, min, max, NULL, 0); -#endif /* SUPPORT_UNICODE */ - } - -if ((cc[-1] & XCL_HASPROP) == 0) - { - if ((cc[-1] & XCL_MAP) != 0) - { - jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); - if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found)) - { - OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); - OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); - OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); - add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO)); - } - - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - JUMPHERE(jump); - - cc += 32 / sizeof(PCRE2_UCHAR); - } - else - { - OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min); - add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? 
backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min)); - } - } -else if ((cc[-1] & XCL_MAP) != 0) - { - OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); -#ifdef SUPPORT_UNICODE - unicode_status |= XCLASS_CHAR_SAVED; -#endif /* SUPPORT_UNICODE */ - if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list)) - { -#if PCRE2_CODE_UNIT_WIDTH == 8 - jump = NULL; - if (common->utf) -#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ - jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); - - OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); - OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); - OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); - add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO)); - -#if PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf) -#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ - JUMPHERE(jump); - } - - OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); - cc += 32 / sizeof(PCRE2_UCHAR); - } - -#ifdef SUPPORT_UNICODE -if (unicode_status & XCLASS_NEEDS_UCD) - { - if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR) - OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); - -#if PCRE2_CODE_UNIT_WIDTH == 32 - if (!common->utf) - { - jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR); - JUMPHERE(jump); - } -#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ - - OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); - OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); - OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); 
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); - - ccbegin = cc; - - if (category_list != 0) - compares++; - - if (unicode_status & XCLASS_HAS_BIDICL) - { - OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass)); - OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT); - - while (*cc != XCL_END) - { - if (*cc == XCL_SINGLE) - { - cc ++; - GETCHARINCTEST(c, cc); - } - else if (*cc == XCL_RANGE) - { - cc ++; - GETCHARINCTEST(c, cc); - GETCHARINCTEST(c, cc); - } - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - cc++; - if (*cc == PT_BIDICL) - { - compares--; - invertcmp = (compares == 0 && list != backtracks); - if (cc[-1] == XCL_NOTPROP) - invertcmp ^= 0x1; - jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]); - add_jump(compiler, compares > 0 ? list : backtracks, jump); - } - cc += 2; - } - } - - cc = ccbegin; - } - - if (unicode_status & XCLASS_HAS_BOOL) - { - OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops)); - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK); - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); - - while (*cc != XCL_END) - { - if (*cc == XCL_SINGLE) - { - cc ++; - GETCHARINCTEST(c, cc); - } - else if (*cc == XCL_RANGE) - { - cc ++; - GETCHARINCTEST(c, cc); - GETCHARINCTEST(c, cc); - } - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - cc++; - if (*cc == PT_BOOL) - { - compares--; - invertcmp = (compares == 0 && list != backtracks); - if (cc[-1] == XCL_NOTPROP) - invertcmp ^= 0x1; - - OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f)); - add_jump(compiler, compares > 0 ? 
list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); - } - cc += 2; - } - } - - cc = ccbegin; - } - - if (unicode_status & XCLASS_HAS_SCRIPT) - { - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - - while (*cc != XCL_END) - { - if (*cc == XCL_SINGLE) - { - cc ++; - GETCHARINCTEST(c, cc); - } - else if (*cc == XCL_RANGE) - { - cc ++; - GETCHARINCTEST(c, cc); - GETCHARINCTEST(c, cc); - } - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - cc++; - switch (*cc) - { - case PT_SCX: - if (cc[-1] == XCL_NOTPROP) - break; - /* Fall through */ - - case PT_SC: - compares--; - invertcmp = (compares == 0 && list != backtracks); - if (cc[-1] == XCL_NOTPROP) - invertcmp ^= 0x1; - - add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1])); - } - cc += 2; - } - } - - cc = ccbegin; - } - - if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION) - { - OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass)); - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK); - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); - - if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP) - { - if (unicode_status & XCLASS_HAS_TYPE) - { - if (unicode_status & XCLASS_SAVE_CHAR) - { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0); - unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0; - } - else - { - OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0); - unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR; - } - } - OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - } - - while (*cc != XCL_END) - { - if (*cc == XCL_SINGLE) - { - cc ++; - GETCHARINCTEST(c, cc); - } - else if (*cc == XCL_RANGE) - { - cc ++; - GETCHARINCTEST(c, cc); - GETCHARINCTEST(c, cc); - } - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - 
cc++; - if (*cc == PT_SCX) - { - compares--; - invertcmp = (compares == 0 && list != backtracks); - - jump = NULL; - if (cc[-1] == XCL_NOTPROP) - { - jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]); - if (invertcmp) - { - add_jump(compiler, backtracks, jump); - jump = NULL; - } - invertcmp ^= 0x1; - } - - OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f)); - add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); - - if (jump != NULL) - JUMPHERE(jump); - } - cc += 2; - } - } - - if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); - else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR) - OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0); - cc = ccbegin; - } - - if (unicode_status & XCLASS_SAVE_CHAR) - OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); - - if (unicode_status & XCLASS_HAS_TYPE) - { - if (unicode_status & XCLASS_SAVE_CHAR) - typereg = RETURN_ADDR; - - OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); - OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0); - - if (category_list > 0) - { - compares--; - invertcmp = (compares == 0 && list != backtracks); - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list); - add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); - } - } - } -#endif /* SUPPORT_UNICODE */ - -/* Generating code. */ -charoffset = 0; -numberofcmps = 0; - -while (*cc != XCL_END) - { - compares--; - invertcmp = (compares == 0 && list != backtracks); - jump = NULL; - - if (*cc == XCL_SINGLE) - { - cc ++; - GETCHARINCTEST(c, cc); - - if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) - { - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); - OP_FLAGS(numberofcmps == 0 ? 
SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - numberofcmps++; - } - else if (numberofcmps > 0) - { - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - numberofcmps = 0; - } - else - { - jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); - numberofcmps = 0; - } - } - else if (*cc == XCL_RANGE) - { - cc ++; - GETCHARINCTEST(c, cc); - SET_CHAR_OFFSET(c); - GETCHARINCTEST(c, cc); - - if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) - { - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); - OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - numberofcmps++; - } - else if (numberofcmps > 0) - { - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - numberofcmps = 0; - } - else - { - jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); - numberofcmps = 0; - } - } -#ifdef SUPPORT_UNICODE - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - if (*cc == XCL_NOTPROP) - invertcmp ^= 0x1; - cc++; - switch(*cc) - { - case PT_ANY: - case PT_LAMP: - case PT_GC: - case PT_PC: - case PT_SC: - case PT_SCX: - case PT_BOOL: - case PT_BIDICL: - case PT_WORD: - case PT_ALNUM: - compares++; - /* Already handled. 
*/ - break; - - case PT_SPACE: - case PT_PXSPACE: - SET_CHAR_OFFSET(9); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs)); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - break; - - case PT_CLIST: - other_cases = PRIV(ucd_caseless_sets) + cc[1]; - - /* At least three characters are required. - Otherwise this case would be handled by the normal code path. */ - SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR); - SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]); - - /* Optimizing character pairs, if their difference is power of 2. 
*/ - if (is_powerof2(other_cases[1] ^ other_cases[0])) - { - if (charoffset == 0) - OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); - else - { - OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); - OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); - } - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - other_cases += 2; - } - else if (is_powerof2(other_cases[2] ^ other_cases[1])) - { - if (charoffset == 0) - OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]); - else - { - OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); - OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); - } - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset)); - OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); - - other_cases += 3; - } - else - { - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - } - - while (*other_cases != NOTACHAR) - { - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); - OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? 
SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); - } - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - break; - - case PT_UCNC: - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - - SET_CHAR_OFFSET(0xa0); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - SET_CHAR_OFFSET(0); - OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - break; - - case PT_PXGRAPH: - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs)); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); - - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf)); - jump = JUMP(SLJIT_ZERO); - - c = charoffset; - /* In case of ucp_Cf, we overwrite the result. */ - SET_CHAR_OFFSET(0x2066); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - - /* Restore charoffset. 
*/ - SET_CHAR_OFFSET(c); - - JUMPHERE(jump); - jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); - break; - - case PT_PXPRINT: - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp)); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); - - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf)); - jump = JUMP(SLJIT_ZERO); - - c = charoffset; - /* In case of ucp_Cf, we overwrite the result. */ - SET_CHAR_OFFSET(0x2066); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - - /* Restore charoffset. */ - SET_CHAR_OFFSET(c); - - JUMPHERE(jump); - jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); - break; - - case PT_PXPUNCT: - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So)); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); - - SET_CHAR_OFFSET(0); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f); - OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL); - - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps)); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - break; - - case PT_PXXDIGIT: - SET_CHAR_OFFSET(CHAR_A); - OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - - SET_CHAR_OFFSET(CHAR_0); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - - SET_CHAR_OFFSET(0xff10); - jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10); - - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - - 
SET_CHAR_OFFSET(0xff21); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - - SET_CHAR_OFFSET(0xff41); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - - SET_CHAR_OFFSET(0xff10); - - JUMPHERE(jump); - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - break; - - default: - SLJIT_UNREACHABLE(); - break; - } - cc += 2; - } -#endif /* SUPPORT_UNICODE */ - - if (jump != NULL) - add_jump(compiler, compares > 0 ? list : backtracks, jump); - } - -SLJIT_ASSERT(compares == 0); -if (found != NULL) - set_jumps(found, LABEL()); -} - -#undef SET_TYPE_OFFSET -#undef SET_CHAR_OFFSET - -#endif +#include "pcre2_jit_char_inc.h" static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks) { @@ -8681,665 +7949,6 @@ SLJIT_UNREACHABLE(); return cc; } -#ifdef SUPPORT_UNICODE - -#if PCRE2_CODE_UNIT_WIDTH != 32 - -static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc) -{ -PCRE2_SPTR start_subject = args->begin; -PCRE2_SPTR end_subject = args->end; -int lgb, rgb, ricount; -PCRE2_SPTR prevcc, endcc, bptr; -BOOL first = TRUE; -uint32_t c; - -prevcc = cc; -endcc = NULL; -do - { - GETCHARINC(c, cc); - rgb = UCD_GRAPHBREAK(c); - - if (first) - { - lgb = rgb; - endcc = cc; - first = FALSE; - continue; - } - - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) - break; - - /* Not breaking between Regional Indicators is allowed only if there - are an even number of preceding RIs. 
*/ - - if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) - { - ricount = 0; - bptr = prevcc; - - /* bptr is pointing to the left-hand character */ - while (bptr > start_subject) - { - bptr--; - BACKCHAR(bptr); - GETCHAR(c, bptr); - - if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) - break; - - ricount++; - } - - if ((ricount & 1) != 0) break; /* Grapheme break required */ - } - - /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this - allows any number of them before a following Extended_Pictographic. */ - - if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || - lgb != ucp_gbExtended_Pictographic) - lgb = rgb; - - prevcc = endcc; - endcc = cc; - } -while (cc < end_subject); - -return endcc; -} - -#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ - -static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc) -{ -PCRE2_SPTR start_subject = args->begin; -PCRE2_SPTR end_subject = args->end; -int lgb, rgb, ricount; -PCRE2_SPTR prevcc, endcc, bptr; -BOOL first = TRUE; -uint32_t c; - -prevcc = cc; -endcc = NULL; -do - { - GETCHARINC_INVALID(c, cc, end_subject, break); - rgb = UCD_GRAPHBREAK(c); - - if (first) - { - lgb = rgb; - endcc = cc; - first = FALSE; - continue; - } - - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) - break; - - /* Not breaking between Regional Indicators is allowed only if there - are an even number of preceding RIs. */ - - if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) - { - ricount = 0; - bptr = prevcc; - - /* bptr is pointing to the left-hand character */ - while (bptr > start_subject) - { - GETCHARBACK_INVALID(c, bptr, start_subject, break); - - if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) - break; - - ricount++; - } - - if ((ricount & 1) != 0) - break; /* Grapheme break required */ - } - - /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this - allows any number of them before a following Extended_Pictographic. 
*/ - - if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || - lgb != ucp_gbExtended_Pictographic) - lgb = rgb; - - prevcc = endcc; - endcc = cc; - } -while (cc < end_subject); - -return endcc; -} - -static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc) -{ -PCRE2_SPTR start_subject = args->begin; -PCRE2_SPTR end_subject = args->end; -int lgb, rgb, ricount; -PCRE2_SPTR bptr; -uint32_t c; - -/* Patch by PH */ -/* GETCHARINC(c, cc); */ -c = *cc++; - -#if PCRE2_CODE_UNIT_WIDTH == 32 -if (c >= 0x110000) - return cc; -#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ -lgb = UCD_GRAPHBREAK(c); - -while (cc < end_subject) - { - c = *cc; -#if PCRE2_CODE_UNIT_WIDTH == 32 - if (c >= 0x110000) - break; -#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ - rgb = UCD_GRAPHBREAK(c); - - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) - break; - - /* Not breaking between Regional Indicators is allowed only if there - are an even number of preceding RIs. */ - - if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) - { - ricount = 0; - bptr = cc - 1; - - /* bptr is pointing to the left-hand character */ - while (bptr > start_subject) - { - bptr--; - c = *bptr; -#if PCRE2_CODE_UNIT_WIDTH == 32 - if (c >= 0x110000) - break; -#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ - - if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; - - ricount++; - } - - if ((ricount & 1) != 0) - break; /* Grapheme break required */ - } - - /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this - allows any number of them before a following Extended_Pictographic. 
*/ - - if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || - lgb != ucp_gbExtended_Pictographic) - lgb = rgb; - - cc++; - } - -return cc; -} - -#endif /* SUPPORT_UNICODE */ - -static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr) -{ -DEFINE_COMPILER; -int length; -unsigned int c, oc, bit; -compare_context context; -struct sljit_jump *jump[3]; -jump_list *end_list; -#ifdef SUPPORT_UNICODE -PCRE2_UCHAR propdata[5]; -#endif /* SUPPORT_UNICODE */ - -switch(type) - { - case OP_NOT_DIGIT: - case OP_DIGIT: - /* Digits are usually 0-9, so it is worth to optimize them. */ - if (check_str_ptr) - detect_partial_match(common, backtracks); -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE)) - read_char7_type(common, backtracks, type == OP_NOT_DIGIT); - else -#endif - read_char8_type(common, backtracks, type == OP_NOT_DIGIT); - /* Flip the starting bit in the negative case. */ - OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit); - add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO)); - return cc; - - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - if (check_str_ptr) - detect_partial_match(common, backtracks); -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE)) - read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE); - else -#endif - read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE); - OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space); - add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? 
SLJIT_ZERO : SLJIT_NOT_ZERO)); - return cc; - - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - if (check_str_ptr) - detect_partial_match(common, backtracks); -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE)) - read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR); - else -#endif - read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR); - OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word); - add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO)); - return cc; - - case OP_ANY: - if (check_str_ptr) - detect_partial_match(common, backtracks); - read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR); - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); - end_list = NULL; - if (common->mode != PCRE2_JIT_PARTIAL_HARD) - add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - else - check_str_end(common, &end_list); - - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); - set_jumps(end_list, LABEL()); - JUMPHERE(jump[0]); - } - else - check_newlinechar(common, common->nltype, backtracks, TRUE); - return cc; - - case OP_ALLANY: - if (check_str_ptr) - detect_partial_match(common, backtracks); -#ifdef SUPPORT_UNICODE - if (common->utf && common->invalid_utf) - { - read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR); - return cc; - } -#endif /* SUPPORT_UNICODE */ - - skip_valid_char(common); - return cc; - - case OP_ANYBYTE: - if (check_str_ptr) - detect_partial_match(common, backtracks); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - return cc; - -#ifdef SUPPORT_UNICODE - case OP_NOTPROP: - case OP_PROP: - propdata[0] = 
XCL_HASPROP; - propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP; - propdata[2] = cc[0]; - propdata[3] = cc[1]; - propdata[4] = XCL_END; - if (check_str_ptr) - detect_partial_match(common, backtracks); - compile_xclass_matchingpath(common, propdata, backtracks); - return cc + 2; -#endif - - case OP_ANYNL: - if (check_str_ptr) - detect_partial_match(common, backtracks); - read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0); - jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); - /* We don't need to handle soft partial matching case. */ - end_list = NULL; - if (common->mode != PCRE2_JIT_PARTIAL_HARD) - add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - else - check_str_end(common, &end_list); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - jump[2] = JUMP(SLJIT_JUMP); - JUMPHERE(jump[0]); - check_newlinechar(common, common->bsr_nltype, backtracks, FALSE); - set_jumps(end_list, LABEL()); - JUMPHERE(jump[1]); - JUMPHERE(jump[2]); - return cc; - - case OP_NOT_HSPACE: - case OP_HSPACE: - if (check_str_ptr) - detect_partial_match(common, backtracks); - - if (type == OP_NOT_HSPACE) - read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR); - else - read_char(common, 0x9, 0x3000, NULL, 0); - - add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL)); - sljit_set_current_flags(compiler, SLJIT_SET_Z); - add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? 
SLJIT_NOT_ZERO : SLJIT_ZERO)); - return cc; - - case OP_NOT_VSPACE: - case OP_VSPACE: - if (check_str_ptr) - detect_partial_match(common, backtracks); - - if (type == OP_NOT_VSPACE) - read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR); - else - read_char(common, 0xa, 0x2029, NULL, 0); - - add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL)); - sljit_set_current_flags(compiler, SLJIT_SET_Z); - add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO)); - return cc; - -#ifdef SUPPORT_UNICODE - case OP_EXTUNI: - if (check_str_ptr) - detect_partial_match(common, backtracks); - - SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); - OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); - -#if PCRE2_CODE_UNIT_WIDTH != 32 - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, - common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); - if (common->invalid_utf) - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); -#else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, - common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); - if (common->invalid_utf) - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); -#endif - - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); - - if (common->mode == PCRE2_JIT_PARTIAL_HARD) - { - jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0); - /* Since we successfully read a char above, partial matching must occure. 
*/ - check_partial(common, TRUE); - JUMPHERE(jump[0]); - } - return cc; -#endif - - case OP_CHAR: - case OP_CHARI: - length = 1; -#ifdef SUPPORT_UNICODE - if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc); -#endif - - if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE) - detect_partial_match(common, backtracks); - - if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0) - { - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); - if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)) - add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); - - context.length = IN_UCHARS(length); - context.sourcereg = -1; -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - context.ucharptr = 0; -#endif - return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks); - } - -#ifdef SUPPORT_UNICODE - if (common->utf) - { - GETCHAR(c, cc); - } - else -#endif - c = *cc; - - SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc)); - - if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE) - add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - - oc = char_othercase(common, c); - read_char(common, c < oc ? c : oc, c > oc ? 
c : oc, NULL, 0); - - SLJIT_ASSERT(!is_powerof2(c ^ oc)); - - if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) - { - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc); - SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); - } - else - { - jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); - JUMPHERE(jump[0]); - } - return cc + length; - - case OP_NOT: - case OP_NOTI: - if (check_str_ptr) - detect_partial_match(common, backtracks); - - length = 1; -#ifdef SUPPORT_UNICODE - if (common->utf) - { -#if PCRE2_CODE_UNIT_WIDTH == 8 - c = *cc; - if (c < 128 && !common->invalid_utf) - { - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - if (type == OP_NOT || !char_has_othercase(common, cc)) - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); - else - { - /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */ - OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20); - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); - } - /* Skip the variable-length character. */ - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - JUMPHERE(jump[0]); - return cc + 1; - } - else -#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ - { - GETCHARLEN(c, cc, length); - } - } - else -#endif /* SUPPORT_UNICODE */ - c = *cc; - - if (type == OP_NOT || !char_has_othercase(common, cc)) - { - read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR); - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); - } - else - { - oc = char_othercase(common, c); - read_char(common, c < oc ? c : oc, c > oc ? 
c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR); - bit = c ^ oc; - if (is_powerof2(bit)) - { - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); - } - else - { - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); - } - } - return cc + length; - - case OP_CLASS: - case OP_NCLASS: - if (check_str_ptr) - detect_partial_match(common, backtracks); - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255; - if (type == OP_NCLASS) - read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR); - else - read_char(common, 0, bit, NULL, 0); -#else - if (type == OP_NCLASS) - read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR); - else - read_char(common, 0, 255, NULL, 0); -#endif - - if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks)) - return cc + 32 / sizeof(PCRE2_UCHAR); - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - jump[0] = NULL; - if (common->utf) - { - jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit); - if (type == OP_CLASS) - { - add_jump(compiler, backtracks, jump[0]); - jump[0] = NULL; - } - } -#elif PCRE2_CODE_UNIT_WIDTH != 8 - jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); - if (type == OP_CLASS) - { - add_jump(compiler, backtracks, jump[0]); - jump[0] = NULL; - } -#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */ - - OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); - OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); - OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); - add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); - -#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 - if (jump[0] != 
NULL) - JUMPHERE(jump[0]); -#endif - return cc + 32 / sizeof(PCRE2_UCHAR); - -#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 - case OP_XCLASS: - if (check_str_ptr) - detect_partial_match(common, backtracks); - compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks); - return cc + GET(cc, 0) - 1; -#endif - } -SLJIT_UNREACHABLE(); -return cc; -} - -static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks) -{ -/* This function consumes at least one input character. */ -/* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */ -DEFINE_COMPILER; -PCRE2_SPTR ccbegin = cc; -compare_context context; -int size; - -context.length = 0; -do - { - if (cc >= ccend) - break; - - if (*cc == OP_CHAR) - { - size = 1; -#ifdef SUPPORT_UNICODE - if (common->utf && HAS_EXTRALEN(cc[1])) - size += GET_EXTRALEN(cc[1]); -#endif - } - else if (*cc == OP_CHARI) - { - size = 1; -#ifdef SUPPORT_UNICODE - if (common->utf) - { - if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) - size = 0; - else if (HAS_EXTRALEN(cc[1])) - size += GET_EXTRALEN(cc[1]); - } - else -#endif - if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) - size = 0; - } - else - size = 0; - - cc += 1 + size; - context.length += IN_UCHARS(size); - } -while (size > 0 && context.length <= 128); - -cc = ccbegin; -if (context.length > 0) - { - /* We have a fixed-length byte sequence. 
*/ - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length); - add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); - - context.sourcereg = -1; -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - context.ucharptr = 0; -#endif - do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0); - return cc; - } - -/* A non-fixed length character will be checked if length == 0. */ -return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE); -} - /* Forward definitions. */ static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *); static void compile_backtrackingpath(compiler_common *, struct backtrack_common *); @@ -9413,10 +8022,16 @@ struct sljit_jump *nopartial; #if defined SUPPORT_UNICODE struct sljit_label *loop; struct sljit_label *caseless_loop; +struct sljit_jump *turkish_ascii_i = NULL; +struct sljit_jump *turkish_non_ascii_i = NULL; jump_list *no_match = NULL; int source_reg = COUNT_MATCH; int source_end_reg = ARGUMENTS; int char1_reg = STACK_LIMIT; +PCRE2_UCHAR refi_flag = 0; + +if (*cc == OP_REFI || *cc == OP_DNREFI) + refi_flag = cc[PRIV(OP_lengths)[*cc] - 1]; #endif /* SUPPORT_UNICODE */ if (ref) @@ -9431,9 +8046,10 @@ else OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); #if defined SUPPORT_UNICODE -if (common->utf && *cc == OP_REFI) +if ((common->utf || common->ucp) && (*cc == OP_REFI || *cc == OP_DNREFI)) { - SLJIT_ASSERT(common->iref_ptr != 0); + /* Update ref_update_local_size() when this changes. 
*/ + SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw)); if (ref) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); @@ -9443,9 +8059,9 @@ if (common->utf && *cc == OP_REFI) if (withchecks && emptyfail) add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, source_reg, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, source_end_reg, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, char1_reg, 0); OP1(SLJIT_MOV, source_reg, 0, TMP1, 0); OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0); @@ -9469,6 +8085,16 @@ if (common->utf && *cc == OP_REFI) CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop); + if ((refi_flag & (REFI_FLAG_TURKISH_CASING|REFI_FLAG_CASELESS_RESTRICT)) == + REFI_FLAG_TURKISH_CASING) + { + OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x20); + turkish_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x69); + + OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x1); + turkish_non_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x131); + } + OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); @@ -9488,6 +8114,9 @@ if (common->utf && *cc == OP_REFI) OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets)); + if (refi_flag & REFI_FLAG_CASELESS_RESTRICT) + add_jump(compiler, &no_match, CMP(SLJIT_LESS | SLJIT_32, SLJIT_MEM1(TMP2), 0, SLJIT_IMM, 128)); + caseless_loop = LABEL(); OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t)); @@ -9495,30 +8124,52 @@ if (common->utf && *cc == OP_REFI) 
JUMPTO(SLJIT_EQUAL, loop); JUMPTO(SLJIT_LESS, caseless_loop); + if ((refi_flag & (REFI_FLAG_TURKISH_CASING|REFI_FLAG_CASELESS_RESTRICT)) == + REFI_FLAG_TURKISH_CASING) + { + add_jump(compiler, &no_match, JUMP(SLJIT_JUMP)); + JUMPHERE(turkish_ascii_i); + + OP2(SLJIT_LSHR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5); + OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1); + OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x130); + CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop); + + add_jump(compiler, &no_match, JUMP(SLJIT_JUMP)); + JUMPHERE(turkish_non_ascii_i); + + OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1); + OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1); + OP2(SLJIT_SHL, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5); + OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x49); + CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop); + } + set_jumps(no_match, LABEL()); if (common->mode == PCRE2_JIT_COMPLETE) JUMPHERE(partial); - OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr); - OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw)); - OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2); + OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); + OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1); + OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2); add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); if (common->mode != PCRE2_JIT_COMPLETE) { JUMPHERE(partial); - OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr); - OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw)); - OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2); + OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); + OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1); + 
OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2); check_partial(common, FALSE); add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); } JUMPHERE(jump); - OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr); - OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw)); - OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2); + OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); + OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1); + OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2); return; } else @@ -9537,7 +8188,7 @@ else if (common->mode == PCRE2_JIT_COMPLETE) add_jump(compiler, backtracks, partial); - add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); if (common->mode != PCRE2_JIT_COMPLETE) @@ -9549,7 +8200,7 @@ else OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0); partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); - add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? 
&common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); JUMPHERE(partial); check_partial(common, FALSE); @@ -9573,6 +8224,7 @@ DEFINE_COMPILER; BOOL ref = (*cc == OP_REF || *cc == OP_REFI); backtrack_common *backtrack; PCRE2_UCHAR type; +int local_start = LOCAL2; int offset = 0; struct sljit_label *label; struct sljit_jump *zerolength; @@ -9587,9 +8239,21 @@ if (ref) offset = GET2(cc, 1) << 1; else cc += IMM2_SIZE; + +if (*ccbegin == OP_REFI || *ccbegin == OP_DNREFI) + { + cc += 1; +#ifdef SUPPORT_UNICODE + if (common->utf || common->ucp) + local_start = LOCAL3; +#endif + } + type = cc[1 + IMM2_SIZE]; SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even); +/* Update ref_update_local_size() when this changes. */ +SLJIT_ASSERT(local_start + 2 * SSIZE_OF(sw) <= (int)LOCAL0 + common->locals_size); minimize = (type & 0x1) != 0; switch(type) { @@ -9641,7 +8305,7 @@ if (!minimize) { compile_dnref_search(common, ccbegin, NULL); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw), TMP2, 0); zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); } /* Restore if not zero length. 
*/ @@ -9664,24 +8328,24 @@ if (!minimize) { compile_dnref_search(common, ccbegin, &backtrack->own_backtracks); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw), TMP2, 0); zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); } } if (min > 1 || max > 1) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start, SLJIT_IMM, 0); label = LABEL(); if (!ref) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw)); compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE); if (min > 1 || max > 1) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), local_start); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start, TMP1, 0); if (min > 1) CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label); if (max > 1) @@ -9836,7 +8500,7 @@ BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL(); return cc + 1 + LINK_SIZE; } -static sljit_s32 SLJIT_FUNC SLJIT_FUNC_ATTRIBUTE do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) +static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) { PCRE2_SPTR begin; PCRE2_SIZE *ovector; @@ -9949,12 +8613,13 @@ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_pt SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); /* Needed to save important temporary registers. 
*/ -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); +SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw)); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0); /* SLJIT_R0 = arguments */ OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0); GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START); sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit)); -OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); free_stack(common, callout_arg_size); /* Check return value. */ @@ -10122,6 +8787,7 @@ jump_list **found; /* Saving previous accept variables. */ BOOL save_local_quit_available = common->local_quit_available; BOOL save_in_positive_assertion = common->in_positive_assertion; +sljit_s32 save_restore_end_ptr = common->restore_end_ptr; then_trap_backtrack *save_then_trap = common->then_trap; struct sljit_label *save_quit_label = common->quit_label; struct sljit_label *save_accept_label = common->accept_label; @@ -10229,6 +8895,7 @@ if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)) { /* Control verbs cannot escape from these asserts. 
*/ local_quit_available = TRUE; + common->restore_end_ptr = 0; common->local_quit_available = TRUE; common->quit_label = NULL; common->quit = NULL; @@ -10264,6 +8931,7 @@ while (1) common->quit = save_quit; } common->in_positive_assertion = save_in_positive_assertion; + common->restore_end_ptr = save_restore_end_ptr; common->then_trap = save_then_trap; common->accept_label = save_accept_label; common->positive_assertion_quit = save_positive_assertion_quit; @@ -10361,6 +9029,7 @@ while (1) common->quit = save_quit; } common->in_positive_assertion = save_in_positive_assertion; + common->restore_end_ptr = save_restore_end_ptr; common->then_trap = save_then_trap; common->accept_label = save_accept_label; common->positive_assertion_quit = save_positive_assertion_quit; @@ -10500,7 +9169,8 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) { JUMPTO(SLJIT_JUMP, backtrack->matchingpath); JUMPHERE(brajump); - if (framesize >= 0) + SLJIT_ASSERT(framesize != 0); + if (framesize > 0) { OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); @@ -10565,7 +9235,9 @@ if (local_quit_available) common->quit_label = save_quit_label; common->quit = save_quit; } + common->in_positive_assertion = save_in_positive_assertion; +common->restore_end_ptr = save_restore_end_ptr; common->then_trap = save_then_trap; common->accept_label = save_accept_label; common->positive_assertion_quit = save_positive_assertion_quit; @@ -10756,6 +9428,7 @@ BOOL needs_control_head = FALSE; BOOL has_vreverse = FALSE; struct sljit_jump *jump; struct sljit_jump *skip; +jump_list *jumplist; struct sljit_label *rmax_label = NULL; struct sljit_jump *braminzero = NULL; @@ -10818,7 +9491,8 @@ if (opcode == OP_CBRA || opcode == OP_SCBRA) BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; matchingpath += IMM2_SIZE; } -else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == 
OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) +else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE + || opcode == OP_ASSERT_SCS || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) { /* Other brackets simply allocate the next entry. */ private_data_ptr = PRIVATE_DATA(ccbegin); @@ -11029,6 +9703,88 @@ else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SC if (*matchingpath == OP_REVERSE) matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack); } +else if (opcode == OP_ASSERT_SCS) + { + /* Nested scs blocks will not update this variable. */ + if (common->restore_end_ptr == 0) + common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw); + + if (*matchingpath == OP_CREF && (matchingpath[1 + IMM2_SIZE] != OP_CREF && matchingpath[1 + IMM2_SIZE] != OP_DNCREF)) + { + /* Optimized case for a single capture reference. */ + i = OVECTOR(GET2(matchingpath, 1) << 1); + + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), i); + + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + matchingpath += 1 + IMM2_SIZE; + + allocate_stack(common, has_alternatives ? 
3 : 2); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), i + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); + } + else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); + jumplist = NULL; + + while (TRUE) + { + if (*matchingpath == OP_CREF) + { + sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(matchingpath, 1) << 1)); + matchingpath += 1 + IMM2_SIZE; + } + else + { + SLJIT_ASSERT(*matchingpath == OP_DNCREF); + + i = GET2(matchingpath, 1 + IMM2_SIZE); + slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; + + while (i-- > 1) + { + sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1)); + add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0)); + slot += common->name_entry_size; + } + + sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1)); + matchingpath += 1 + 2 * IMM2_SIZE; + } + + if (*matchingpath != OP_CREF && *matchingpath != OP_DNCREF) + break; + + add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0)); + } + + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), + CMP(SLJIT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0)); + + set_jumps(jumplist, LABEL()); + + allocate_stack(common, has_alternatives ? 
3 : 2); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } + + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_TMP_DEST_REG, 0); + + if (has_alternatives) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0); + } else if (has_alternatives) { /* Pushing the starting string pointer. */ @@ -11042,7 +9798,7 @@ if (opcode == OP_COND || opcode == OP_SCOND) if (*matchingpath == OP_CREF) { SLJIT_ASSERT(has_alternatives); - add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); matchingpath += 1 + IMM2_SIZE; } @@ -11064,13 +9820,13 @@ if (opcode == OP_COND || opcode == OP_SCOND) slot += common->name_entry_size; } OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); - add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO)); + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), JUMP(SLJIT_ZERO)); matchingpath += 1 + 2 * IMM2_SIZE; } else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) { /* Never has other case. 
*/ - BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL; + BACKTRACK_AS(bracket_backtrack)->u.no_capture = NULL; SLJIT_ASSERT(!has_alternatives); if (*matchingpath == OP_TRUE) @@ -11159,9 +9915,6 @@ switch (opcode) if (PRIVATE_DATA(ccbegin + 1)) OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); break; - case OP_ASSERT_NA: - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); - break; case OP_ONCE: match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); break; @@ -11227,7 +9980,7 @@ if (has_alternatives) if (i <= 3) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); else - BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); + BACKTRACK_AS(bracket_backtrack)->matching_mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); } if (ket != OP_KETRMAX) BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); @@ -11239,6 +9992,22 @@ if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0) SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); } +else switch (opcode) + { + case OP_ASSERT_NA: + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + break; + case OP_ASSERT_SCS: + OP1(SLJIT_MOV, TMP1, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0); + + /* Nested scs blocks will not update this variable. 
*/ + if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw)) + common->restore_end_ptr = 0; + break; + } if (ket == OP_KETRMAX) { @@ -11302,29 +10071,40 @@ if (bra == OP_BRAMINZERO) /* We need to release the end pointer to perform the backtrack for the zero-length iteration. When framesize is < 0, OP_ONCE will do the release itself. */ - if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0) + if (opcode == OP_ONCE) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); - add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw)); + int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize; + + SLJIT_ASSERT(framesize != 0); + if (framesize > 0) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw)); + } } - else if (ket == OP_KETRMIN && opcode != OP_ONCE) + else if (ket == OP_KETRMIN) free_stack(common, 1); } /* Continue to the normal backtrack. */ } -if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO) +if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO || (has_alternatives && repeat_type != OP_EXACT)) count_match(common); cc += 1 + LINK_SIZE; if (opcode == OP_ONCE) { + int data; + int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize; + + SLJIT_ASSERT(SHRT_MIN <= framesize && framesize < SHRT_MAX/2); /* We temporarily encode the needs_control_head in the lowest bit. - Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns - the same value for small signed numbers (including negative numbers). */ - BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 
1 : 0); + The real value should be short enough for this operation to work + without triggering Undefined Behaviour. */ + data = (int)((short)((unsigned short)framesize << 1) | (needs_control_head ? 1 : 0)); + BACKTRACK_AS(bracket_backtrack)->u.framesize = data; } return cc + repeat_length; } @@ -11551,12 +10331,7 @@ while (*cc != OP_KETRPOS) add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); if (!zero) - { - if (framesize < 0) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); - else - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - } + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); } JUMPTO(SLJIT_JUMP, loop); @@ -11650,11 +10425,11 @@ else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO) } else { - SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS); + SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS || *opcode == OP_ECLASS); *type = *opcode; + class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 1); + *opcode = cc[class_len]; cc++; - class_len = (*type < OP_XCLASS) ? 
(int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0); - *opcode = cc[class_len - 1]; if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY) { @@ -11666,8 +10441,10 @@ else *exact = 1; *opcode -= OP_PLUS - OP_STAR; } + return cc; } - else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY) + + if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY) { *opcode -= OP_CRPOSSTAR - OP_POSSTAR; *end = cc + class_len; @@ -11677,41 +10454,40 @@ else *exact = 1; *opcode = OP_POSSTAR; } + return cc; } + + SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE); + *max = GET2(cc, (class_len + IMM2_SIZE)); + *exact = GET2(cc, class_len); + *end = cc + class_len + 2 * IMM2_SIZE; + + if (*max == 0) + { + SLJIT_ASSERT(*exact > 1); + if (*opcode == OP_CRRANGE) + *opcode = OP_UPTO; + else if (*opcode == OP_CRPOSRANGE) + *opcode = OP_POSUPTO; + else + *opcode = OP_MINSTAR; + return cc; + } + + *max -= *exact; + if (*max == 0) + *opcode = OP_EXACT; else { - SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE); - *max = GET2(cc, (class_len + IMM2_SIZE)); - *exact = GET2(cc, class_len); - - if (*max == 0) - { - if (*opcode == OP_CRPOSRANGE) - *opcode = OP_POSSTAR; - else - *opcode -= OP_CRRANGE - OP_STAR; - } + SLJIT_ASSERT(*exact > 0 || *max > 1); + if (*opcode == OP_CRRANGE) + *opcode = OP_UPTO; + else if (*opcode == OP_CRPOSRANGE) + *opcode = OP_POSUPTO; + else if (*max == 1) + *opcode = OP_MINQUERY; else - { - *max -= *exact; - if (*max == 0) - *opcode = OP_EXACT; - else if (*max == 1) - { - if (*opcode == OP_CRPOSRANGE) - *opcode = OP_POSQUERY; - else - *opcode -= OP_CRRANGE - OP_QUERY; - } - else - { - if (*opcode == OP_CRPOSRANGE) - *opcode = OP_POSUPTO; - else - *opcode -= OP_CRRANGE - OP_UPTO; - } - } - *end = cc + class_len + 2 * IMM2_SIZE; + *opcode = OP_MINUPTO; } return cc; } @@ -11757,16 +10533,17 @@ if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc); return cc; } -static PCRE2_SPTR 
compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent, jump_list **prev_backtracks) { DEFINE_COMPILER; -backtrack_common *backtrack; +backtrack_common *backtrack = NULL; +PCRE2_SPTR begin = cc; PCRE2_UCHAR opcode; PCRE2_UCHAR type; sljit_u32 max = 0, exact; sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1); sljit_s32 early_fail_type; -BOOL charpos_enabled; +BOOL charpos_enabled, use_tmp; PCRE2_UCHAR charpos_char; unsigned int charpos_othercasebit; PCRE2_SPTR end; @@ -11779,11 +10556,6 @@ int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw); int tmp_base, tmp_offset; -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -BOOL use_tmp; -#endif - -PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL); early_fail_type = (early_fail_ptr & 0x7); early_fail_ptr >>= 3; @@ -11799,7 +10571,7 @@ SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0 || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr)); if (early_fail_type == type_fail) - add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr)); + add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr)); cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); @@ -11811,39 +10583,47 @@ if (type != OP_EXTUNI) else { tmp_base = SLJIT_MEM1(SLJIT_SP); - tmp_offset = POSSESSIVE0; + tmp_offset = LOCAL2; } -/* Handle fixed part first. 
*/ -if (exact > 1) +if (opcode == OP_EXACT) { - SLJIT_ASSERT(early_fail_ptr == 0); + SLJIT_ASSERT(early_fail_ptr == 0 && exact >= 2); if (common->mode == PCRE2_JIT_COMPLETE -#ifdef SUPPORT_UNICODE +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 && !common->utf #endif && type != OP_ANYNL && type != OP_EXTUNI) { - OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact)); - add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0)); - OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE); - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - JUMPTO(SLJIT_NOT_ZERO, label); + OP2(SLJIT_SUB, TMP1, 0, STR_END, 0, STR_PTR, 0); + add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, IN_UCHARS(exact))); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 + if (type == OP_ALLANY && !common->invalid_utf) +#else + if (type == OP_ALLANY) +#endif + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact)); + else + { + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, prev_backtracks, FALSE); + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } } else { + SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw)); OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); + compile_char1_matchingpath(common, type, cc, prev_backtracks, TRUE); OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); JUMPTO(SLJIT_NOT_ZERO, label); } } -else if (exact == 1) - compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); if (early_fail_type == type_fail_range) 
{ @@ -11852,38 +10632,57 @@ if (early_fail_type == type_fail_range) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw)); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0); OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0); - add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0)); + add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0); } +if (opcode < OP_EXACT) + PUSH_BACKTRACK(sizeof(char_iterator_backtrack), begin, NULL); + switch(opcode) { case OP_STAR: case OP_UPTO: - SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR); + SLJIT_ASSERT(backtrack != NULL && (early_fail_ptr == 0 || opcode == OP_STAR)); + max += exact; - if (type == OP_ANYNL || type == OP_EXTUNI) + if (type == OP_EXTUNI) { SLJIT_ASSERT(private_data_ptr == 0); SLJIT_ASSERT(early_fail_ptr == 0); - allocate_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); + if (exact == 1) + { + SLJIT_ASSERT(opcode == OP_STAR); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else + { + /* If OP_EXTUNI is present, it has a separate EXACT opcode. 
*/ + SLJIT_ASSERT(exact == 0); - if (opcode == OP_UPTO) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max); + allocate_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); + } - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE); if (opcode == OP_UPTO) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); + SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, SLJIT_IMM, max); + } + + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); + if (opcode == OP_UPTO) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2); OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); jump = JUMP(SLJIT_ZERO); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, TMP1, 0); } /* We cannot use TMP3 because of allocate_stack. */ @@ -11903,6 +10702,9 @@ switch(opcode) { if (opcode == OP_STAR) { + if (exact == 1) + detect_partial_match(common, prev_backtracks); + if (private_data_ptr == 0) allocate_stack(common, 2); @@ -11923,6 +10725,9 @@ switch(opcode) else #endif { + /* If OP_ALLANY is present, it has a separate EXACT opcode. 
*/ + SLJIT_ASSERT(exact == 0); + if (private_data_ptr == 0) allocate_stack(common, 2); @@ -11954,6 +10759,7 @@ switch(opcode) charpos_char = 0; charpos_othercasebit = 0; + SLJIT_ASSERT(tmp_base == TMP3); if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI)) { #ifdef SUPPORT_UNICODE @@ -11983,176 +10789,320 @@ switch(opcode) if (charpos_othercasebit != 0) charpos_char |= charpos_othercasebit; - BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE; - BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char; - BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit; + BACKTRACK_AS(char_iterator_backtrack)->charpos.charpos_enabled = TRUE; + BACKTRACK_AS(char_iterator_backtrack)->charpos.chr = charpos_char; + BACKTRACK_AS(char_iterator_backtrack)->charpos.othercasebit = charpos_othercasebit; + + if (private_data_ptr == 0) + allocate_stack(common, 2); + + use_tmp = (opcode == OP_STAR); + + if (use_tmp) + { + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, base, offset0, TMP3, 0); + } + else + { + OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0); + OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 0 : (max + 1)); + } + + /* Search the first instance of charpos_char. 
*/ + if (exact > 0) + detect_partial_match(common, &no_match); + else + jump = JUMP(SLJIT_JUMP); + + label = LABEL(); + + if (opcode == OP_UPTO) + { + if (exact == max) + OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + else + { + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + } + } + + compile_char1_matchingpath(common, type, cc, &no_match, FALSE); + + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + + if (exact == 0) + JUMPHERE(jump); + + detect_partial_match(common, &no_match); + + if (opcode == OP_UPTO && exact > 0) + { + if (exact == max) + CMPTO(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact, label); + else + CMPTO(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, (max + 1) - exact, label); + } + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (charpos_othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); + + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + if (use_tmp) + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, SLJIT_IMM, 0); + SELECT(SLJIT_EQUAL, TMP3, STR_PTR, 0, TMP3); + } + else + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, SLJIT_IMM, 0); + SELECT(SLJIT_EQUAL, COUNT_MATCH, STR_PTR, 0, COUNT_MATCH); + } + JUMPTO(SLJIT_JUMP, label); + + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + if (use_tmp) + OP1(SLJIT_MOV, base, offset1, TMP3, 0); + else + { + OP1(SLJIT_MOV, TMP1, 0, base, offset1); + OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0); + OP1(SLJIT_MOV, COUNT_MATCH, 0, TMP1, 0); + } + + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0)); + + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + break; } } - if (charpos_enabled) + if (private_data_ptr == 0) 
+ allocate_stack(common, 2); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + use_tmp = (opcode == OP_STAR); + + if (common->utf) { - if (opcode == OP_UPTO) - OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1); + if (!use_tmp) + OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0); - /* Search the first instance of charpos_char. */ - jump = JUMP(SLJIT_JUMP); + OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0); + } +#endif + + if (opcode == OP_UPTO) + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? -(sljit_sw)exact : (sljit_sw)max); + + if (opcode == OP_UPTO && exact > 0) + { label = LABEL(); - if (opcode == OP_UPTO) - { - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_ZERO)); - } - compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE); - if (early_fail_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); - JUMPHERE(jump); - - detect_partial_match(common, &backtrack->own_backtracks); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - if (charpos_othercasebit != 0) - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); - CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); - - if (private_data_ptr == 0) - allocate_stack(common, 2); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); - - if (opcode == OP_UPTO) - { - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); - } - - /* Search the last instance of charpos_char. 
*/ - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &no_match, FALSE); - if (early_fail_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); detect_partial_match(common, &no_match); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - if (charpos_othercasebit != 0) - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); + compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0); +#endif - if (opcode == OP_STAR) + if (exact == max) { - CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - JUMPTO(SLJIT_JUMP, label); + OP2(SLJIT_ADD | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); } else { - jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - JUMPHERE(jump); - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - JUMPTO(SLJIT_NOT_ZERO, label); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + CMPTO(SLJIT_NOT_EQUAL, TMP3, 0, SLJIT_IMM, max - exact, label); } - set_jumps(no_match, LABEL()); - OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1)); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, label); } else { - if (private_data_ptr == 0) - allocate_stack(common, 2); - OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR); - SLJIT_ASSERT(!use_tmp || tmp_base == TMP3); - - if (common->utf) - OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 
0 : offset0, STR_PTR, 0); -#endif - if (opcode == OP_UPTO) - OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); detect_partial_match(common, &no_match); label = LABEL(); compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) - OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0); + OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0); #endif if (opcode == OP_UPTO) { - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); } detect_partial_match_to(common, label); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } - set_jumps(no_char1_match, LABEL()); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) + if (common->utf) + { + set_jumps(no_char1_match, LABEL()); + set_jumps(no_match, LABEL()); + if (use_tmp) { - set_jumps(no_match, LABEL()); - if (use_tmp) - { - OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); - OP1(SLJIT_MOV, base, offset0, TMP3, 0); - } - else - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + OP1(SLJIT_MOV, base, offset0, TMP3, 0); } else -#endif { - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, COUNT_MATCH, 0); + OP1(SLJIT_MOV, COUNT_MATCH, 0, base, offset0); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); } - - if (early_fail_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); } + else +#endif + { + if (opcode != OP_UPTO || exact == 0) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_char1_match, LABEL()); + + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + } + + if (opcode == OP_UPTO) + { 
+ if (exact > 0) + { + if (max == exact) + jump = CMP(SLJIT_GREATER_EQUAL, TMP3, 0, SLJIT_IMM, -(sljit_sw)exact); + else + jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact); + + add_jump(compiler, &backtrack->own_backtracks, jump); + } + } + else if (exact == 1) + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, base, offset1, STR_PTR, 0)); + + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); break; - case OP_MINSTAR: + case OP_QUERY: + SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0); if (private_data_ptr == 0) allocate_stack(common, 1); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + + case OP_MINSTAR: + case OP_MINQUERY: + SLJIT_ASSERT(backtrack != NULL && (opcode == OP_MINSTAR || early_fail_ptr == 0)); + if (private_data_ptr == 0) + allocate_stack(common, 1); + + if (exact >= 1) + { + if (exact >= 2) + { + /* Extuni has a separate exact opcode. 
*/ + SLJIT_ASSERT(tmp_base == TMP3 && early_fail_ptr == 0); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact); + } + + if (opcode == OP_MINQUERY) + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, -1); + + label = LABEL(); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label; + + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); + + if (exact >= 2) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + + if (opcode == OP_MINQUERY) + OP2(SLJIT_AND, base, offset0, base, offset0, STR_PTR, 0); + else + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + } + else + { + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + } + if (early_fail_ptr != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); break; case OP_MINUPTO: - SLJIT_ASSERT(early_fail_ptr == 0); + SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0); if (private_data_ptr == 0) allocate_stack(common, 2); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1); - BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); - break; - case OP_QUERY: - case OP_MINQUERY: - SLJIT_ASSERT(early_fail_ptr == 0); - if (private_data_ptr == 0) - allocate_stack(common, 1); + OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1); + + if (exact == 0) + { + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + } + + if (exact >= 2) + { + /* Extuni has a separate exact opcode. 
*/ + SLJIT_ASSERT(tmp_base == TMP3); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact); + } + + label = LABEL(); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label; + + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); + + if (exact >= 2) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - if (opcode == OP_QUERY) - compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE); - BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); break; case OP_EXACT: + SLJIT_ASSERT(backtrack == NULL); break; case OP_POSSTAR: + SLJIT_ASSERT(backtrack == NULL); #if defined SUPPORT_UNICODE if (type == OP_ALLANY && !common->invalid_utf) #else if (type == OP_ALLANY) #endif { + if (exact == 1) + detect_partial_match(common, prev_backtracks); + OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); process_partial_match(common); if (early_fail_ptr != 0) @@ -12161,98 +11111,150 @@ switch(opcode) } #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (type == OP_EXTUNI || common->utf) + if (common->utf) { - OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw)); + + if (tmp_base != TMP3) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0); + tmp_base = COUNT_MATCH; + } + + OP1(SLJIT_MOV, tmp_base, 0, exact == 1 ? 
SLJIT_IMM : STR_PTR, 0); detect_partial_match(common, &no_match); label = LABEL(); compile_char1_matchingpath(common, type, cc, &no_match, FALSE); - OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + OP1(SLJIT_MOV, tmp_base, 0, STR_PTR, 0); detect_partial_match_to(common, label); set_jumps(no_match, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); + OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, 0); + + if (tmp_base != TMP3) + OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2); + + if (exact == 1) + add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0)); + if (early_fail_ptr != 0) - { - if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0); - else - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); - } + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); break; } #endif + if (exact == 1) + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + detect_partial_match(common, &no_match); label = LABEL(); + /* Extuni never fails, so no_char1_match is not used in that case. + Anynl optionally reads an extra character on success. 
*/ compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); detect_partial_match_to(common, label); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + if (type != OP_EXTUNI) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); set_jumps(no_char1_match, LABEL()); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + if (type != OP_EXTUNI) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_match, LABEL()); + + if (exact == 1) + add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, tmp_base, tmp_offset, STR_PTR, 0)); + if (early_fail_ptr != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); break; case OP_POSUPTO: - SLJIT_ASSERT(early_fail_ptr == 0); + SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0); + max += exact; + #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) - { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); - OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); - - detect_partial_match(common, &no_match); - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &no_match, FALSE); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); - detect_partial_match_to(common, label); - - set_jumps(no_match, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); - break; - } + if (type == OP_EXTUNI || common->utf) +#else + if (type == OP_EXTUNI) #endif - - if (type == OP_ALLANY) { - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max)); + SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw)); - if (common->mode == PCRE2_JIT_COMPLETE) + /* Count match is not modified by compile_char1_matchingpath. */ + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0); + OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, exact == max ? 
0 : max); + + label = LABEL(); + /* Extuni only modifies TMP3 on successful match. */ + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + compile_char1_matchingpath(common, type, cc, &no_match, TRUE); + + if (exact == max) { - OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); - SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); + OP2(SLJIT_ADD, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_JUMP, label); } else { - jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0); - process_partial_match(common); - JUMPHERE(jump); + OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); } + + set_jumps(no_match, LABEL()); + + if (exact > 0) + { + if (exact == max) + OP2U(SLJIT_SUB | SLJIT_SET_LESS, COUNT_MATCH, 0, SLJIT_IMM, exact); + else + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, COUNT_MATCH, 0, SLJIT_IMM, max - exact); + } + + OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2); + + if (exact > 0) + add_jump(compiler, prev_backtracks, JUMP(exact == max ? SLJIT_LESS : SLJIT_GREATER)); + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); break; } - OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + SLJIT_ASSERT(tmp_base == TMP3); + + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 
0 : max); detect_partial_match(common, &no_match); label = LABEL(); compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + + if (exact == max) + OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + else + { + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + } detect_partial_match_to(common, label); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); set_jumps(no_char1_match, LABEL()); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); set_jumps(no_match, LABEL()); + + if (exact > 0) + { + if (exact == max) + jump = CMP(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact); + else + jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact); + + add_jump(compiler, prev_backtracks, jump); + } break; case OP_POSQUERY: - SLJIT_ASSERT(early_fail_ptr == 0); + SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0); + SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw)); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); compile_char1_matchingpath(common, type, cc, &no_match, TRUE); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); @@ -12415,6 +11417,7 @@ DEFINE_COMPILER; backtrack_common *backtrack; BOOL has_then_trap = FALSE; then_trap_backtrack *save_then_trap = NULL; +size_t op_len; SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS)); @@ -12550,21 +11553,23 @@ while (cc < ccend) case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO: - cc = compile_iterator_matchingpath(common, cc, parent); + cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? 
&parent->top->simple_backtracks : &parent->own_backtracks); break; case OP_CLASS: case OP_NCLASS: if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE) - cc = compile_iterator_matchingpath(common, cc, parent); + cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); else cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE); break; #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 case OP_XCLASS: - if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE) - cc = compile_iterator_matchingpath(common, cc, parent); + case OP_ECLASS: + op_len = GET(cc, 1); + if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE) + cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); else cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE); break; @@ -12572,24 +11577,26 @@ while (cc < ccend) case OP_REF: case OP_REFI: - if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE) + op_len = PRIV(OP_lengths)[*cc]; + if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE) cc = compile_ref_iterator_matchingpath(common, cc, parent); else { compile_ref_matchingpath(common, cc, parent->top != NULL ? 
&parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE); - cc += 1 + IMM2_SIZE; + cc += op_len; } break; case OP_DNREF: case OP_DNREFI: - if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE) + op_len = PRIV(OP_lengths)[*cc]; + if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE) cc = compile_ref_iterator_matchingpath(common, cc, parent); else { compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE); - cc += 1 + 2 * IMM2_SIZE; + cc += op_len; } break; @@ -12630,6 +11637,7 @@ while (cc < ccend) case OP_ASSERT_NA: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRA: @@ -12741,6 +11749,28 @@ SLJIT_ASSERT(cc == ccend); #define CURRENT_AS(type) ((type *)current) +static void compile_newline_move_back(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, TMP2, 0); +/* All newlines are single byte, or their last byte +is not equal to CHAR_NL/CHAR_CR even if UTF is enabled. 
*/ +OP1(MOV_UCHAR, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); +OP2(SLJIT_SHL, SLJIT_TMP_DEST_REG, 0, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 8); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_TMP_DEST_REG, 0); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_CR << 8 | CHAR_NL); +OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +JUMPHERE(jump); +} + static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; @@ -12763,52 +11793,104 @@ switch(opcode) { case OP_STAR: case OP_UPTO: - if (type == OP_ANYNL || type == OP_EXTUNI) + if (type == OP_EXTUNI) { SLJIT_ASSERT(private_data_ptr == 0); - set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); } else { - if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled) + if (CURRENT_AS(char_iterator_backtrack)->charpos.charpos_enabled) { OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); OP1(SLJIT_MOV, TMP2, 0, base, offset1); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); label = LABEL(); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0) - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit); - CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath); 
+ if (type == OP_ANYNL) + compile_newline_move_back(common); move_back(common, NULL, TRUE); - CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (CURRENT_AS(char_iterator_backtrack)->charpos.othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->charpos.othercasebit); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath); + /* The range beginning must match, no need to compare. */ + JUMPTO(SLJIT_JUMP, label); + + set_jumps(current->own_backtracks, LABEL()); + current->own_backtracks = NULL; } else { OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1); - move_back(common, NULL, TRUE); + + if (opcode == OP_STAR && exact == 1) + { + if (type == OP_ANYNL) + { + OP1(SLJIT_MOV, TMP2, 0, base, offset1); + compile_newline_move_back(common); + } + + move_back(common, NULL, TRUE); + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1); + } + else + { + if (type == OP_ANYNL) + { + OP1(SLJIT_MOV, TMP2, 0, base, offset1); + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + compile_newline_move_back(common); + } + else + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1); + + move_back(common, NULL, TRUE); + } + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + + set_jumps(current->own_backtracks, LABEL()); } + JUMPHERE(jump); if (private_data_ptr == 0) free_stack(common, 2); } break; + case OP_QUERY: + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); + jump = JUMP(SLJIT_JUMP); + set_jumps(current->own_backtracks, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); + 
JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + JUMPHERE(jump); + if (private_data_ptr == 0) + free_stack(common, 1); + break; + case OP_MINSTAR: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + if (exact == 0) + { + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + } + else if (exact > 1) + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); - set_jumps(jumplist, LABEL()); + set_jumps(exact > 0 ? current->own_backtracks : jumplist, LABEL()); if (private_data_ptr == 0) free_stack(common, 1); break; @@ -12817,56 +11899,60 @@ switch(opcode) OP1(SLJIT_MOV, TMP1, 0, base, offset1); OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO)); - OP1(SLJIT_MOV, base, offset1, TMP1, 0); - compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + if (exact == 0) + { + add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO)); + + OP1(SLJIT_MOV, base, offset1, TMP1, 0); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + + set_jumps(jumplist, LABEL()); + } + else + { + if (exact > 1) + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1); + OP1(SLJIT_MOV, base, offset1, TMP1, 0); + JUMPTO(SLJIT_NOT_ZERO, CURRENT_AS(char_iterator_backtrack)->matchingpath); + + set_jumps(current->own_backtracks, LABEL()); + } - set_jumps(jumplist, LABEL()); if (private_data_ptr == 0) free_stack(common, 2); break; - case OP_QUERY: - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); - 
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); - jump = JUMP(SLJIT_JUMP); - set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); - JUMPHERE(jump); - if (private_data_ptr == 0) - free_stack(common, 1); - break; - case OP_MINQUERY: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); - jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); - compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); - JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); - set_jumps(jumplist, LABEL()); - JUMPHERE(jump); + + if (exact >= 1) + { + if (exact >= 2) + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); + set_jumps(current->own_backtracks, LABEL()); + } + else + { + jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + set_jumps(jumplist, LABEL()); + JUMPHERE(jump); + } + if (private_data_ptr == 0) free_stack(common, 1); break; - case OP_EXACT: - case OP_POSSTAR: - case OP_POSQUERY: - case OP_POSUPTO: - break; - default: SLJIT_UNREACHABLE(); break; } - -set_jumps(current->own_backtracks, LABEL()); } static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) @@ -12876,7 +11962,7 @@ PCRE2_SPTR cc = current->cc; BOOL ref = (*cc == OP_REF || *cc == OP_REFI); PCRE2_UCHAR type; -type = cc[ref ? 
1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE]; +type = cc[PRIV(OP_lengths)[*cc]]; if ((type & 0x1) == 0) { @@ -12995,7 +12081,7 @@ PCRE2_SPTR ccbegin; PCRE2_SPTR ccprev; PCRE2_UCHAR bra = OP_BRA; PCRE2_UCHAR ket; -assert_backtrack *assert; +const assert_backtrack *assert; BOOL has_alternatives; BOOL needs_control_head = FALSE; BOOL has_vreverse; @@ -13005,7 +12091,7 @@ struct sljit_jump *once = NULL; struct sljit_jump *cond = NULL; struct sljit_label *rmin_label = NULL; struct sljit_label *exact_label = NULL; -struct sljit_put_label *put_label = NULL; +struct sljit_jump *mov_addr = NULL; if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) { @@ -13031,7 +12117,7 @@ ccbegin = cc; cc += GET(cc, 1); has_alternatives = *cc == OP_ALT; if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) - has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL; + has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.no_capture != NULL; if (opcode == OP_CBRA || opcode == OP_SCBRA) offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1; if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) @@ -13134,14 +12220,27 @@ if (offset != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); } } +else if (SLJIT_UNLIKELY(opcode == OP_ASSERT_SCS)) + { + OP1(SLJIT_MOV, TMP1, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0); + + /* Nested scs blocks will not update this variable. 
*/ + if (common->restore_end_ptr == 0) + common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw); + } if (SLJIT_UNLIKELY(opcode == OP_ONCE)) { - if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) + int framesize = CURRENT_AS(bracket_backtrack)->u.framesize; + + SLJIT_ASSERT(framesize != 0); + if (framesize > 0) { OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw)); } once = JUMP(SLJIT_JUMP); } @@ -13166,8 +12265,8 @@ else if (has_alternatives) { sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0); - SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label); - sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL()); + SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->matching_mov_addr != NULL); + sljit_set_label(CURRENT_AS(bracket_backtrack)->matching_mov_addr, LABEL()); sljit_emit_op0(compiler, SLJIT_ENDBR); } else @@ -13185,7 +12284,8 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) { SLJIT_ASSERT(has_alternatives); assert = CURRENT_AS(bracket_backtrack)->u.assert; - if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK)) + SLJIT_ASSERT(assert->framesize != 0); + if (assert->framesize > 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK)) { OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); @@ -13196,11 +12296,11 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) cond = JUMP(SLJIT_JUMP); set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL()); } - else if 
(CURRENT_AS(bracket_backtrack)->u.condfailed != NULL) + else if (CURRENT_AS(bracket_backtrack)->u.no_capture != NULL) { SLJIT_ASSERT(has_alternatives); cond = JUMP(SLJIT_JUMP); - set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL()); + set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL()); } else SLJIT_ASSERT(!has_alternatives); @@ -13221,26 +12321,33 @@ if (has_alternatives) cc += GET(cc, 1); has_vreverse = FALSE; - if (opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NA) - { - SLJIT_ASSERT(private_data_ptr != 0); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); - has_vreverse = (*ccprev == OP_VREVERSE); - if (*ccprev == OP_REVERSE || has_vreverse) - ccprev = compile_reverse_matchingpath(common, ccprev, current); - } - else if (opcode != OP_COND && opcode != OP_SCOND) + switch (opcode) { - if (opcode != OP_ONCE) - { + case OP_ASSERTBACK: + case OP_ASSERTBACK_NA: + SLJIT_ASSERT(private_data_ptr != 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + + has_vreverse = (*ccprev == OP_VREVERSE); + if (*ccprev == OP_REVERSE || has_vreverse) + ccprev = compile_reverse_matchingpath(common, ccprev, current); + break; + case OP_ASSERT_SCS: + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); + break; + case OP_ONCE: + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0)); + break; + case OP_COND: + case OP_SCOND: + break; + default: if (private_data_ptr != 0) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); else OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - } - else - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 
1 : 0)); + break; } compile_matchingpath(common, ccprev, cc, current); @@ -13320,7 +12427,7 @@ if (has_alternatives) if (alt_max <= 3) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count); else - put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); + mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); } if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0) @@ -13346,7 +12453,7 @@ if (has_alternatives) } else { - sljit_set_put_label(put_label, LABEL()); + sljit_set_label(mov_addr, LABEL()); sljit_emit_op0(compiler, SLJIT_ENDBR); } } @@ -13361,14 +12468,18 @@ if (has_alternatives) if (cond != NULL) { SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND); - assert = CURRENT_AS(bracket_backtrack)->u.assert; - if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0) + if (ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); - add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2)); - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0); + assert = CURRENT_AS(bracket_backtrack)->u.assert; + SLJIT_ASSERT(assert->framesize != 0); + if (assert->framesize > 0) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0); + } } JUMPHERE(cond); } @@ -13410,6 +12521,21 @@ else 
if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SC OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); } +else if (opcode == OP_ASSERT_SCS) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0); + free_stack(common, has_alternatives ? 3 : 2); + + set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL()); + + /* Nested scs blocks will not update this variable. */ + if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw)) + common->restore_end_ptr = 0; + } else if (opcode == OP_ONCE) { cc = ccbegin + GET(ccbegin, 1); @@ -13590,6 +12716,9 @@ if (opcode == OP_THEN || opcode == OP_THEN_ARG) } } +if (common->restore_end_ptr != 0 && opcode != OP_SKIP_ARG) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr); + if (common->local_quit_available) { /* Abort match with a fail. 
*/ @@ -13607,8 +12736,18 @@ if (opcode == OP_SKIP_ARG) OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2)); sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark)); + if (common->restore_end_ptr == 0) + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0); + add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0)); + return; + } + + jump = CMP(SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0); - add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0)); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr); + add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP)); + JUMPHERE(jump); return; } @@ -13640,6 +12779,7 @@ static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *com { DEFINE_COMPILER; struct sljit_jump *jump; +int framesize; int size; if (CURRENT_AS(then_trap_backtrack)->then_trap) @@ -13656,11 +12796,15 @@ free_stack(common, size); jump = JUMP(SLJIT_JUMP); set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL()); + +framesize = CURRENT_AS(then_trap_backtrack)->framesize; +SLJIT_ASSERT(framesize != 0); + /* STACK_TOP is set by THEN. */ -if (CURRENT_AS(then_trap_backtrack)->framesize >= 0) +if (framesize > 0) { add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw)); } OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 3); @@ -13751,10 +12895,13 @@ while (current) case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO: + /* Since classes has no backtracking path, this + backtrackingpath was pushed by an iterator. 
*/ case OP_CLASS: case OP_NCLASS: #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: + case OP_ECLASS: #endif compile_iterator_backtrackingpath(common, current); break; @@ -13779,6 +12926,7 @@ while (current) case OP_ASSERT_NA: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRA: @@ -13830,8 +12978,12 @@ while (current) case OP_COMMIT: case OP_COMMIT_ARG: + if (common->restore_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr); + if (!common->local_quit_available) OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); + if (common->quit_label == NULL) add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP)); else @@ -13878,7 +13030,7 @@ jump_list *match = NULL; struct sljit_jump *next_alt = NULL; struct sljit_jump *accept_exit = NULL; struct sljit_label *quit; -struct sljit_put_label *put_label = NULL; +struct sljit_jump *mov_addr = NULL; /* Recurse captures then. */ common->then_trap = NULL; @@ -13941,7 +13093,7 @@ while (1) if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) { if (alt_max > 3) - put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1)); + mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(1)); else OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count); } @@ -13974,7 +13126,7 @@ while (1) if (alt_max > 3) { sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0); - sljit_set_put_label(put_label, LABEL()); + sljit_set_label(mov_addr, LABEL()); sljit_emit_op0(compiler, SLJIT_ENDBR); } else @@ -13985,7 +13137,7 @@ while (1) } else if (alt_max > 3) { - sljit_set_put_label(put_label, LABEL()); + sljit_set_label(mov_addr, LABEL()); sljit_emit_op0(compiler, SLJIT_ENDBR); } else @@ -14092,8 +13244,7 @@ int private_data_size; PCRE2_SPTR ccend; executable_functions *functions; void *executable_func; -sljit_uw executable_size; -sljit_uw total_length; +sljit_uw executable_size, 
private_data_length, total_length; struct sljit_label *mainloop_label = NULL; struct sljit_label *continue_match_label; struct sljit_label *empty_match_found_label = NULL; @@ -14120,7 +13271,7 @@ memset(&rootbacktrack, 0, sizeof(backtrack_common)); memset(common, 0, sizeof(compiler_common)); common->re = re; common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)); -rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size; +rootbacktrack.cc = (PCRE2_SPTR)((uint8_t *)re + re->code_start); #ifdef SUPPORT_UNICODE common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0; @@ -14168,7 +13319,7 @@ common->name_entry_size = re->name_entry_size; common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0; common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0; #ifdef SUPPORT_UNICODE -/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ +/* PCRE2_UTF[16|32] have the same value as PCRE2_UTF8. */ common->utf = (re->overall_options & PCRE2_UTF) != 0; common->ucp = (re->overall_options & PCRE2_UCP) != 0; if (common->utf) @@ -14200,10 +13351,26 @@ else ccend = bracketend(common->start); /* Calculate the local space size on the stack. */ -common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw); -common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data); -if (!common->optimized_cbracket) +common->ovector_start = LOCAL0; +/* Allocate space for temporary data structures. */ +private_data_length = ccend - common->start; +/* The chance of overflow is very low, but might happen on 32 bit. */ +if (private_data_length > ~(sljit_uw)0 / sizeof(sljit_s32)) return PCRE2_ERROR_NOMEMORY; + +private_data_length *= sizeof(sljit_s32); +/* Align to 32 bit. 
*/ +total_length = ((re->top_bracket + 1) + (sljit_uw)(sizeof(sljit_s32) - 1)) & ~(sljit_uw)(sizeof(sljit_s32) - 1); +if (~(sljit_uw)0 - private_data_length < total_length) + return PCRE2_ERROR_NOMEMORY; + +total_length += private_data_length; +common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length, allocator_data); +if (!common->private_data_ptrs) + return PCRE2_ERROR_NOMEMORY; + +memset(common->private_data_ptrs, 0, private_data_length); +common->optimized_cbracket = ((sljit_u8 *)common->private_data_ptrs) + private_data_length; #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1 memset(common->optimized_cbracket, 0, re->top_bracket + 1); #else @@ -14217,16 +13384,19 @@ common->ovector_start += sizeof(sljit_sw); #endif if (!check_opcode_types(common, common->start, ccend)) { - SLJIT_FREE(common->optimized_cbracket, allocator_data); - return PCRE2_ERROR_NOMEMORY; + SLJIT_FREE(common->private_data_ptrs, allocator_data); + return PCRE2_ERROR_JIT_UNSUPPORTED; } /* Checking flags and updating ovector_start. 
*/ -if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) +if (mode == PCRE2_JIT_COMPLETE && + (re->flags & PCRE2_LASTSET) != 0 && + (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0) { common->req_char_ptr = common->ovector_start; common->ovector_start += sizeof(sljit_sw); } + if (mode != PCRE2_JIT_COMPLETE) { common->start_used_ptr = common->ovector_start; @@ -14237,19 +13407,23 @@ if (mode != PCRE2_JIT_COMPLETE) common->ovector_start += sizeof(sljit_sw); } } + if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0) { common->match_end_ptr = common->ovector_start; common->ovector_start += sizeof(sljit_sw); } + #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD common->control_head_ptr = 1; #endif + if (common->control_head_ptr != 0) { common->control_head_ptr = common->ovector_start; common->ovector_start += sizeof(sljit_sw); } + if (common->has_set_som) { /* Saving the real start pointer is necessary. */ @@ -14270,19 +13444,11 @@ if (common->capture_last_ptr != 0) SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); - -total_length = ccend - common->start; -common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 
1 : 0)), allocator_data); -if (!common->private_data_ptrs) - { - SLJIT_FREE(common->optimized_cbracket, allocator_data); - return PCRE2_ERROR_NOMEMORY; - } -memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32)); - private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); -if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back) +if ((re->overall_options & PCRE2_ANCHORED) == 0 && + (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 && + !common->has_skip_in_assert_back) detect_early_fail(common, common->start, &private_data_size, 0, 0); set_private_data_ptrs(common, &private_data_size, ccend); @@ -14292,29 +13458,35 @@ SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr); if (private_data_size > 65536) { SLJIT_FREE(common->private_data_ptrs, allocator_data); - SLJIT_FREE(common->optimized_cbracket, allocator_data); - return PCRE2_ERROR_NOMEMORY; + return PCRE2_ERROR_JIT_UNSUPPORTED; } if (common->has_then) { - common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length); + total_length = ccend - common->start; + common->then_offsets = (sljit_u8 *)SLJIT_MALLOC(total_length, allocator_data); + if (!common->then_offsets) + { + SLJIT_FREE(common->private_data_ptrs, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } memset(common->then_offsets, 0, total_length); set_then_offsets(common, common->start, NULL); } -compiler = sljit_create_compiler(allocator_data, NULL); +compiler = sljit_create_compiler(allocator_data); if (!compiler) { - SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, allocator_data); + if (common->has_then) + SLJIT_FREE(common->then_offsets, allocator_data); return PCRE2_ERROR_NOMEMORY; } common->compiler = compiler; /* Main pcre2_jit_exec entry. 
*/ SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0); -sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, 0, private_data_size); +sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS), 5, private_data_size); /* Register init. */ reset_ovector(common, (re->top_bracket + 1) * 2); @@ -14348,7 +13520,7 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0) mainloop_label = mainloop_entry(common); continue_match_label = LABEL(); /* Forward search if possible. */ - if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) + if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0) { if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common)) ; @@ -14363,7 +13535,8 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0) else continue_match_label = LABEL(); -if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) +if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && + (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0) { OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength)); @@ -14398,8 +13571,9 @@ compile_matchingpath(common, common->start, ccend, &rootbacktrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { sljit_free_compiler(compiler); - SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, allocator_data); + if (common->has_then) + SLJIT_FREE(common->then_offsets, allocator_data); PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); return PCRE2_ERROR_NOMEMORY; } @@ -14454,8 +13628,9 @@ compile_backtrackingpath(common, rootbacktrack.top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { sljit_free_compiler(compiler); - SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, 
allocator_data); + if (common->has_then) + SLJIT_FREE(common->then_offsets, allocator_data); PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); return PCRE2_ERROR_NOMEMORY; } @@ -14536,6 +13711,8 @@ common->early_fail_end_ptr = 0; common->currententry = common->entries; common->local_quit_available = TRUE; quit_label = common->quit_label; +SLJIT_ASSERT(common->restore_end_ptr == 0); + if (common->currententry != NULL) { /* A free bit for each private data. */ @@ -14565,24 +13742,28 @@ if (common->currententry != NULL) SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL); sljit_free_compiler(compiler); - SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, allocator_data); + if (common->has_then) + SLJIT_FREE(common->then_offsets, allocator_data); PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); return PCRE2_ERROR_NOMEMORY; } } + common->local_quit_available = FALSE; common->quit_label = quit_label; +SLJIT_ASSERT(common->restore_end_ptr == 0); -/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */ +/* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if fails. */ /* This is a (really) rare case. */ set_jumps(common->stackalloc, LABEL()); /* RETURN_ADDR is not a saved register. 
*/ -sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); +SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw)); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0); SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE); OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack)); @@ -14593,8 +13774,8 @@ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FU jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0); OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); -OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1); OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); /* Allocation failed. */ @@ -14715,10 +13896,11 @@ if (common->getucdtype != NULL) } #endif /* SUPPORT_UNICODE */ -SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, allocator_data); +if (common->has_then) + SLJIT_FREE(common->then_offsets, allocator_data); -executable_func = sljit_generate_code(compiler); +executable_func = sljit_generate_code(compiler, 0, NULL); executable_size = sljit_get_generated_code_size(compiler); sljit_free_compiler(compiler); @@ -14786,10 +13968,37 @@ pcre2_jit_compile(pcre2_code *code, uint32_t options) { pcre2_real_code *re = (pcre2_real_code *)code; #ifdef SUPPORT_JIT +void *exec_memory; executable_functions *functions; static int executable_allocator_is_working = -1; + +if (executable_allocator_is_working == -1) + { + /* Checks whether the executable allocator is working. 
This check + might run multiple times in multi-threaded environments, but the + result should not be affected by it. */ + exec_memory = SLJIT_MALLOC_EXEC(32, NULL); + if (exec_memory != NULL) + { + SLJIT_FREE_EXEC(((sljit_u8*)(exec_memory)) + SLJIT_EXEC_OFFSET(exec_memory), NULL); + executable_allocator_is_working = 1; + } + else executable_allocator_is_working = 0; + } #endif +if (options & PCRE2_JIT_TEST_ALLOC) + { + if (options != PCRE2_JIT_TEST_ALLOC) + return PCRE2_ERROR_JIT_BADOPTION; + +#ifdef SUPPORT_JIT + return executable_allocator_is_working ? 0 : PCRE2_ERROR_NOMEMORY; +#else + return PCRE2_ERROR_JIT_UNSUPPORTED; +#endif + } + if (code == NULL) return PCRE2_ERROR_NULL; @@ -14850,20 +14059,6 @@ return PCRE2_ERROR_JIT_BADOPTION; if ((re->flags & PCRE2_NOJIT) != 0) return 0; -if (executable_allocator_is_working == -1) - { - /* Checks whether the executable allocator is working. This check - might run multiple times in multi-threaded environments, but the - result should not be affected by it. */ - void *ptr = SLJIT_MALLOC_EXEC(32, NULL); - if (ptr != NULL) - { - SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL); - executable_allocator_is_working = 1; - } - else executable_allocator_is_working = 0; - } - if (!executable_allocator_is_working) return PCRE2_ERROR_NOMEMORY; @@ -14904,10 +14099,5 @@ return 0; #define INCLUDED_FROM_PCRE2_JIT_COMPILE -#if 0 -/* NMAP_MODIFICATIONS */ -#include "pcre2_jit_match.c" -#include "pcre2_jit_misc.c" -#endif /* End of pcre2_jit_compile.c */ diff --git a/libpcre/src/pcre2_maketables.c b/libpcre/src/pcre2_maketables.c index ac8b63b80..0474cc7db 100644 --- a/libpcre/src/pcre2_maketables.c +++ b/libpcre/src/pcre2_maketables.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -155,10 +155,10 @@ return yield; PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables) { - if (gcontext) - gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data); - else - free((void *)tables); +if (gcontext != NULL) + gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data); +else + free((void *)tables); } #endif diff --git a/libpcre/src/pcre2_match.c b/libpcre/src/pcre2_match.c index b4a970313..5adc03480 100644 --- a/libpcre/src/pcre2_match.c +++ b/libpcre/src/pcre2_match.c @@ -155,17 +155,17 @@ changed, the code at RETURN_SWITCH below must be updated in sync. */ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10, RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20, RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30, - RM31, RM32, RM33, RM34, RM35, RM36, RM37 }; + RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39 }; #ifdef SUPPORT_WIDE_CHARS -enum { RM100=100, RM101 }; +enum { RM100=100, RM101, RM102, RM103 }; #endif #ifdef SUPPORT_UNICODE enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207, RM208, RM209, RM210, RM211, RM212, RM213, RM214, RM215, RM216, RM217, RM218, RM219, RM220, RM221, RM222, RM223, - RM224, RM225 }; + RM224 }; #endif /* Define short names for general fields in the current backtrack frame, which @@ -348,6 +348,7 @@ seems unlikely.) 
Arguments: offset index into the offset vector caseless TRUE if caseless + caseopts bitmask of REFI_FLAG_XYZ values F the current backtracking frame pointer mb points to match block lengthptr pointer for returning the length matched @@ -358,8 +359,8 @@ Returns: = 0 sucessful match; number of code units matched is set */ static int -match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb, - PCRE2_SIZE *lengthptr) +match_ref(PCRE2_SIZE offset, BOOL caseless, int caseopts, heapframe *F, + match_block *mb, PCRE2_SIZE *lengthptr) { PCRE2_SPTR p; PCRE2_SIZE length; @@ -389,6 +390,8 @@ if (caseless) { #if defined SUPPORT_UNICODE BOOL utf = (mb->poptions & PCRE2_UTF) != 0; + BOOL caseless_restrict = (caseopts & REFI_FLAG_CASELESS_RESTRICT) != 0; + BOOL turkish_casing = !caseless_restrict && (caseopts & REFI_FLAG_TURKISH_CASING) != 0; if (utf || (mb->poptions & PCRE2_UCP) != 0) { @@ -420,10 +423,20 @@ if (caseless) d = *p++; } - ur = GET_UCD(d); - if (c != d && c != (uint32_t)((int)d + ur->other_case)) + if (turkish_casing && UCD_ANY_I(d)) + { + c = UCD_FOLD_I_TURKISH(c); + d = UCD_FOLD_I_TURKISH(d); + if (c != d) return -1; /* No match */ + } + else if (c != d && c != (uint32_t)((int)d + (ur = GET_UCD(d))->other_case)) { const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset; + + /* When PCRE2_EXTRA_CASELESS_RESTRICT is set, ignore any caseless sets + that start with an ASCII character. */ + if (caseless_restrict && *pp < 128) return -1; /* No match */ + for (;;) { if (c < *pp) return -1; /* No match */ @@ -528,38 +541,46 @@ For hard partial matching, we immediately return a partial match. Otherwise, carrying on means that a complete match on the current subject will be sought. A partial match is returned only if no complete match can be found. 
*/ -#define CHECK_PARTIAL()\ - if (Feptr >= mb->end_subject) \ - { \ - SCHECK_PARTIAL(); \ - } +#define CHECK_PARTIAL() \ + do { \ + if (Feptr >= mb->end_subject) \ + { \ + SCHECK_PARTIAL(); \ + } \ + } \ + while (0) -#define SCHECK_PARTIAL()\ - if (mb->partial != 0 && \ - (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \ - { \ - mb->hitend = TRUE; \ - if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \ - } +#define SCHECK_PARTIAL() \ + do { \ + if (mb->partial != 0 && \ + (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \ + { \ + mb->hitend = TRUE; \ + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \ + } \ + } \ + while (0) /* These macros are used to implement backtracking. They simulate a recursive call to the match() function by means of a local vector of frames which remember the backtracking points. */ -#define RMATCH(ra,rb)\ - {\ - start_ecode = ra;\ - Freturn_id = rb;\ - goto MATCH_RECURSE;\ - L_##rb:;\ - } +#define RMATCH(ra,rb) \ + do { \ + start_ecode = ra; \ + Freturn_id = rb; \ + goto MATCH_RECURSE; \ + L_##rb:; \ + } \ + while (0) -#define RRETURN(ra)\ - {\ - rrc = ra;\ - goto RETURN_SWITCH;\ - } +#define RRETURN(ra) \ + do { \ + rrc = ra; \ + goto RETURN_SWITCH; \ + } \ + while (0) @@ -813,7 +834,10 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, offset = Flast_group_offset; for(;;) { + /* Corrupted heapframes?. Trigger an assert and return an error */ + PCRE2_ASSERT(offset != PCRE2_UNSET); if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; + N = (heapframe *)((char *)match_data->heapframes + offset); P = (heapframe *)((char *)N - frame_size); if (N->group_frame_type == (GF_CAPTURE | number)) break; @@ -852,7 +876,10 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, offset = Flast_group_offset; for(;;) { + /* Corrupted heapframes?. 
Trigger an assert and return an error */ + PCRE2_ASSERT(offset != PCRE2_UNSET); if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; + N = (heapframe *)((char *)match_data->heapframes + offset); P = (heapframe *)((char *)N - frame_size); if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break; @@ -1329,7 +1356,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } else /* Maximize */ @@ -1430,7 +1457,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH); Feptr++; } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } else /* Maximize */ @@ -1488,7 +1515,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, } if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } else /* Maximize */ { @@ -1706,7 +1733,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, Feptr++; } } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } /* Maximize case */ @@ -1844,7 +1871,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH); } } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } /* Maximize case */ @@ -1928,7 +1955,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, #define Lmax F->temp_32[1] #define Lstart_eptr F->temp_sptr[0] #define Lbyte_map_address F->temp_sptr[1] -#define Lbyte_map ((unsigned char *)Lbyte_map_address) +#define Lbyte_map ((const unsigned char *)Lbyte_map_address) case OP_NCLASS: case OP_CLASS: @@ -2071,7 +2098,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if 
((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH); } } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } /* If maximizing, find the longest possible run, then work backwards. */ @@ -2151,7 +2178,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } } - /* Control never gets here */ + + PCRE2_UNREACHABLE(); /* Control never reaches here */ #undef Lbyte_map_address #undef Lbyte_map @@ -2219,7 +2247,9 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); - if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH); + if (!PRIV(xclass)(fc, Lxclass_data, + (const uint8_t*)mb->start_code, utf)) + RRETURN(MATCH_NOMATCH); } /* If Lmax == Lmin we can just continue with the main loop. */ @@ -2242,9 +2272,11 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); - if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH); + if (!PRIV(xclass)(fc, Lxclass_data, + (const uint8_t*)mb->start_code, utf)) + RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } /* If maximizing, find the longest possible run, then work backwards. 
*/ @@ -2265,7 +2297,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, #else fc = *Feptr; #endif - if (!PRIV(xclass)(fc, Lxclass_data, utf)) break; + if (!PRIV(xclass)(fc, Lxclass_data, + (const uint8_t*)mb->start_code, utf)) break; Feptr += len; } @@ -2287,7 +2320,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } #endif /* SUPPORT_WIDE_CHARS: end of XCLASS */ @@ -2297,6 +2330,151 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, #undef Lmax + /* ===================================================================== */ + /* Match a complex, set-based character class. This opcodes are used when + there is complex nesting or logical operations within the character + class. */ + +#define Lstart_eptr F->temp_sptr[0] +#define Leclass_data F->temp_sptr[1] +#define Leclass_len F->temp_size +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] + +#ifdef SUPPORT_WIDE_CHARS + case OP_ECLASS: + { + Leclass_data = Fecode + 1 + LINK_SIZE; /* Save for matching */ + Fecode += GET(Fecode, 1); /* Advance past the item */ + Leclass_len = (PCRE2_SIZE)(Fecode - Leclass_data); + + switch (*Fecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + case OP_CRPOSQUERY: + fc = *Fecode++ - OP_CRSTAR; + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + Lmin = GET2(Fecode, 1); + Lmax = GET2(Fecode, 1 + IMM2_SIZE); + if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */ + reptype = rep_typ[*Fecode - OP_CRSTAR]; + Fecode += 1 + 2 * IMM2_SIZE; + break; + + default: /* No repeat follows */ + Lmin = Lmax = 1; + break; + } + + /* First, ensure the minimum number of matches are present. 
*/ + + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len, + (const uint8_t*)mb->start_code, utf)) + RRETURN(MATCH_NOMATCH); + } + + /* If Lmax == Lmin we can just continue with the main loop. */ + + if (Lmin == Lmax) continue; + + /* If minimizing, keep testing the rest of the expression and advancing + the pointer while it matches the class. */ + + if (reptype == REPTYPE_MIN) + { + for (;;) + { + RMATCH(Fecode, RM102); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len, + (const uint8_t*)mb->start_code, utf)) + RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } + + /* If maximizing, find the longest possible run, then work backwards. */ + + else + { + Lstart_eptr = Feptr; + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } +#ifdef SUPPORT_UNICODE + GETCHARLENTEST(fc, Feptr, len); +#else + fc = *Feptr; +#endif + if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len, + (const uint8_t*)mb->start_code, utf)) + break; + Feptr += len; + } + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. 
*/ + + for(;;) + { + RMATCH(Fecode, RM103); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */ +#ifdef SUPPORT_UNICODE + if (utf) BACKCHAR(Feptr); +#endif + } + RRETURN(MATCH_NOMATCH); + } + + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } +#endif /* SUPPORT_WIDE_CHARS: end of ECLASS */ + +#undef Lstart_eptr +#undef Leclass_data +#undef Leclass_len +#undef Lmin +#undef Lmax + + /* ===================================================================== */ /* Match various character types when PCRE2_UCP is not set. These opcodes are not generated when PCRE2_UCP is set - instead appropriate property @@ -2492,10 +2670,6 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, switch(Fecode[1]) { - case PT_ANY: - if (notmatch) RRETURN(MATCH_NOMATCH); - break; - case PT_LAMP: chartype = prop->chartype; if ((chartype == ucp_Lu || @@ -2606,6 +2780,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, /* This should never occur */ default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } @@ -2728,19 +2903,6 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, BOOL notmatch = Lctype == OP_NOTPROP; switch(proptype) { - case PT_ANY: - if (notmatch) RRETURN(MATCH_NOMATCH); - for (i = 1; i <= Lmin; i++) - { - if (Feptr >= mb->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(fc, Feptr); - } - break; - case PT_LAMP: for (i = 1; i <= Lmin; i++) { @@ -2969,6 +3131,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, /* This should not occur */ default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } } @@ -3244,6 +3407,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } /* End switch(Lctype) */ @@ -3496,6 +3660,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - 
mb->start_code, *Fecode, break; default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } } @@ -3516,27 +3681,11 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, { switch(proptype) { - case PT_ANY: - for (;;) - { - RMATCH(Fecode, RM208); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); - if (Feptr >= mb->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(fc, Feptr); - if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - case PT_LAMP: for (;;) { int chartype; - RMATCH(Fecode, RM209); + RMATCH(Fecode, RM208); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3551,12 +3700,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, chartype == ucp_Lt) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_GC: for (;;) { - RMATCH(Fecode, RM210); + RMATCH(Fecode, RM209); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3568,12 +3717,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_PC: for (;;) { - RMATCH(Fecode, RM211); + RMATCH(Fecode, RM210); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3585,12 +3734,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_SC: for (;;) { - RMATCH(Fecode, 
RM212); + RMATCH(Fecode, RM211); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3602,14 +3751,14 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_SCX: for (;;) { BOOL ok; const ucd_record *prop; - RMATCH(Fecode, RM225); + RMATCH(Fecode, RM224); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3624,13 +3773,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if (ok == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_ALNUM: for (;;) { int category; - RMATCH(Fecode, RM213); + RMATCH(Fecode, RM212); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3643,7 +3792,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ /* Perl space used to exclude VT, but from Perl 5.18 it is included, which means that Perl space and POSIX space are now identical. 
PCRE @@ -3653,7 +3802,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, case PT_PXSPACE: /* POSIX space */ for (;;) { - RMATCH(Fecode, RM214); + RMATCH(Fecode, RM213); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3675,13 +3824,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; } } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_WORD: for (;;) { int chartype, category; - RMATCH(Fecode, RM215); + RMATCH(Fecode, RM214); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3698,13 +3847,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, chartype == ucp_Pc) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_CLIST: for (;;) { const uint32_t *cp; - RMATCH(Fecode, RM216); + RMATCH(Fecode, RM215); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3735,12 +3884,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, } } } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_UCNC: for (;;) { - RMATCH(Fecode, RM217); + RMATCH(Fecode, RM216); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3754,12 +3903,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, fc >= 0xe000) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_BIDICL: for (;;) { - RMATCH(Fecode, RM224); + RMATCH(Fecode, RM223); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= 
mb->end_subject) @@ -3771,14 +3920,14 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_BOOL: for (;;) { BOOL ok; const ucd_record *prop; - RMATCH(Fecode, RM223); + RMATCH(Fecode, RM222); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3793,10 +3942,11 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if (ok == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ /* This should never occur */ default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } } @@ -3808,7 +3958,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, { for (;;) { - RMATCH(Fecode, RM218); + RMATCH(Fecode, RM217); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3835,7 +3985,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, { for (;;) { - RMATCH(Fecode, RM219); + RMATCH(Fecode, RM218); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3951,6 +4101,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } } @@ -4095,11 +4246,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } } } - /* Control never gets here */ + + PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ } /* If maximizing, it is worth using inline code for speed, doing the type @@ -4117,21 +4270,6 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", 
Fecode - mb->start_code, *Fecode, BOOL notmatch = Lctype == OP_NOTPROP; switch(proptype) { - case PT_ANY: - for (i = Lmin; i < Lmax; i++) - { - int len = 1; - if (Feptr >= mb->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLENTEST(fc, Feptr, len); - if (notmatch) break; - Feptr+= len; - } - break; - case PT_LAMP: for (i = Lmin; i < Lmax; i++) { @@ -4377,6 +4515,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } @@ -4391,7 +4530,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, for(;;) { if (Feptr <= Lstart_eptr) break; - RMATCH(Fecode, RM222); + RMATCH(Fecode, RM221); if (rrc != MATCH_NOMATCH) RRETURN(rrc); Feptr--; if (utf) BACKCHAR(Feptr); @@ -4434,7 +4573,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, PCRE2_SPTR fptr; if (Feptr <= Lstart_eptr) break; /* At start of char run */ - RMATCH(Fecode, RM220); + RMATCH(Fecode, RM219); if (rrc != MATCH_NOMATCH) RRETURN(rrc); /* Backtracking over an extended grapheme cluster involves inspecting @@ -4695,6 +4834,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } @@ -4707,7 +4847,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, for(;;) { if (Feptr <= Lstart_eptr) break; - RMATCH(Fecode, RM221); + RMATCH(Fecode, RM220); if (rrc != MATCH_NOMATCH) RRETURN(rrc); Feptr--; BACKCHAR(Feptr); @@ -4952,6 +5092,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } @@ -4988,16 +5129,18 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, #define Lmin F->temp_32[0] #define Lmax F->temp_32[1] #define Lcaseless F->temp_32[2] +#define Lcaseopts F->temp_32[3] #define Lstart F->temp_sptr[0] #define Loffset F->temp_size 
case OP_DNREF: case OP_DNREFI: Lcaseless = (Fop == OP_DNREFI); + Lcaseopts = (Fop == OP_DNREFI)? Fecode[1 + 2*IMM2_SIZE] : 0; { int count = GET2(Fecode, 1+IMM2_SIZE); PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size; - Fecode += 1 + 2*IMM2_SIZE; + Fecode += 1 + 2*IMM2_SIZE + (Fop == OP_DNREFI? 1 : 0); while (count-- > 0) { @@ -5011,8 +5154,9 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, case OP_REF: case OP_REFI: Lcaseless = (Fop == OP_REFI); + Lcaseopts = (Fop == OP_REFI)? Fecode[1 + IMM2_SIZE] : 0; Loffset = (GET2(Fecode, 1) << 1) - 2; - Fecode += 1 + IMM2_SIZE; + Fecode += 1 + IMM2_SIZE + (Fop == OP_REFI? 1 : 0); /* Set up for repetition, or handle the non-repeated case. The maximum and minimum must be in the heap frame, but as they are short-term values, we @@ -5044,7 +5188,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, default: /* No repeat follows */ { - rrc = match_ref(Loffset, Lcaseless, F, mb, &length); + rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &length); if (rrc != 0) { if (rrc > 0) Feptr = mb->end_subject; /* Partial match */ @@ -5078,7 +5222,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, for (i = 1; i <= Lmin; i++) { PCRE2_SIZE slength; - rrc = match_ref(Loffset, Lcaseless, F, mb, &slength); + rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength); if (rrc != 0) { if (rrc > 0) Feptr = mb->end_subject; /* Partial match */ @@ -5102,7 +5246,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RMATCH(Fecode, RM20); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); - rrc = match_ref(Loffset, Lcaseless, F, mb, &slength); + rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength); if (rrc != 0) { if (rrc > 0) Feptr = mb->end_subject; /* Partial match */ @@ -5111,7 +5255,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, } Feptr += 
slength; } - /* Control never gets here */ + + PCRE2_UNREACHABLE(); /* Control never reaches here */ } /* If maximizing, find the longest string and work backwards, as long as @@ -5126,7 +5271,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, for (i = Lmin; i < Lmax; i++) { PCRE2_SIZE slength; - rrc = match_ref(Loffset, Lcaseless, F, mb, &slength); + rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength); if (rrc != 0) { /* Can't use CHECK_PARTIAL because we don't want to update Feptr in @@ -5177,7 +5322,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, for (i = Lmin; i < Lmax; i++) { PCRE2_SIZE slength; - (void)match_ref(Loffset, Lcaseless, F, mb, &slength); + (void)match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength); Feptr += slength; } } @@ -5185,7 +5330,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + + PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ #undef Lcaseless #undef Lmin @@ -5409,7 +5555,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, Fecode += GET(Fecode, 1); if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH); } - /* Control never reaches here. */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ #undef Lframe_type @@ -5494,7 +5640,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, Lstart_branch = next_ecode; if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH); } - /* Control never reaches here. */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ #undef Lframe_type #undef Lstart_branch @@ -5585,6 +5731,132 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, #undef Lframe_type + /* ===================================================================== */ + /* Handle scan substring operation. 
*/ + +#define Lframe_type F->temp_32[0] +#define Lextra_size F->temp_32[1] +#define Lsaved_moptions F->temp_32[2] +#define Lsaved_end_subject F->temp_sptr[0] +#define Lsaved_eptr F->temp_sptr[1] +#define Ltrue_end_extra F->temp_size + + case OP_ASSERT_SCS: + { + PCRE2_SPTR ecode = Fecode + 1 + LINK_SIZE; + uint32_t extra_size = 0; + int count; + PCRE2_SPTR slot; + + /* Disable compiler warning. */ + offset = 0; + (void)offset; + + for (;;) + { + if (*ecode == OP_CREF) + { + extra_size += 1+IMM2_SIZE; + offset = (GET2(ecode, 1) << 1) - 2; + ecode += 1+IMM2_SIZE; + if (offset < Foffset_top && Fovector[offset] != PCRE2_UNSET) + goto SCS_OFFSET_FOUND; + continue; + } + + if (*ecode != OP_DNCREF) RRETURN(MATCH_NOMATCH); + + count = GET2(ecode, 1 + IMM2_SIZE); + slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size; + extra_size += 1+2*IMM2_SIZE; + ecode += 1+2*IMM2_SIZE; + + while (count > 0) + { + offset = (GET2(slot, 0) << 1) - 2; + if (offset < Foffset_top && Fovector[offset] != PCRE2_UNSET) + goto SCS_OFFSET_FOUND; + slot += mb->name_entry_size; + count--; + } + } + + SCS_OFFSET_FOUND: + + /* Skip remaining options. 
*/ + for (;;) + { + if (*ecode == OP_CREF) + { + extra_size += 1+IMM2_SIZE; + ecode += 1+IMM2_SIZE; + } + else if (*ecode == OP_DNCREF) + { + extra_size += 1+2*IMM2_SIZE; + ecode += 1+2*IMM2_SIZE; + } + else break; + } + + Lextra_size = extra_size; + } + + Lsaved_end_subject = mb->end_subject; + Ltrue_end_extra = mb->true_end_subject - mb->end_subject; + Lsaved_eptr = Feptr; + Lsaved_moptions = mb->moptions; + + Feptr = mb->start_subject + Fovector[offset]; + mb->true_end_subject = mb->end_subject = + mb->start_subject + Fovector[offset + 1]; + mb->moptions &= ~PCRE2_NOTEOL; + + Lframe_type = GF_NOCAPTURE | Fop; + for (;;) + { + group_frame_type = Lframe_type; + RMATCH(Fecode + 1 + LINK_SIZE + Lextra_size, RM38); + if (rrc == MATCH_ACCEPT) + { + memcpy(Fovector, + (char *)assert_accept_frame + offsetof(heapframe, ovector), + assert_accept_frame->offset_top * sizeof(PCRE2_SIZE)); + Foffset_top = assert_accept_frame->offset_top; + Fmark = assert_accept_frame->mark; + break; + } + + if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) + { + mb->end_subject = Lsaved_end_subject; + mb->true_end_subject = mb->end_subject + Ltrue_end_extra; + mb->moptions = Lsaved_moptions; + RRETURN(rrc); + } + + Fecode += GET(Fecode, 1); + if (*Fecode != OP_ALT) + { + mb->end_subject = Lsaved_end_subject; + mb->true_end_subject = mb->end_subject + Ltrue_end_extra; + mb->moptions = Lsaved_moptions; + RRETURN(MATCH_NOMATCH); + } + Lextra_size = 0; + } + + do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT); + Fecode += 1 + LINK_SIZE; + Feptr = Lsaved_eptr; + break; + +#undef Lframe_type +#undef Lextra_size +#undef Lsaved_end_subject +#undef Lsaved_eptr +#undef Ltrue_end_extra +#undef Lsave_moptions /* ===================================================================== */ /* The callout item calls an external function, if one is provided, passing @@ -5795,8 +6067,11 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, #ifdef SUPPORT_UNICODE if (utf) { - while 
(number-- > 0) + /* We used to do a simpler `while (number-- > 0)` but that triggers + clang's unsigned integer overflow sanitizer. */ + while (number > 0) { + --number; if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH); Feptr--; BACKCHAR(Feptr); @@ -5862,14 +6137,14 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, { ptrdiff_t diff = Feptr - mb->start_subject; - uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? diff : 0); + uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? (int)diff : 0); if (Lmin > available) RRETURN(MATCH_NOMATCH); if (Lmax > available) Lmax = available; Feptr -= Lmax; } /* Now try matching, moving forward one character on failure, until we - reach the mimimum back length. */ + reach the minimum back length. */ for (;;) { @@ -5881,7 +6156,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if (utf) { FORWARDCHARTEST(Feptr, mb->end_subject); } #endif } - /* Control never reaches here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ #undef Lmin #undef Lmax @@ -5931,14 +6206,20 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, (char *)P->eptr - (char *)mb->start_subject); #endif - /* If we are at the end of an assertion that is a condition, return a - match, discarding any intermediate backtracking points. Copy back the - mark setting and the captures into the frame before N so that they are - set on return. Doing this for all assertions, both positive and negative, - seems to match what Perl does. */ + /* If we are at the end of an assertion that is a condition, first check + to see if we are at the end of a variable-length branch in a lookbehind. + If this is the case and we have not landed on the current character, + return no match. Compare code below for non-condition lookbehinds. In + other cases, return a match, discarding any intermediate backtracking + points. 
Copy back the mark setting and the captures into the frame before + N so that they are set on return. Doing this for all assertions, both + positive and negative, seems to match what Perl does. */ if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT) { + if ((*bracode == OP_ASSERTBACK || *bracode == OP_ASSERTBACK_NOT) && + branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr) + RRETURN(MATCH_NOMATCH); memcpy((char *)P + offsetof(heapframe, ovector), Fovector, Foffset_top * sizeof(PCRE2_SIZE)); P->offset_top = Foffset_top; @@ -5967,7 +6248,11 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, /* It is the end of whole-pattern recursion. */ offset = Flast_group_offset; + + /* Corrupted heapframes?. Trigger an assert and return an error */ + PCRE2_ASSERT(offset != PCRE2_UNSET); if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; + N = (heapframe *)((char *)match_data->heapframes + offset); P = (heapframe *)((char *)N - frame_size); Flast_group_offset = P->last_group_offset; @@ -6042,6 +6327,23 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, case OP_ASSERT_NOT: RRETURN(MATCH_MATCH); + /* A scan substring group must preserve the current end_subject, + and restore it before the backtracking is performed into its sub + pattern. */ + + case OP_ASSERT_SCS: + F->temp_sptr[0] = mb->end_subject; + mb->end_subject = P->temp_sptr[0]; + mb->true_end_subject = mb->end_subject + P->temp_size; + Feptr = P->temp_sptr[1]; + + RMATCH(Fecode + 1 + LINK_SIZE, RM39); + + mb->end_subject = F->temp_sptr[0]; + mb->true_end_subject = mb->end_subject; + RRETURN(rrc); + break; + /* At the end of a script run, apply the script-checking rules. This code will never by exercised if Unicode support it not compiled, because in that environment script runs cause an error at compile time. 
*/ @@ -6165,8 +6467,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, case OP_EODN: ASSERT_NL_OR_EOS: - if (Feptr < mb->end_subject && - (!IS_NEWLINE(Feptr) || Feptr != mb->end_subject - mb->nllen)) + if (Feptr < mb->true_end_subject && + (!IS_NEWLINE(Feptr) || Feptr != mb->true_end_subject - mb->nllen)) { if (mb->partial != 0 && Feptr + 1 >= mb->end_subject && @@ -6447,6 +6749,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, something seriously wrong in the code above or the OP_xxx definitions. */ default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } @@ -6455,8 +6758,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, loop. */ } /* End of main loop */ -/* Control never reaches here */ +PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ /* ========================================================================= */ /* The RRETURN() macro jumps here. The number that is saved in Freturn_id @@ -6482,20 +6785,21 @@ switch (Freturn_id) LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16) LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24) LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32) - LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) + LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) #ifdef SUPPORT_WIDE_CHARS - LBL(100) LBL(101) + LBL(100) LBL(101) LBL(102) LBL(103) #endif #ifdef SUPPORT_UNICODE LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206) LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213) LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220) - LBL(221) LBL(222) LBL(223) LBL(224) LBL(225) + LBL(221) LBL(222) LBL(223) LBL(224) #endif default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } #undef LBL @@ -6621,7 +6925,7 @@ if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the options variable 
for this function. Users of PCRE2 who are not calling the function directly would like to have a way of setting these flags, in the same -way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with +way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and (*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which we now transfer to the options for this function. The bits are guaranteed to be @@ -6703,9 +7007,6 @@ if (use_jit) #ifdef SUPPORT_UNICODE if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid) { -#if PCRE2_CODE_UNIT_WIDTH != 32 - unsigned int i; -#endif /* For 8-bit and 16-bit UTF, check that the first code unit is a valid character start. */ @@ -6726,7 +7027,7 @@ if (use_jit) start of matching. */ #if PCRE2_CODE_UNIT_WIDTH != 32 - for (i = re->max_lookbehind; i > 0 && start_match > subject; i--) + for (unsigned int i = re->max_lookbehind; i > 0 && start_match > subject; i--) { start_match--; while (start_match > subject && @@ -6973,10 +7274,10 @@ mb->mark = mb->nomatch_mark = NULL; /* In case never set */ /* The name table is needed for finding all the numbers associated with a given name, for condition testing. The code follows the name table. */ -mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)); +mb->name_table = (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code)); mb->name_count = re->name_count; mb->name_entry_size = re->name_entry_size; -mb->start_code = mb->name_table + re->name_count * re->name_entry_size; +mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start); /* Process the \R and newline settings. 
*/ @@ -7013,7 +7314,9 @@ switch(re->newline_convention) mb->nltype = NLTYPE_ANYCRLF; break; - default: return PCRE2_ERROR_INTERNAL; + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; } /* The backtracking frames have fixed data at the front, and a PCRE2_SIZE @@ -7159,7 +7462,7 @@ for(;;) However, there is an option (settable at compile time) that disables these, for testing and for ensuring that all callouts do actually occur. */ - if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) + if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0) { /* If firstline is TRUE, the start of the match is constrained to the first line of a multiline string. That is, the match must be before or at the diff --git a/libpcre/src/pcre2_match_data.c b/libpcre/src/pcre2_match_data.c index 757dab9df..100e7c9d9 100644 --- a/libpcre/src/pcre2_match_data.c +++ b/libpcre/src/pcre2_match_data.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2022 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -77,14 +77,16 @@ return yield; * Create a match data block using pattern data * *************************************************/ -/* If no context is supplied, use the memory allocator from the code. */ +/* If no context is supplied, use the memory allocator from the code. This code +assumes that a general context contains nothing other than a memory allocator. +If that ever changes, this code will need fixing. 
*/ PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION pcre2_match_data_create_from_pattern(const pcre2_code *code, pcre2_general_context *gcontext) { if (gcontext == NULL) gcontext = (pcre2_general_context *)code; -return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1, +return pcre2_match_data_create(((const pcre2_real_code *)code)->top_bracket + 1, gcontext); } diff --git a/libpcre/src/pcre2_ord2utf.c b/libpcre/src/pcre2_ord2utf.c index 140373099..a1e9e0880 100644 --- a/libpcre/src/pcre2_ord2utf.c +++ b/libpcre/src/pcre2_ord2utf.c @@ -117,4 +117,4 @@ return 1; } #endif /* SUPPORT_UNICODE */ -/* End of pcre_ord2utf.c */ +/* End of pcre2_ord2utf.c */ diff --git a/libpcre/src/pcre2_pattern_info.c b/libpcre/src/pcre2_pattern_info.c index a29f5eff6..fe4d3c661 100644 --- a/libpcre/src/pcre2_pattern_info.c +++ b/libpcre/src/pcre2_pattern_info.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2018 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -64,7 +64,7 @@ Returns: 0 when data returned PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where) { -const pcre2_real_code *re = (pcre2_real_code *)code; +const pcre2_real_code *re = (const pcre2_real_code *)code; if (where == NULL) /* Requests field length */ { @@ -230,7 +230,8 @@ switch(what) break; case PCRE2_INFO_NAMETABLE: - *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code)); + *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((const char *)re + + sizeof(pcre2_real_code)); break; case PCRE2_INFO_NEWLINE: @@ -268,7 +269,7 @@ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 
pcre2_callout_enumerate(const pcre2_code *code, int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data) { -pcre2_real_code *re = (pcre2_real_code *)code; +const pcre2_real_code *re = (const pcre2_real_code *)code; pcre2_callout_enumerate_block cb; PCRE2_SPTR cc; #ifdef SUPPORT_UNICODE @@ -291,7 +292,7 @@ if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE; cb.version = 0; -cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)) +cc = (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code)) + re->name_count * re->name_entry_size; while (TRUE) @@ -383,8 +384,9 @@ while (TRUE) #endif break; -#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 +#ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: + case OP_ECLASS: cc += GET(cc, 1); break; #endif diff --git a/libpcre/src/pcre2_serialize.c b/libpcre/src/pcre2_serialize.c index ba17a26d2..a10e3020b 100644 --- a/libpcre/src/pcre2_serialize.c +++ b/libpcre/src/pcre2_serialize.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -127,25 +127,25 @@ dst_bytes += TABLES_LENGTH; for (i = 0; i < number_of_codes; i++) { re = (const pcre2_real_code *)(codes[i]); - (void)memcpy(dst_bytes, (char *)re, re->blocksize); - - /* Certain fields in the compiled code block are re-set during - deserialization. In order to ensure that the serialized data stream is always - the same for the same pattern, set them to zero here. 
We can't assume the - copy of the pattern is correctly aligned for accessing the fields as part of + (void)memcpy(dst_bytes, (const char *)re, re->blocksize); + + /* Certain fields in the compiled code block are re-set during + deserialization. In order to ensure that the serialized data stream is always + the same for the same pattern, set them to zero here. We can't assume the + copy of the pattern is correctly aligned for accessing the fields as part of a structure. Note the use of sizeof(void *) in the second of these, to - specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a - pointer to uint8_t), gcc gives a warning because the first argument is also a - pointer to uint8_t. Casting the first argument to (void *) can stop this, but + specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a + pointer to uint8_t), gcc gives a warning because the first argument is also a + pointer to uint8_t. Casting the first argument to (void *) can stop this, but it didn't stop Coverity giving the same complaint. */ - - (void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0, + + (void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0, sizeof(pcre2_memctl)); - (void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0, + (void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0, sizeof(void *)); (void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0, - sizeof(void *)); - + sizeof(void *)); + dst_bytes += re->blocksize; } @@ -232,10 +232,10 @@ for (i = 0; i < number_of_codes; i++) if (dst_re->magic_number != MAGIC_NUMBER || dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 || dst_re->name_count > MAX_NAME_COUNT) - { - memctl->free(dst_re, memctl->memory_data); + { + memctl->free(dst_re, memctl->memory_data); return PCRE2_ERROR_BADSERIALIZEDDATA; - } + } /* At the moment only one table is supported. 
*/ diff --git a/libpcre/src/pcre2_study.c b/libpcre/src/pcre2_study.c index 792e696da..85764cea5 100644 --- a/libpcre/src/pcre2_study.c +++ b/libpcre/src/pcre2_study.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -114,7 +114,7 @@ uint32_t once_fudge = 0; BOOL had_recurse = FALSE; BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0; PCRE2_SPTR nextbranch = code + GET(code, 1); -PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE; +PCRE2_SPTR cc = code + 1 + LINK_SIZE; recurse_check this_recurse; /* If this is a "could be empty" group, its minimum length is 0. */ @@ -136,12 +136,13 @@ passes 16-bits, reset to that value and skip the rest of the branch. */ for (;;) { int d, min, recno; - PCRE2_UCHAR op, *cs, *ce; + PCRE2_UCHAR op; + PCRE2_SPTR cs, ce; if (branchlength >= UINT16_MAX) { branchlength = UINT16_MAX; - cc = (PCRE2_UCHAR *)nextbranch; + cc = nextbranch; } op = *cc; @@ -249,6 +250,7 @@ for (;;) case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ASSERT_NA: + case OP_ASSERT_SCS: case OP_ASSERTBACK_NA: do cc += GET(cc, 1); while (*cc == OP_ALT); /* Fall through */ @@ -417,15 +419,14 @@ for (;;) case OP_NCLASS: #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: + case OP_ECLASS: /* The original code caused an unsigned overflow in 64 bit systems, so now we use a conditional statement. 
*/ - if (op == OP_XCLASS) + if (op == OP_XCLASS || op == OP_ECLASS) cc += GET(cc, 1); else - cc += PRIV(OP_lengths)[OP_CLASS]; -#else - cc += PRIV(OP_lengths)[OP_CLASS]; #endif + cc += PRIV(OP_lengths)[OP_CLASS]; switch (*cc) { @@ -479,8 +480,8 @@ for (;;) if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0) { int count = GET2(cc, 1+IMM2_SIZE); - PCRE2_UCHAR *slot = - (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + + PCRE2_SPTR slot = + (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code)) + GET2(cc, 1) * re->name_entry_size; d = INT_MAX; @@ -496,13 +497,12 @@ for (;;) dd = backref_cache[recno]; else { - ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno); + ce = cs = PRIV(find_bracket)(startcode, utf, recno); if (cs == NULL) return -2; do ce += GET(ce, 1); while (*ce == OP_ALT); dd = 0; - if (!dupcapused || - (PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL) + if (!dupcapused || PRIV(find_bracket)(ce, utf, recno) == NULL) { if (cc > cs && cc < ce) /* Simple recursion */ { @@ -539,7 +539,7 @@ for (;;) } } else d = 0; - cc += 1 + 2*IMM2_SIZE; + cc += PRIV(OP_lengths)[*cc]; goto REPEAT_BACK_REFERENCE; /* Single back reference by number. References by name are converted to by @@ -557,12 +557,11 @@ for (;;) if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0) { - ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno); + ce = cs = PRIV(find_bracket)(startcode, utf, recno); if (cs == NULL) return -2; do ce += GET(ce, 1); while (*ce == OP_ALT); - if (!dupcapused || - (PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL) + if (!dupcapused || PRIV(find_bracket)(ce, utf, recno) == NULL) { if (cc > cs && cc < ce) /* Simple recursion */ { @@ -593,7 +592,7 @@ for (;;) backref_cache[0] = recno; } - cc += 1 + IMM2_SIZE; + cc += PRIV(OP_lengths)[*cc]; /* Handle repeated back references */ @@ -643,7 +642,7 @@ for (;;) pattern contains multiple subpatterns with the same number. 
*/ case OP_RECURSE: - cs = ce = (PCRE2_UCHAR *)startcode + GET(cc, 1); + cs = ce = startcode + GET(cc, 1); recno = GET2(cs, 1+LINK_SIZE); if (recno == prev_recurse_recno) { @@ -755,10 +754,13 @@ for (;;) new ones get added they are properly considered. */ default: + PCRE2_DEBUG_UNREACHABLE(); return -3; } } -/* Control never gets here */ + +PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ +return -3; /* Avoid compiler warnings */ } @@ -919,6 +921,138 @@ if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 +/************************************************* +* Set starting bits for a character list. * +*************************************************/ + +/* This function sets starting bits for a character list. It enumerates +all characters and character ranges in the character list, and sets +the starting bits accordingly. + +Arguments: + code pointer to the code + start_bitmap pointer to the starting bitmap + +Returns: nothing +*/ +static void +study_char_list(PCRE2_SPTR code, uint8_t *start_bitmap, + const uint8_t *char_lists_end) +{ +uint32_t type, list_ind; +uint32_t char_list_add = XCL_CHAR_LIST_LOW_16_ADD; +uint32_t range_start = ~(uint32_t)0, range_end = 0; +const uint8_t *next_char; +PCRE2_UCHAR start_buffer[6], end_buffer[6]; +PCRE2_UCHAR start, end; + +/* Only needed in 8-bit mode at the moment. */ +type = (uint32_t)(code[0] << 8) | code[1]; +code += 2; + +/* Align characters. 
*/ +next_char = char_lists_end - (GET(code, 0) << 1); +type &= XCL_TYPE_MASK; +list_ind = 0; + +if ((type & XCL_BEGIN_WITH_RANGE) != 0) + range_start = XCL_CHAR_LIST_LOW_16_START; + +while (type > 0) + { + uint32_t item_count = type & XCL_ITEM_COUNT_MASK; + + if (item_count == XCL_ITEM_COUNT_MASK) + { + if (list_ind <= 1) + { + item_count = *(const uint16_t*)next_char; + next_char += 2; + } + else + { + item_count = *(const uint32_t*)next_char; + next_char += 4; + } + } + + while (item_count > 0) + { + if (list_ind <= 1) + { + range_end = *(const uint16_t*)next_char; + next_char += 2; + } + else + { + range_end = *(const uint32_t*)next_char; + next_char += 4; + } + + if ((range_end & XCL_CHAR_END) != 0) + { + range_end = char_list_add + (range_end >> XCL_CHAR_SHIFT); + + PRIV(ord2utf)(range_end, end_buffer); + end = end_buffer[0]; + + if (range_start < range_end) + { + PRIV(ord2utf)(range_start, start_buffer); + for (start = start_buffer[0]; start <= end; start++) + start_bitmap[start / 8] |= (1u << (start & 7)); + } + else + start_bitmap[end / 8] |= (1u << (end & 7)); + + range_start = ~(uint32_t)0; + } + else + range_start = char_list_add + (range_end >> XCL_CHAR_SHIFT); + + item_count--; + } + + list_ind++; + type >>= XCL_TYPE_BIT_LEN; + + if (range_start == ~(uint32_t)0) + { + if ((type & XCL_BEGIN_WITH_RANGE) != 0) + { + /* In 8 bit mode XCL_CHAR_LIST_HIGH_32_START is not possible. */ + if (list_ind == 1) range_start = XCL_CHAR_LIST_HIGH_16_START; + else range_start = XCL_CHAR_LIST_LOW_32_START; + } + } + else if ((type & XCL_BEGIN_WITH_RANGE) == 0) + { + PRIV(ord2utf)(range_start, start_buffer); + + /* In 8 bit mode XCL_CHAR_LIST_LOW_32_END and + XCL_CHAR_LIST_HIGH_32_END are not possible. 
*/ + if (list_ind == 1) range_end = XCL_CHAR_LIST_LOW_16_END; + else range_end = XCL_CHAR_LIST_HIGH_16_END; + + PRIV(ord2utf)(range_end, end_buffer); + end = end_buffer[0]; + + for (start = start_buffer[0]; start <= end; start++) + start_bitmap[start / 8] |= (1u << (start & 7)); + + range_start = ~(uint32_t)0; + } + + /* In 8 bit mode XCL_CHAR_LIST_HIGH_32_ADD is not possible. */ + if (list_ind == 1) char_list_add = XCL_CHAR_LIST_HIGH_16_ADD; + else char_list_add = XCL_CHAR_LIST_LOW_32_ADD; + } +} +#endif + + + /************************************************* * Create bitmap of starting code units * *************************************************/ @@ -980,7 +1114,7 @@ do { int rc; PCRE2_SPTR ncode; - uint8_t *classmap = NULL; + const uint8_t *classmap = NULL; #ifdef SUPPORT_WIDE_CHARS PCRE2_UCHAR xclassflags; #endif @@ -1134,6 +1268,7 @@ do case OP_ASSERTBACK_NOT: case OP_ASSERT_NA: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: ncode += GET(ncode, 1); while (*ncode == OP_ALT) ncode += GET(ncode, 1); ncode += 1 + LINK_SIZE; @@ -1252,12 +1387,14 @@ do tcode += GET(tcode, 1 + 2*LINK_SIZE); break; - /* Skip over lookbehind and negative lookahead assertions */ + /* Skip over lookbehind, negative lookahead, and scan substring + assertions */ case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: do tcode += GET(tcode, 1); while (*tcode == OP_ALT); tcode += 1 + LINK_SIZE; break; @@ -1578,6 +1715,13 @@ do tcode += 2; break; + /* Set-based ECLASS: treat it the same as a "complex" XCLASS; give up. */ + +#ifdef SUPPORT_WIDE_CHARS + case OP_ECLASS: + return SSB_FAIL; +#endif + /* Extended class: if there are any property checks, or if this is a negative XCLASS without a map, give up. If there are no property checks, there must be wide characters on the XCLASS list, because otherwise an @@ -1596,7 +1740,7 @@ do map pointer if there is one, and fall through. */ classmap = ((xclassflags & XCL_MAP) == 0)? 
NULL : - (uint8_t *)(tcode + 1 + LINK_SIZE + 1); + (const uint8_t *)(tcode + 1 + LINK_SIZE + 1); /* In UTF-8 mode, scan the character list and set bits for leading bytes, then jump to handle the map. */ @@ -1608,6 +1752,13 @@ do PCRE2_SPTR p = tcode + 1 + LINK_SIZE + 1 + ((classmap == NULL)? 0:32); tcode += GET(tcode, 1); + if (*p >= XCL_LIST) + { + study_char_list(p, re->start_bitmap, + ((const uint8_t *)re + re->code_start)); + goto HANDLE_CLASSMAP; + } + for (;;) switch (*p++) { case XCL_SINGLE: @@ -1629,6 +1780,7 @@ do goto HANDLE_CLASSMAP; default: + PCRE2_DEBUG_UNREACHABLE(); return SSB_UNKNOWN; /* Internal error, should not occur */ } } @@ -1665,7 +1817,7 @@ do case OP_CLASS: if (*tcode == OP_XCLASS) tcode += GET(tcode, 1); else { - classmap = (uint8_t *)(++tcode); + classmap = (const uint8_t *)(++tcode); tcode += 32 / sizeof(PCRE2_UCHAR); } @@ -1768,8 +1920,7 @@ BOOL ucp = (re->overall_options & PCRE2_UCP) != 0; /* Find start of compiled code */ -code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + - re->name_entry_size * re->name_count; +code = (PCRE2_UCHAR *)((uint8_t *)re + re->code_start); /* For a pattern that has a first code unit, or a multiline pattern that matches only at "line start", there is no point in seeking a list of starting @@ -1779,7 +1930,11 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0) { int depth = 0; int rc = set_start_bits(re, code, utf, ucp, &depth); - if (rc == SSB_UNKNOWN) return 1; + if (rc == SSB_UNKNOWN) + { + PCRE2_DEBUG_UNREACHABLE(); + return 1; + } /* If a list of starting code units was set up, scan the list to see if only one or two were listed. Having only one listed is rare because usually a @@ -1852,25 +2007,22 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0) } } - /* Replace the start code unit bits with a first code unit, but only if it - is not the same as a required later code unit. 
This is because a search for - a required code unit starts after an explicit first code unit, but at a - code unit found from the bitmap. Patterns such as /a*a/ don't work - if both the start unit and required unit are the same. */ + /* Replace the start code unit bits with a first code unit. If it is the + same as a required later code unit, then clear the required later code + unit. This is because a search for a required code unit starts after an + explicit first code unit, but at a code unit found from the bitmap. + Patterns such as /a*a/ don't work if both the start unit and required + unit are the same. */ - if (a >= 0 && - ( - (re->flags & PCRE2_LASTSET) == 0 || - ( - re->last_codeunit != (uint32_t)a && - (b < 0 || re->last_codeunit != (uint32_t)b) - ) - )) - { + if (a >= 0) { + if ((re->flags & PCRE2_LASTSET) && (re->last_codeunit == (uint32_t)a || (b >= 0 && re->last_codeunit == (uint32_t)b))) { + re->flags &= ~(PCRE2_LASTSET | PCRE2_LASTCASELESS); + re->last_codeunit = 0; + } re->first_codeunit = a; flags = PCRE2_FIRSTSET; if (b >= 0) flags |= PCRE2_FIRSTCASELESS; - } + } DONE: re->flags |= flags; @@ -1898,9 +2050,11 @@ if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 && break; /* Leave minlength unchanged (will be zero) */ case -2: + PCRE2_DEBUG_UNREACHABLE(); return 2; /* missing capturing bracket */ case -3: + PCRE2_DEBUG_UNREACHABLE(); return 3; /* unrecognized opcode */ default: diff --git a/libpcre/src/pcre2_substitute.c b/libpcre/src/pcre2_substitute.c index edbb78c6d..17040ce5f 100644 --- a/libpcre/src/pcre2_substitute.c +++ b/libpcre/src/pcre2_substitute.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2022 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -130,17 +130,21 @@ for (; ptr < ptrend; ptr++) ptr += 1; /* Must point after \ */ erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode, - code->overall_options, code->extra_options, FALSE, NULL); + code->overall_options, code->extra_options, code->top_bracket, FALSE, NULL); ptr -= 1; /* Back to last code unit of escape */ if (errorcode != 0) { - rc = errorcode; + /* errorcode from check_escape is positive, so must not be returned by + pcre2_substitute(). */ + rc = PCRE2_ERROR_BADREPESCAPE; goto EXIT; } switch(erc) { case 0: /* Data character */ + case ESC_b: /* Data character */ + case ESC_v: /* Data character */ case ESC_E: /* Isolated \E is ignored */ break; @@ -148,7 +152,18 @@ for (; ptr < ptrend; ptr++) literal = TRUE; break; + case ESC_g: + /* The \g form (\g already handled by check_escape) + + Don't worry about finding the matching ">". We are super, super lenient + about validating ${} replacements inside find_text_end(), so we certainly + don't need to worry about other syntax. Importantly, a \g<..> or $<...> + sequence can't contain a '}' character. */ + break; + default: + if (erc < 0) + break; /* capture group reference */ rc = PCRE2_ERROR_BADREPESCAPE; goto EXIT; } @@ -163,6 +178,426 @@ return rc; } +/************************************************* +* Validate group name * +*************************************************/ + +/* This function scans for a capture group name, validating it +consists of legal characters, is not empty, and does not exceed +MAX_NAME_SIZE. 
+ +Arguments: + ptrptr points to the pointer to the start of the text (updated) + ptrend end of the whole string + utf true if the input is UTF-encoded + ctypes pointer to the character types table + +Returns: TRUE if a name was read + FALSE otherwise +*/ + +static BOOL +read_name_subst(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, BOOL utf, + const uint8_t* ctypes) +{ +PCRE2_SPTR ptr = *ptrptr; +PCRE2_SPTR nameptr = ptr; + +if (ptr >= ptrend) /* No characters in name */ + goto FAILED; + +/* We do not need to check whether the name starts with a non-digit. +We are simply referencing names here, not defining them. */ + +/* See read_name in the pcre2_compile.c for the corresponding logic +restricting group names inside the pattern itself. */ + +#ifdef SUPPORT_UNICODE +if (utf) + { + uint32_t c, type; + + while (ptr < ptrend) + { + GETCHAR(c, ptr); + type = UCD_CHARTYPE(c); + if (type != ucp_Nd && PRIV(ucp_gentype)[type] != ucp_L && + c != CHAR_UNDERSCORE) break; + ptr++; + FORWARDCHARTEST(ptr, ptrend); + } + } +else +#else +(void)utf; /* Avoid compiler warning */ +#endif /* SUPPORT_UNICODE */ + +/* Handle group names in non-UTF modes. */ + + { + while (ptr < ptrend && MAX_255(*ptr) && (ctypes[*ptr] & ctype_word) != 0) + { + ptr++; + } + } + +/* Check name length */ + +if (ptr - nameptr > MAX_NAME_SIZE) + goto FAILED; + +/* Subpattern names must not be empty */ +if (ptr == nameptr) + goto FAILED; + +*ptrptr = ptr; +return TRUE; + +FAILED: +*ptrptr = ptr; +return FALSE; +} + + +/************************************************* +* Case transformations * +*************************************************/ + +#define PCRE2_SUBSTITUTE_CASE_NONE 0 +// 1, 2, 3 are PCRE2_SUBSTITUTE_CASE_LOWER, UPPER, TITLE_FIRST. +#define PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST 4 + +typedef struct { + int to_case; /* One of PCRE2_SUBSTITUTE_CASE_xyz */ + BOOL single_char; +} case_state; + +/* Helper to guess how much a string is likely to increase in size when +case-transformed. 
Usually, strings don't change size at all, but some rare +characters do grow. Estimate +10%, plus another few characters. + +Performing this estimation is unfortunate, but inevitable, since we can't call +the callout if we ran out of buffer space to prepare its input. + +Because this estimate is inexact (and in pathological cases, underestimates the +required buffer size) we must document that when you have a +substitute_case_callout, and you are using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, you +may need more than two calls to determine the final buffer size. */ + +static PCRE2_SIZE +pessimistic_case_inflation(PCRE2_SIZE len) +{ +return (len >> 3u) + 10; +} + +/* Case transformation behaviour if no callout is passed. */ + +static PCRE2_SIZE +default_substitute_case_callout( + PCRE2_SPTR input, PCRE2_SIZE input_len, + PCRE2_UCHAR *output, PCRE2_SIZE output_cap, + case_state *state, const pcre2_code *code) +{ +PCRE2_SPTR input_end = input + input_len; +#ifdef SUPPORT_UNICODE +BOOL utf; +BOOL ucp; +#endif +PCRE2_UCHAR temp[6]; +BOOL next_to_upper; +BOOL rest_to_upper; +BOOL single_char; +BOOL overflow = FALSE; +PCRE2_SIZE written = 0; + +/* Helpful simplifying invariant: input and output are disjoint buffers. +I believe that this code is technically undefined behaviour, because the two +pointers input/output are "unrelated" pointers and hence not comparable. Casting +via char* bypasses some but not all of those technical rules. It is not included +in release builds, in any case. 
*/ +PCRE2_ASSERT((char *)(input + input_len) <= (char *)output || + (char *)(output + output_cap) <= (char *)input); + +#ifdef SUPPORT_UNICODE +utf = (code->overall_options & PCRE2_UTF) != 0; +ucp = (code->overall_options & PCRE2_UCP) != 0; +#endif + +if (input_len == 0) return 0; + +switch (state->to_case) + { + default: + PCRE2_DEBUG_UNREACHABLE(); + return 0; + + case PCRE2_SUBSTITUTE_CASE_LOWER: // Can be single_char TRUE or FALSE + case PCRE2_SUBSTITUTE_CASE_UPPER: // Can only be single_char FALSE + next_to_upper = rest_to_upper = (state->to_case == PCRE2_SUBSTITUTE_CASE_UPPER); + break; + + case PCRE2_SUBSTITUTE_CASE_TITLE_FIRST: // Can be single_char TRUE or FALSE + next_to_upper = TRUE; + rest_to_upper = FALSE; + state->to_case = PCRE2_SUBSTITUTE_CASE_LOWER; + break; + + case PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST: // Can only be single_char FALSE + next_to_upper = FALSE; + rest_to_upper = TRUE; + state->to_case = PCRE2_SUBSTITUTE_CASE_UPPER; + break; + } + +single_char = state->single_char; +if (single_char) + state->to_case = PCRE2_SUBSTITUTE_CASE_NONE; + +while (input < input_end) + { + uint32_t ch; + unsigned int chlen; + + GETCHARINCTEST(ch, input); + +#ifdef SUPPORT_UNICODE + if ((utf || ucp) && ch >= 128) + { + uint32_t type = UCD_CHARTYPE(ch); + if (PRIV(ucp_gentype)[type] == ucp_L && + type != (next_to_upper? ucp_Lu : ucp_Ll)) + ch = UCD_OTHERCASE(ch); + + /* TODO This is far from correct... it doesn't support the SpecialCasing.txt + mappings, but worse, it's not even correct for all the ordinary case + mappings. We should add support for those (at least), and then add the + SpecialCasing.txt mappings for Esszet and ligatures, and finally use the + Turkish casing flag on the match context. */ + } + else +#endif + if (MAX_255(ch)) + { + if (((code->tables + cbits_offset + + (next_to_upper? 
cbit_upper:cbit_lower) + )[ch/8] & (1u << (ch%8))) == 0) + ch = (code->tables + fcc_offset)[ch]; + } + +#ifdef SUPPORT_UNICODE + if (utf) chlen = PRIV(ord2utf)(ch, temp); else +#endif + { + temp[0] = ch; + chlen = 1; + } + + if (!overflow && chlen <= output_cap) + { + memcpy(output, temp, CU2BYTES(chlen)); + output += chlen; + output_cap -= chlen; + } + else + { + overflow = TRUE; + } + + if (chlen > ~(PCRE2_SIZE)0 - written) /* Integer overflow */ + return ~(PCRE2_SIZE)0; + written += chlen; + + next_to_upper = rest_to_upper; + + /* memcpy the remainder, if only transforming a single character. */ + + if (single_char) + { + PCRE2_SIZE rest_len = input_end - input; + + if (!overflow && rest_len <= output_cap) + memcpy(output, input, CU2BYTES(rest_len)); + + if (rest_len > ~(PCRE2_SIZE)0 - written) /* Integer overflow */ + return ~(PCRE2_SIZE)0; + written += rest_len; + + return written; + } + } + +return written; +} + +/* Helper to perform the call to the substitute_case_callout. We wrap the +user-provided callout because our internal arguments are slightly extended. We +don't want the user callout to handle the case of "\l" (first character only to +lowercase) or "\l\U" (first character to lowercase, rest to uppercase) because +those are not operations defined by Unicode. Instead the user callout simply +needs to provide the three Unicode primitives: lower, upper, titlecase. 
*/ + +static PCRE2_SIZE +do_case_copy( + PCRE2_UCHAR *input_output, PCRE2_SIZE input_len, PCRE2_SIZE output_cap, + case_state *state, BOOL utf, + PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, + PCRE2_SIZE, int, void *), + void *substitute_case_callout_data) +{ +PCRE2_SPTR input = input_output; +PCRE2_UCHAR *output = input_output; +PCRE2_SIZE rc; +PCRE2_SIZE rc2; +int ch1_to_case; +int rest_to_case; +PCRE2_UCHAR ch1[6]; +PCRE2_SIZE ch1_len; +PCRE2_SPTR rest; +PCRE2_SIZE rest_len; +BOOL ch1_overflow = FALSE; +BOOL rest_overflow = FALSE; + +#if PCRE2_CODE_UNIT_WIDTH == 32 || !defined(SUPPORT_UNICODE) +(void)utf; /* Avoid compiler warning. */ +#endif + +PCRE2_ASSERT(input_len != 0); + +switch (state->to_case) + { + default: + PCRE2_DEBUG_UNREACHABLE(); + return 0; + + case PCRE2_SUBSTITUTE_CASE_LOWER: // Can be single_char TRUE or FALSE + case PCRE2_SUBSTITUTE_CASE_UPPER: // Can only be single_char FALSE + case PCRE2_SUBSTITUTE_CASE_TITLE_FIRST: // Can be single_char TRUE or FALSE + + /* The easy case, where our internal casing operations align with those of + the callout. */ + + if (state->single_char == FALSE) + { + rc = substitute_case_callout(input, input_len, output, output_cap, + state->to_case, substitute_case_callout_data); + + if (state->to_case == PCRE2_SUBSTITUTE_CASE_TITLE_FIRST) + state->to_case = PCRE2_SUBSTITUTE_CASE_LOWER; + + return rc; + } + + ch1_to_case = state->to_case; + rest_to_case = PCRE2_SUBSTITUTE_CASE_NONE; + break; + + case PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST: // Can only be single_char FALSE + ch1_to_case = PCRE2_SUBSTITUTE_CASE_LOWER; + rest_to_case = PCRE2_SUBSTITUTE_CASE_UPPER; + break; + } + +/* Identify the leading character. Take copy, because its storage overlaps with +`output`, and hence may be scrambled by the callout. 
*/ + + { + PCRE2_SPTR ch_end = input; + uint32_t ch; + + GETCHARINCTEST(ch, ch_end); + (void) ch; + PCRE2_ASSERT(ch_end <= input + input_len && ch_end - input <= 6); + ch1_len = ch_end - input; + memcpy(ch1, input, CU2BYTES(ch1_len)); + } + +rest = input + ch1_len; +rest_len = input_len - ch1_len; + +/* Transform just ch1. The buffers are always in-place (input == output). With a +custom callout, we need a loop to discover its required buffer size. The loop +wouldn't be required if the callout were well-behaved, but it might be naughty +and return "5" the first time, then "10" the next time we call it using the +exact same input! */ + + { + PCRE2_SIZE ch1_cap; + PCRE2_SIZE max_ch1_cap; + + ch1_cap = ch1_len; /* First attempt uses the space vacated by ch1. */ + PCRE2_ASSERT(output_cap >= input_len && input_len >= rest_len); + max_ch1_cap = output_cap - rest_len; + + while (TRUE) + { + rc = substitute_case_callout(ch1, ch1_len, output, ch1_cap, ch1_to_case, + substitute_case_callout_data); + if (rc == ~(PCRE2_SIZE)0) return rc; + + if (rc <= ch1_cap) break; + + if (rc > max_ch1_cap) + { + ch1_overflow = TRUE; + break; + } + + /* Move the rest to the right, to make room for expanding ch1. */ + + memmove(input_output + rc, rest, CU2BYTES(rest_len)); + rest = input + rc; + + ch1_cap = rc; + + /* Proof of loop termination: `ch1_cap` is growing on each iteration, but + the loop ends if `rc` reaches the (unchanging) upper bound of output_cap. */ + } + } + +if (rest_to_case == PCRE2_SUBSTITUTE_CASE_NONE) + { + if (!ch1_overflow) + { + PCRE2_ASSERT(rest_len <= output_cap - rc); + memmove(output + rc, rest, CU2BYTES(rest_len)); + } + rc2 = rest_len; + + state->to_case = PCRE2_SUBSTITUTE_CASE_NONE; + } +else + { + PCRE2_UCHAR dummy[1]; + + rc2 = substitute_case_callout(rest, rest_len, + ch1_overflow? dummy : output + rc, + ch1_overflow? 
0u : output_cap - rc, + rest_to_case, substitute_case_callout_data); + if (rc2 == ~(PCRE2_SIZE)0) return rc2; + + if (!ch1_overflow && rc2 > output_cap - rc) rest_overflow = TRUE; + + /* If ch1 grows so that `xform(ch1)+rest` can't fit in the buffer, but then + `rest` shrinks, it's actually possible for the total calculated length of + `xform(ch1)+xform(rest)` to come out at less than output_cap. But we can't + report that, because it would make it seem that the operation succeeded. + If either of xform(ch1) or xform(rest) won't fit in the buffer, our final + result must be > output_cap. */ + if (ch1_overflow && rc2 < rest_len) + rc2 = rest_len; + + state->to_case = PCRE2_SUBSTITUTE_CASE_UPPER; + } + +if (rc2 > ~(PCRE2_SIZE)0 - rc) /* Integer overflow */ + return ~(PCRE2_SIZE)0; + +PCRE2_ASSERT(!(ch1_overflow || rest_overflow) || rc + rc2 > output_cap); +(void)rest_overflow; + +return rc + rc2; +} + /************************************************* * Match and substitute * @@ -194,25 +629,107 @@ Returns: >= 0 number of substitutions made overflow, either give an error immediately, or keep on, accumulating the length. 
*/ -#define CHECKMEMCPY(from,length) \ - { \ - if (!overflowed && lengthleft < length) \ - { \ - if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \ - overflowed = TRUE; \ - extra_needed = length - lengthleft; \ - } \ - else if (overflowed) \ - { \ - extra_needed += length; \ - } \ - else \ - { \ - memcpy(buffer + buff_offset, from, CU2BYTES(length)); \ - buff_offset += length; \ - lengthleft -= length; \ - } \ - } +#define CHECKMEMCPY(from, length_) \ + do { \ + PCRE2_SIZE chkmc_length = length_; \ + if (overflowed) \ + { \ + if (chkmc_length > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */ \ + goto TOOLARGEREPLACE; \ + extra_needed += chkmc_length; \ + } \ + else if (lengthleft < chkmc_length) \ + { \ + if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \ + overflowed = TRUE; \ + extra_needed = chkmc_length - lengthleft; \ + } \ + else \ + { \ + memcpy(buffer + buff_offset, from, CU2BYTES(chkmc_length)); \ + buff_offset += chkmc_length; \ + lengthleft -= chkmc_length; \ + } \ + } \ + while (0) + +/* This macro checks for space and copies characters with casing modifications. +On overflow, it behaves as for CHECKMEMCPY(). + +When substitute_case_callout is NULL, the source and destination buffers must +not overlap, because our default handler does not support this. */ + +#define CHECKCASECPY_BASE(length_, do_call) \ + do { \ + PCRE2_SIZE chkcc_length = (PCRE2_SIZE)(length_); \ + PCRE2_SIZE chkcc_rc; \ + do_call \ + if (lengthleft < chkcc_rc) \ + { \ + if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \ + overflowed = TRUE; \ + extra_needed = chkcc_rc - lengthleft; \ + } \ + else \ + { \ + buff_offset += chkcc_rc; \ + lengthleft -= chkcc_rc; \ + } \ + } \ + while (0) + +#define CHECKCASECPY_DEFAULT(from, length_) \ + CHECKCASECPY_BASE(length_, { \ + chkcc_rc = default_substitute_case_callout(from, chkcc_length, \ + buffer + buff_offset, \ + overflowed? 
0 : lengthleft, \ + &forcecase, code); \ + if (overflowed) \ + { \ + if (chkcc_rc > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */ \ + goto TOOLARGEREPLACE; \ + extra_needed += chkcc_rc; \ + break; \ + } \ + }) + +#define CHECKCASECPY_CALLOUT(length_) \ + CHECKCASECPY_BASE(length_, { \ + chkcc_rc = do_case_copy(buffer + buff_offset, chkcc_length, \ + lengthleft, &forcecase, utf, \ + substitute_case_callout, \ + substitute_case_callout_data); \ + if (chkcc_rc == ~(PCRE2_SIZE)0) goto CASEERROR; \ + }) + +/* This macro does a delayed case transformation, for the situation when we have +a case-forcing callout. */ + +#define DELAYEDFORCECASE() \ + do { \ + PCRE2_SIZE chars_outstanding = (buff_offset - casestart_offset) + \ + (extra_needed - casestart_extra_needed); \ + if (chars_outstanding > 0) \ + { \ + if (overflowed) \ + { \ + PCRE2_SIZE guess = pessimistic_case_inflation(chars_outstanding); \ + if (guess > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */ \ + goto TOOLARGEREPLACE; \ + extra_needed += guess; \ + } \ + else \ + { \ + /* Rewind the buffer */ \ + lengthleft += (buff_offset - casestart_offset); \ + buff_offset = casestart_offset; \ + /* Care! 
In-place case transformation */ \ + CHECKCASECPY_CALLOUT(chars_outstanding); \ + } \ + } \ + } \ + while (0) + /* Here's the function */ @@ -224,8 +741,6 @@ pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, { int rc; int subs; -int forcecase = 0; -int forcecasereset = 0; uint32_t ovector_count; uint32_t goptions = 0; uint32_t suboptions; @@ -234,18 +749,19 @@ BOOL escaped_literal = FALSE; BOOL overflowed = FALSE; BOOL use_existing_match; BOOL replacement_only; -#ifdef SUPPORT_UNICODE BOOL utf = (code->overall_options & PCRE2_UTF) != 0; -BOOL ucp = (code->overall_options & PCRE2_UCP) != 0; -#endif PCRE2_UCHAR temp[6]; PCRE2_SPTR ptr; -PCRE2_SPTR repend; +PCRE2_SPTR repend = NULL; PCRE2_SIZE extra_needed = 0; PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength; PCRE2_SIZE *ovector; PCRE2_SIZE ovecsave[3]; pcre2_substitute_callout_block scb; +PCRE2_SIZE sub_start_extra_needed; +PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, + PCRE2_SIZE, int, void *) = NULL; +void *substitute_case_callout_data = NULL; /* General initialization */ @@ -254,6 +770,12 @@ lengthleft = buff_length = *blength; *blength = PCRE2_UNSET; ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET; +if (mcontext != NULL) + { + substitute_case_callout = mcontext->substitute_case_callout; + substitute_case_callout_data = mcontext->substitute_case_callout_data; + } + /* Partial matching is not valid. This must come after setting *blength to PCRE2_UNSET, so as not to imply an offset in the replacement. */ @@ -286,27 +808,34 @@ case, we copy the existing match into the internal block, except for any cached heap frame size and pointer. This ensures that no changes are made to the external match data block. */ +/* WARNING: In both cases below a general context is constructed "by hand" +because calling pcre2_general_context_create() involves a memory allocation. 
If +the contents of a general context control block are ever changed there will +have to be changes below. */ + if (match_data == NULL) { - pcre2_general_context *gcontext; + pcre2_general_context gcontext; if (use_existing_match) return PCRE2_ERROR_NULL; - gcontext = (mcontext == NULL)? - (pcre2_general_context *)code : - (pcre2_general_context *)mcontext; + gcontext.memctl = (mcontext == NULL)? + ((const pcre2_real_code *)code)->memctl : + ((pcre2_real_match_context *)mcontext)->memctl; match_data = internal_match_data = - pcre2_match_data_create_from_pattern(code, gcontext); + pcre2_match_data_create_from_pattern(code, &gcontext); if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY; } else if (use_existing_match) { - pcre2_general_context *gcontext = (mcontext == NULL)? - (pcre2_general_context *)code : - (pcre2_general_context *)mcontext; - int pairs = (code->top_bracket + 1 < match_data->oveccount)? + int pairs; + pcre2_general_context gcontext; + gcontext.memctl = (mcontext == NULL)? + ((const pcre2_real_code *)code)->memctl : + ((pcre2_real_match_context *)mcontext)->memctl; + pairs = (code->top_bracket + 1 < match_data->oveccount)? 
code->top_bracket + 1 : match_data->oveccount; internal_match_data = pcre2_match_data_create(match_data->oveccount, - gcontext); + &gcontext); if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY; memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector) + 2*pairs*sizeof(PCRE2_SIZE)); @@ -380,6 +909,9 @@ do { PCRE2_SPTR ptrstack[PTR_STACK_SIZE]; uint32_t ptrstackptr = 0; + case_state forcecase = { PCRE2_SUBSTITUTE_CASE_NONE, FALSE }; + PCRE2_SIZE casestart_offset = 0; + PCRE2_SIZE casestart_extra_needed = 0; if (use_existing_match) { @@ -412,8 +944,9 @@ do save_start = start_offset++; if (subject[start_offset-1] == CHAR_CR && - code->newline_convention != PCRE2_NEWLINE_CR && - code->newline_convention != PCRE2_NEWLINE_LF && + (code->newline_convention == PCRE2_NEWLINE_CRLF || + code->newline_convention == PCRE2_NEWLINE_ANY || + code->newline_convention == PCRE2_NEWLINE_ANYCRLF) && start_offset < length && subject[start_offset] == CHAR_LF) start_offset++; @@ -480,14 +1013,16 @@ do } subs++; - /* Copy the text leading up to the match (unless not required), and remember - where the insert begins and how many ovector pairs are set. */ + /* Copy the text leading up to the match (unless not required); remember + where the insert begins and how many ovector pairs are set; and remember how + much space we have requested in extra_needed. */ if (rc == 0) rc = ovector_count; fraglength = ovector[0] - start_offset; if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength); scb.output_offsets[0] = buff_offset; scb.oveccount = rc; + sub_start_extra_needed = extra_needed; /* Process the replacement string. If the entire replacement is literal, just copy it with length check. 
*/ @@ -507,6 +1042,13 @@ do { uint32_t ch; unsigned int chlen; + int group; + uint32_t special; + PCRE2_SPTR text1_start = NULL; + PCRE2_SPTR text1_end = NULL; + PCRE2_SPTR text2_start = NULL; + PCRE2_SPTR text2_end = NULL; + PCRE2_UCHAR name[MAX_NAME_SIZE + 1]; /* If at the end of a nested substring, pop the stack. */ @@ -535,25 +1077,62 @@ do if (*ptr == CHAR_DOLLAR_SIGN) { - int group, n; - uint32_t special = 0; BOOL inparens; + BOOL inangle; BOOL star; PCRE2_SIZE sublength; - PCRE2_SPTR text1_start = NULL; - PCRE2_SPTR text1_end = NULL; - PCRE2_SPTR text2_start = NULL; - PCRE2_SPTR text2_end = NULL; PCRE2_UCHAR next; - PCRE2_UCHAR name[33]; + PCRE2_SPTR subptr, subptrend; if (++ptr >= repend) goto BAD; if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL; + special = 0; + text1_start = NULL; + text1_end = NULL; + text2_start = NULL; + text2_end = NULL; group = -1; - n = 0; inparens = FALSE; + inangle = FALSE; star = FALSE; + subptr = NULL; + subptrend = NULL; + + /* Special $ sequences, as supported by Perl, JavaScript, .NET and others. */ + if (next == CHAR_AMPERSAND) + { + ++ptr; + group = 0; + goto GROUP_SUBSTITUTE; + } + if (next == CHAR_GRAVE_ACCENT || next == CHAR_APOSTROPHE) + { + ++ptr; + rc = pcre2_substring_length_bynumber(match_data, 0, &sublength); + if (rc < 0) goto PTREXIT; /* (Sanity-check ovector before reading from it.) */ + + if (next == CHAR_GRAVE_ACCENT) + { + subptr = subject; + subptrend = subject + ovector[0]; + } + else + { + subptr = subject + ovector[1]; + subptrend = subject + length; + } + + goto SUBPTR_SUBSTITUTE; + } + if (next == CHAR_UNDERSCORE) + { + /* Java, .NET support $_ for "entire input string". */ + ++ptr; + subptr = subject; + subptrend = subject + length; + goto SUBPTR_SUBSTITUTE; + } if (next == CHAR_LEFT_CURLY_BRACKET) { @@ -561,22 +1140,31 @@ do next = *ptr; inparens = TRUE; } + else if (next == CHAR_LESS_THAN_SIGN) + { + /* JavaScript compatibility syntax, $. 
Processes only named + groups (not numbered) and does not support extensions such as star + (you can do ${name} and ${*name}, but not $<*name>). */ + if (++ptr >= repend) goto BAD; + next = *ptr; + inangle = TRUE; + } - if (next == CHAR_ASTERISK) + if (!inangle && next == CHAR_ASTERISK) { if (++ptr >= repend) goto BAD; next = *ptr; star = TRUE; } - if (!star && next >= CHAR_0 && next <= CHAR_9) + if (!star && !inangle && next >= CHAR_0 && next <= CHAR_9) { group = next - CHAR_0; while (++ptr < repend) { next = *ptr; if (next < CHAR_0 || next > CHAR_9) break; - group = group * 10 + next - CHAR_0; + group = group * 10 + (next - CHAR_0); /* A check for a number greater than the hightest captured group is sufficient here; no need for a separate overflow check. If unknown @@ -600,25 +1188,25 @@ do } else { - const uint8_t *ctypes = code->tables + ctypes_offset; - while (MAX_255(next) && (ctypes[next] & ctype_word) != 0) - { - name[n++] = next; - if (n > 32) goto BAD; - if (++ptr >= repend) break; - next = *ptr; - } - if (n == 0) goto BAD; - name[n] = 0; + PCRE2_SIZE name_len; + PCRE2_SPTR name_start = ptr; + if (!read_name_subst(&ptr, repend, utf, code->tables + ctypes_offset)) + goto BAD; + name_len = ptr - name_start; + memcpy(name, name_start, CU2BYTES(name_len)); + name[name_len] = 0; } + next = 0; /* not used or updated after this point */ + (void)next; + /* In extended mode we recognize ${name:+set text:unset text} and ${name:-default text}. */ if (inparens) { if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 && - !star && ptr < repend - 2 && next == CHAR_COLON) + !star && ptr < repend - 2 && *ptr == CHAR_COLON) { special = *(++ptr); if (special != CHAR_PLUS && special != CHAR_MINUS) @@ -653,6 +1241,13 @@ do ptr++; } + if (inangle) + { + if (ptr >= repend || *ptr != CHAR_GREATER_THAN_SIGN) + goto BAD; + ptr++; + } + /* Have found a syntactically correct group number or name, or *name. Only *MARK is currently recognized. 
*/ @@ -663,10 +1258,14 @@ do PCRE2_SPTR mark = pcre2_get_mark(match_data); if (mark != NULL) { - PCRE2_SPTR mark_start = mark; - while (*mark != 0) mark++; - fraglength = mark - mark_start; - CHECKMEMCPY(mark_start, fraglength); + /* Peek backwards one code unit to obtain the length of the mark. + It can (theoretically) contain an embedded NUL. */ + fraglength = mark[-1]; + if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE && + substitute_case_callout == NULL) + CHECKCASECPY_DEFAULT(mark, fraglength); + else + CHECKMEMCPY(mark, fraglength); } } else goto BAD; @@ -677,8 +1276,7 @@ do else { - PCRE2_SPTR subptr, subptrend; - + GROUP_SUBSTITUTE: /* Find a number for a named group. In case there are duplicate names, search for the first one that is set. If the name is not found when PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a @@ -775,41 +1373,14 @@ do /* Substitute a literal string, possibly forcing alphabetic case. */ - while (subptr < subptrend) - { - GETCHARINCTEST(ch, subptr); - if (forcecase != 0) - { -#ifdef SUPPORT_UNICODE - if (utf || ucp) - { - uint32_t type = UCD_CHARTYPE(ch); - if (PRIV(ucp_gentype)[type] == ucp_L && - type != ((forcecase > 0)? ucp_Lu : ucp_Ll)) - ch = UCD_OTHERCASE(ch); - } - else -#endif - { - if (((code->tables + cbits_offset + - ((forcecase > 0)? cbit_upper:cbit_lower) - )[ch/8] & (1u << (ch%8))) == 0) - ch = (code->tables + fcc_offset)[ch]; - } - forcecase = forcecasereset; - } - -#ifdef SUPPORT_UNICODE - if (utf) chlen = PRIV(ord2utf)(ch, temp); else -#endif - { - temp[0] = ch; - chlen = 1; - } - CHECKMEMCPY(temp, chlen); - } + SUBPTR_SUBSTITUTE: + if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE && + substitute_case_callout == NULL) + CHECKCASECPY_DEFAULT(subptr, subptrend - subptr); + else + CHECKMEMCPY(subptr, subptrend - subptr); } - } + } /* End of $ processing */ /* Handle an escape sequence in extended mode. 
We can use check_escape() to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but @@ -820,123 +1391,239 @@ do *ptr == CHAR_BACKSLASH) { int errorcode; + case_state new_forcecase = { PCRE2_SUBSTITUTE_CASE_NONE, FALSE }; if (ptr < repend - 1) switch (ptr[1]) { case CHAR_L: - forcecase = forcecasereset = -1; + new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_LOWER; + new_forcecase.single_char = FALSE; ptr += 2; - continue; + break; case CHAR_l: - forcecase = -1; - forcecasereset = 0; + new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_LOWER; + new_forcecase.single_char = TRUE; ptr += 2; - continue; + if (ptr + 2 < repend && ptr[0] == CHAR_BACKSLASH && ptr[1] == CHAR_U) + { + /* Perl reverse-title-casing feature for \l\U */ + new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST; + new_forcecase.single_char = FALSE; + ptr += 2; + } + break; case CHAR_U: - forcecase = forcecasereset = 1; + new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_UPPER; + new_forcecase.single_char = FALSE; ptr += 2; - continue; + break; case CHAR_u: - forcecase = 1; - forcecasereset = 0; + new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_TITLE_FIRST; + new_forcecase.single_char = TRUE; ptr += 2; - continue; + if (ptr + 2 < repend && ptr[0] == CHAR_BACKSLASH && ptr[1] == CHAR_L) + { + /* Perl title-casing feature for \u\L */ + new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_TITLE_FIRST; + new_forcecase.single_char = FALSE; + ptr += 2; + } + break; default: break; } + if (new_forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE) + { + SETFORCECASE: + + /* If the substitute_case_callout is unset, our case-forcing is done + immediately. If there is a callout however, then its action is delayed + until all the characters have been collected. + + Apply the callout now, before we set the new casing mode. 
*/ + + if (substitute_case_callout != NULL && + forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE) + DELAYEDFORCECASE(); + + forcecase = new_forcecase; + casestart_offset = buff_offset; + casestart_extra_needed = extra_needed; + continue; + } + ptr++; /* Point after \ */ rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode, - code->overall_options, code->extra_options, FALSE, NULL); + code->overall_options, code->extra_options, code->top_bracket, FALSE, NULL); if (errorcode != 0) goto BADESCAPE; switch(rc) { case ESC_E: - forcecase = forcecasereset = 0; - continue; + goto SETFORCECASE; case ESC_Q: escaped_literal = TRUE; continue; case 0: /* Data character */ - goto LITERAL; + case ESC_b: /* \b is backspace in a substitution */ + case ESC_v: /* \v is vertical tab in a substitution */ + + if (rc == ESC_b) ch = CHAR_BS; + if (rc == ESC_v) ch = CHAR_VT; + +#ifdef SUPPORT_UNICODE + if (utf) chlen = PRIV(ord2utf)(ch, temp); else +#endif + { + temp[0] = ch; + chlen = 1; + } + + if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE && + substitute_case_callout == NULL) + CHECKCASECPY_DEFAULT(temp, chlen); + else + CHECKMEMCPY(temp, chlen); + continue; + + case ESC_g: + { + PCRE2_SIZE name_len; + PCRE2_SPTR name_start; + + /* Parse the \g form (\g already handled by check_escape) */ + if (ptr >= repend || *ptr != CHAR_LESS_THAN_SIGN) + goto BADESCAPE; + ++ptr; + + name_start = ptr; + if (!read_name_subst(&ptr, repend, utf, code->tables + ctypes_offset)) + goto BADESCAPE; + name_len = ptr - name_start; + + if (ptr >= repend || *ptr != CHAR_GREATER_THAN_SIGN) + goto BADESCAPE; + ++ptr; + + special = 0; + group = -1; + memcpy(name, name_start, CU2BYTES(name_len)); + name[name_len] = 0; + goto GROUP_SUBSTITUTE; + } default: + if (rc < 0) + { + special = 0; + group = -rc - 1; + goto GROUP_SUBSTITUTE; + } goto BADESCAPE; } - } + } /* End of backslash processing */ /* Handle a literal code unit */ else { + PCRE2_SPTR ch_start; + LOADLITERAL: + ch_start = ptr; GETCHARINCTEST(ch, 
ptr); /* Get character value, increment pointer */ + (void) ch; - LITERAL: - if (forcecase != 0) - { -#ifdef SUPPORT_UNICODE - if (utf || ucp) - { - uint32_t type = UCD_CHARTYPE(ch); - if (PRIV(ucp_gentype)[type] == ucp_L && - type != ((forcecase > 0)? ucp_Lu : ucp_Ll)) - ch = UCD_OTHERCASE(ch); - } - else -#endif - { - if (((code->tables + cbits_offset + - ((forcecase > 0)? cbit_upper:cbit_lower) - )[ch/8] & (1u << (ch%8))) == 0) - ch = (code->tables + fcc_offset)[ch]; - } - forcecase = forcecasereset; - } - -#ifdef SUPPORT_UNICODE - if (utf) chlen = PRIV(ord2utf)(ch, temp); else -#endif - { - temp[0] = ch; - chlen = 1; - } - CHECKMEMCPY(temp, chlen); + if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE && + substitute_case_callout == NULL) + CHECKCASECPY_DEFAULT(ch_start, ptr - ch_start); + else + CHECKMEMCPY(ch_start, ptr - ch_start); } /* End handling a literal code unit */ } /* End of loop for scanning the replacement. */ + /* If the substitute_case_callout is unset, our case-forcing is done + immediately. If there is a callout however, then its action is delayed + until all the characters have been collected. + + We now clean up any trailing section of the replacement for which we deferred + the case-forcing. */ + + if (substitute_case_callout != NULL && + forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE) + DELAYEDFORCECASE(); + /* The replacement has been copied to the output, or its size has been - remembered. Do the callout if there is one and we have done an actual - replacement. */ + remembered. Handle the callout if there is one. */ - if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL) + if (mcontext != NULL && mcontext->substitute_callout != NULL) { - scb.subscount = subs; - scb.output_offsets[1] = buff_offset; - rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data); + /* If we an actual (non-simulated) replacement, do the callout. */ - /* A non-zero return means cancel this substitution. 
Instead, copy the - matched string fragment. */ - - if (rc != 0) + if (!overflowed) { - PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0]; + scb.subscount = subs; + scb.output_offsets[1] = buff_offset; + rc = mcontext->substitute_callout(&scb, + mcontext->substitute_callout_data); + + /* A non-zero return means cancel this substitution. Instead, copy the + matched string fragment. */ + + if (rc != 0) + { + PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0]; + PCRE2_SIZE oldlength = ovector[1] - ovector[0]; + + buff_offset -= newlength; + lengthleft += newlength; + if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength); + + /* A negative return means do not do any more. */ + + if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL); + } + } + + /* In this interesting case, we cannot do the callout, so it's hard to + estimate the required buffer size. What callers want is to be able to make + two calls to pcre2_substitute(), once with PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + to discover the buffer size, and then a second and final call. Older + versions of PCRE2 violated this assumption, by proceding as if the callout + had returned zero - but on the second call to pcre2_substitute() it could + return non-zero and then overflow the buffer again. Callers probably don't + want to keep on looping to incrementally discover the buffer size. */ + + else + { + PCRE2_SIZE newlength_buf = buff_offset - scb.output_offsets[0]; + PCRE2_SIZE newlength_extra = extra_needed - sub_start_extra_needed; + PCRE2_SIZE newlength = + (newlength_extra > ~(PCRE2_SIZE)0 - newlength_buf)? 
/* Integer overflow */ + ~(PCRE2_SIZE)0 : newlength_buf + newlength_extra; /* Cap the addition */ PCRE2_SIZE oldlength = ovector[1] - ovector[0]; - buff_offset -= newlength; - lengthleft += newlength; - if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength); + /* Be pessimistic: request whichever buffer size is larger out of + accepting or rejecting the substitution. */ - /* A negative return means do not do any more. */ + if (oldlength > newlength) + { + PCRE2_SIZE additional = oldlength - newlength; + if (additional > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */ + goto TOOLARGEREPLACE; + extra_needed += additional; + } - if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL); + /* Proceed as if the callout did not return a negative. A negative + effectively rejects all future substitutions, but we want to examine them + pessimistically. */ } } @@ -973,6 +1660,9 @@ needed. Otherwise, an overflow generates an immediate error return. */ if (overflowed) { rc = PCRE2_ERROR_NOMEMORY; + + if (extra_needed > ~(PCRE2_SIZE)0 - buff_length) /* Integer overflow */ + goto TOOLARGEREPLACE; *blength = buff_length + extra_needed; } @@ -994,6 +1684,14 @@ NOROOM: rc = PCRE2_ERROR_NOMEMORY; goto EXIT; +CASEERROR: +rc = PCRE2_ERROR_REPLACECASE; +goto EXIT; + +TOOLARGEREPLACE: +rc = PCRE2_ERROR_TOOLARGEREPLACE; +goto EXIT; + BAD: rc = PCRE2_ERROR_BADREPLACEMENT; goto PTREXIT; diff --git a/libpcre/src/pcre2_substring.c b/libpcre/src/pcre2_substring.c index 14e919dce..88afd2348 100644 --- a/libpcre/src/pcre2_substring.c +++ b/libpcre/src/pcre2_substring.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -486,7 +486,7 @@ pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname, uint16_t bot = 0; uint16_t top = code->name_count; uint16_t entrysize = code->name_entry_size; -PCRE2_SPTR nametable = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code)); +PCRE2_SPTR nametable = (PCRE2_SPTR)((const char *)code + sizeof(pcre2_real_code)); while (top > bot) { diff --git a/libpcre/src/pcre2_tables.c b/libpcre/src/pcre2_tables.c index e00252f1e..097a1acca 100644 --- a/libpcre/src/pcre2_tables.c +++ b/libpcre/src/pcre2_tables.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -171,9 +171,9 @@ are implementing). 6. Do not break after Prepend characters. 7. Do not break within emoji modifier sequences or emoji zwj sequences. That - is, do not break between characters with the Extended_Pictographic property. - Extend and ZWJ characters are allowed between the characters; this cannot be - represented in this table, the code has to deal with it. + is, do not break between characters with the Extended_Pictographic property + if a ZWJ intervenes. Extend characters are allowed between the characters; + this cannot be represented in this table, the code has to deal with it. 8. 
Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there are an odd number of RI characters @@ -203,8 +203,8 @@ const uint32_t PRIV(ucp_gbtable)[] = { ESZ|(1u< +#endif + +/* PCRE2_ASSERT(x) can be used to inject an assert() for conditions +that the code below doesn't support. It is a NOP for non debug builds +but in debug builds will print information about the location of the +code where it triggered and crash. + +It is meant to work like assert(), and therefore the expression used +should indicate what the expected state is, and shouldn't have any +side-effects. */ + +#if defined(HAVE_ASSERT_H) && !defined(NDEBUG) +#define PCRE2_ASSERT(x) assert(x) +#else +#define PCRE2_ASSERT(x) do \ +{ \ + if (!(x)) \ + { \ + fprintf(stderr, "Assertion failed at " __FILE__ ":%d\n", __LINE__); \ + abort(); \ + } \ +} while(0) +#endif + +/* PCRE2_UNREACHABLE() can be used to mark locations on the code that +shouldn't be reached. In non debug builds is defined as a hint for +the compiler to eliminate any code after it, so it is useful also for +performance reasons, but should be used with care because if it is +ever reached will trigger Undefined Behaviour and if you are lucky a +crash. In debug builds it will report the location where it was triggered +and crash. One important point to consider when using this macro, is +that it is only implemented for a few compilers, and therefore can't +be relied on to always be active either, so if it is followed by some +code it is important to make sure that the whole thing is safe to +use even if the macro is not there (ex: make sure there is a `break` +after it if used at the end of a `case`) and to test your code also +with a configuration where the macro will be a NOP. 
*/ + +#if defined(HAVE_ASSERT_H) && !defined(NDEBUG) +#define PCRE2_UNREACHABLE() \ +assert(((void)"Execution reached unexpected point", 0)) +#else +#define PCRE2_UNREACHABLE() do \ +{ \ +fprintf(stderr, "Execution reached unexpected point at " __FILE__ \ + ":%d\n", __LINE__); \ +abort(); \ +} while(0) +#endif + +/* PCRE2_DEBUG_UNREACHABLE() is a debug only version of the previous +macro. It is meant to be used in places where the code is handling +an error situation in code that shouldn't be reached, but that has +some sort of fallback code to normally handle the error. When in +doubt you should use this instead of the previous macro. Like in +the previous case, it is a good idea to document as much as possible +the reason and the actions that should be taken if it ever triggers. */ + +#define PCRE2_DEBUG_UNREACHABLE() PCRE2_UNREACHABLE() + +#endif /* PCRE2_DEBUG */ + +#ifndef PCRE2_DEBUG_UNREACHABLE +#define PCRE2_DEBUG_UNREACHABLE() do {} while(0) +#endif + +#ifndef PCRE2_UNREACHABLE +#ifdef HAVE_BUILTIN_UNREACHABLE +#define PCRE2_UNREACHABLE() __builtin_unreachable() +#elif defined(HAVE_BUILTIN_ASSUME) +#define PCRE2_UNREACHABLE() __assume(0) +#else +#define PCRE2_UNREACHABLE() do {} while(0) +#endif +#endif /* !PCRE2_UNREACHABLE */ + +#ifndef PCRE2_ASSERT +#define PCRE2_ASSERT(x) do {} while(0) +#endif + +#endif /* PCRE2_UTIL_H_IDEMPOTENT_GUARD */ + +/* End of pcre2_util.h */ diff --git a/libpcre/src/pcre2_xclass.c b/libpcre/src/pcre2_xclass.c index 5df25d2c8..25de7cbf3 100644 --- a/libpcre/src/pcre2_xclass.c +++ b/libpcre/src/pcre2_xclass.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -38,9 +38,9 @@ POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ -/* This module contains an internal function that is used to match an extended -class. It is used by pcre2_auto_possessify() and by both pcre2_match() and -pcre2_def_match(). */ +/* This module contains two internal functions that are used to match +OP_XCLASS and OP_ECLASS. It is used by pcre2_auto_possessify() and by both +pcre2_match() and pcre2_dfa_match(). */ #ifdef HAVE_CONFIG_H @@ -66,114 +66,75 @@ Returns: TRUE if character matches, else FALSE */ BOOL -PRIV(xclass)(uint32_t c, PCRE2_SPTR data, BOOL utf) +PRIV(xclass)(uint32_t c, PCRE2_SPTR data, const uint8_t *char_lists_end, BOOL utf) { +/* Update PRIV(update_classbits) when this function is changed. */ PCRE2_UCHAR t; -BOOL negated = (*data & XCL_NOT) != 0; +BOOL not_negated = (*data & XCL_NOT) == 0; +uint32_t type, max_index, min_index, value; +const uint8_t *next_char; #if PCRE2_CODE_UNIT_WIDTH == 8 /* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */ utf = TRUE; #endif -/* Code points < 256 are matched against a bitmap, if one is present. If not, -we still carry on, because there may be ranges that start below 256 in the -additional data. */ +/* Code points < 256 are matched against a bitmap, if one is present. 
*/ -if (c < 256) +if ((*data++ & XCL_MAP) != 0) { - if ((*data & XCL_HASPROP) == 0) - { - if ((*data & XCL_MAP) == 0) return negated; - return (((uint8_t *)(data + 1))[c/8] & (1u << (c&7))) != 0; - } - if ((*data & XCL_MAP) != 0 && - (((uint8_t *)(data + 1))[c/8] & (1u << (c&7))) != 0) - return !negated; /* char found */ + if (c < 256) + return (((const uint8_t *)data)[c/8] & (1u << (c&7))) != 0; + /* Skip bitmap. */ + data += 32 / sizeof(PCRE2_UCHAR); } -/* First skip the bit map if present. Then match against the list of Unicode -properties or large chars or ranges that end with a large char. We won't ever +/* Match against the list of Unicode properties. We won't ever encounter XCL_PROP or XCL_NOTPROP when UTF support is not compiled. */ - -if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(PCRE2_UCHAR); - -while ((t = *data++) != XCL_END) +#ifdef SUPPORT_UNICODE +if (*data == XCL_PROP || *data == XCL_NOTPROP) { - uint32_t x, y; - if (t == XCL_SINGLE) - { -#ifdef SUPPORT_UNICODE - if (utf) - { - GETCHARINC(x, data); /* macro generates multiple statements */ - } - else -#endif - x = *data++; - if (c == x) return !negated; - } - else if (t == XCL_RANGE) - { -#ifdef SUPPORT_UNICODE - if (utf) - { - GETCHARINC(x, data); /* macro generates multiple statements */ - GETCHARINC(y, data); /* macro generates multiple statements */ - } - else -#endif - { - x = *data++; - y = *data++; - } - if (c >= x && c <= y) return !negated; - } + /* The UCD record is the same for all properties. 
*/ + const ucd_record *prop = GET_UCD(c); -#ifdef SUPPORT_UNICODE - else /* XCL_PROP & XCL_NOTPROP */ + do { int chartype; - const ucd_record *prop = GET_UCD(c); - BOOL isprop = t == XCL_PROP; + BOOL isprop = (*data++) == XCL_PROP; BOOL ok; switch(*data) { - case PT_ANY: - if (isprop) return !negated; - break; - case PT_LAMP: chartype = prop->chartype; if ((chartype == ucp_Lu || chartype == ucp_Ll || - chartype == ucp_Lt) == isprop) return !negated; + chartype == ucp_Lt) == isprop) return not_negated; break; case PT_GC: if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop) - return !negated; + return not_negated; break; case PT_PC: - if ((data[1] == prop->chartype) == isprop) return !negated; + if ((data[1] == prop->chartype) == isprop) return not_negated; break; case PT_SC: - if ((data[1] == prop->script) == isprop) return !negated; + if ((data[1] == prop->script) == isprop) return not_negated; break; case PT_SCX: ok = (data[1] == prop->script || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data[1]) != 0); - if (ok == isprop) return !negated; + if (ok == isprop) return not_negated; break; case PT_ALNUM: chartype = prop->chartype; if ((PRIV(ucp_gentype)[chartype] == ucp_L || PRIV(ucp_gentype)[chartype] == ucp_N) == isprop) - return !negated; + return not_negated; break; /* Perl space used to exclude VT, but from Perl 5.18 it is included, @@ -186,12 +147,12 @@ while ((t = *data++) != XCL_END) { HSPACE_CASES: VSPACE_CASES: - if (isprop) return !negated; + if (isprop) return not_negated; break; default: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop) - return !negated; + return not_negated; break; } break; @@ -201,7 +162,7 @@ while ((t = *data++) != XCL_END) if ((PRIV(ucp_gentype)[chartype] == ucp_L || PRIV(ucp_gentype)[chartype] == ucp_N || chartype == ucp_Mn || chartype == ucp_Pc) == isprop) - return !negated; + return not_negated; break; case PT_UCNC: @@ -209,24 +170,24 @@ while ((t = *data++) != XCL_END) { if ((c == CHAR_DOLLAR_SIGN 
|| c == CHAR_COMMERCIAL_AT || c == CHAR_GRAVE_ACCENT) == isprop) - return !negated; + return not_negated; } else { if ((c < 0xd800 || c > 0xdfff) == isprop) - return !negated; + return not_negated; } break; case PT_BIDICL: if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop) - return !negated; + return not_negated; break; case PT_BOOL: ok = MAPBIT(PRIV(ucd_boolprop_sets) + UCD_BPROPS_PROP(prop), data[1]) != 0; - if (ok == isprop) return !negated; + if (ok == isprop) return not_negated; break; /* The following three properties can occur only in an XCLASS, as there @@ -248,7 +209,7 @@ while ((t = *data++) != XCL_END) (chartype == ucp_Cf && c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069)) )) == isprop) - return !negated; + return not_negated; break; /* Printable character: same as graphic, with the addition of Zs, i.e. @@ -262,7 +223,7 @@ while ((t = *data++) != XCL_END) (chartype == ucp_Cf && c != 0x061c && (c < 0x2066 || c > 0x2069)) )) == isprop) - return !negated; + return not_negated; break; /* Punctuation: all Unicode punctuation, plus ASCII characters that @@ -273,7 +234,7 @@ while ((t = *data++) != XCL_END) chartype = prop->chartype; if ((PRIV(ucp_gentype)[chartype] == ucp_P || (c < 128 && PRIV(ucp_gentype)[chartype] == ucp_S)) == isprop) - return !negated; + return not_negated; break; /* Perl has two sets of hex digits */ @@ -285,24 +246,300 @@ while ((t = *data++) != XCL_END) (c >= 0xff10 && c <= 0xff19) || /* Fullwidth digits */ (c >= 0xff21 && c <= 0xff26) || /* Fullwidth letters */ (c >= 0xff41 && c <= 0xff46)) == isprop) - return !negated; + return not_negated; break; /* This should never occur, but compilers may mutter if there is no default. */ default: + PCRE2_DEBUG_UNREACHABLE(); return FALSE; } data += 2; } + while (*data == XCL_PROP || *data == XCL_NOTPROP); + } #else (void)utf; /* Avoid compiler warning */ #endif /* SUPPORT_UNICODE */ + +/* Match against large chars or ranges that end with a large char. 
*/ +if (*data < XCL_LIST) + { + while ((t = *data++) != XCL_END) + { + uint32_t x, y; + +#ifdef SUPPORT_UNICODE + if (utf) + { + GETCHARINC(x, data); /* macro generates multiple statements */ + } + else +#endif + x = *data++; + + if (t == XCL_SINGLE) + { + /* Since character ranges follow the properties, and they are + sorted, early return is possible for all characters <= x. */ + if (c <= x) return (c == x) ? not_negated : !not_negated; + continue; + } + + PCRE2_ASSERT(t == XCL_RANGE); +#ifdef SUPPORT_UNICODE + if (utf) + { + GETCHARINC(y, data); /* macro generates multiple statements */ + } + else +#endif + y = *data++; + + /* Since character ranges follow the properties, and they are + sorted, early return is possible for all characters <= y. */ + if (c <= y) return (c >= x) ? not_negated : !not_negated; + } + + return !not_negated; /* char did not match */ } -return negated; /* char did not match */ +#if PCRE2_CODE_UNIT_WIDTH == 8 +type = (uint32_t)(data[0] << 8) | data[1]; +data += 2; +#else +type = data[0]; +data++; +#endif /* CODE_UNIT_WIDTH */ + +/* Align characters. */ +next_char = char_lists_end - (GET(data, 0) << 1); +type &= XCL_TYPE_MASK; + +/* Alignment check. 
*/ +PCRE2_ASSERT(((uintptr_t)next_char & 0x1) == 0); + +if (c >= XCL_CHAR_LIST_HIGH_16_START) + { + max_index = type & XCL_ITEM_COUNT_MASK; + if (max_index == XCL_ITEM_COUNT_MASK) + { + max_index = *(const uint16_t*)next_char; + PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK); + next_char += 2; + } + + next_char += max_index << 1; + type >>= XCL_TYPE_BIT_LEN; + } + +if (c < XCL_CHAR_LIST_LOW_32_START) + { + max_index = type & XCL_ITEM_COUNT_MASK; + + c = (uint16_t)((c << XCL_CHAR_SHIFT) | XCL_CHAR_END); + + if (max_index == XCL_ITEM_COUNT_MASK) + { + max_index = *(const uint16_t*)next_char; + PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK); + next_char += 2; + } + + if (max_index == 0 || c < *(const uint16_t*)next_char) + return ((type & XCL_BEGIN_WITH_RANGE) != 0) == not_negated; + + min_index = 0; + value = ((const uint16_t*)next_char)[--max_index]; + if (c >= value) + return (value == c || (value & XCL_CHAR_END) == 0) == not_negated; + + max_index--; + + /* Binary search of a range. */ + while (TRUE) + { + uint32_t mid_index = (min_index + max_index) >> 1; + value = ((const uint16_t*)next_char)[mid_index]; + + if (c < value) + max_index = mid_index - 1; + else if (((const uint16_t*)next_char)[mid_index + 1] <= c) + min_index = mid_index + 1; + else + return (value == c || (value & XCL_CHAR_END) == 0) == not_negated; + } + } + +/* Skip the 16 bit ranges. */ +max_index = type & XCL_ITEM_COUNT_MASK; +if (max_index == XCL_ITEM_COUNT_MASK) + { + max_index = *(const uint16_t*)next_char; + PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK); + next_char += 2; + } + +next_char += (max_index << 1); +type >>= XCL_TYPE_BIT_LEN; + +/* Alignment check. 
*/ +PCRE2_ASSERT(((uintptr_t)next_char & 0x3) == 0); + +max_index = type & XCL_ITEM_COUNT_MASK; + +#if PCRE2_CODE_UNIT_WIDTH == 32 +if (c >= XCL_CHAR_LIST_HIGH_32_START) + { + if (max_index == XCL_ITEM_COUNT_MASK) + { + max_index = *(const uint32_t*)next_char; + PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK); + next_char += 4; + } + + next_char += max_index << 2; + type >>= XCL_TYPE_BIT_LEN; + max_index = type & XCL_ITEM_COUNT_MASK; + } +#endif + +c = (uint32_t)((c << XCL_CHAR_SHIFT) | XCL_CHAR_END); + +if (max_index == XCL_ITEM_COUNT_MASK) + { + max_index = *(const uint32_t*)next_char; + next_char += 4; + } + +if (max_index == 0 || c < *(const uint32_t*)next_char) + return ((type & XCL_BEGIN_WITH_RANGE) != 0) == not_negated; + +min_index = 0; +value = ((const uint32_t*)next_char)[--max_index]; +if (c >= value) + return (value == c || (value & XCL_CHAR_END) == 0) == not_negated; + +max_index--; + +/* Binary search of a range. */ +while (TRUE) + { + uint32_t mid_index = (min_index + max_index) >> 1; + value = ((const uint32_t*)next_char)[mid_index]; + + if (c < value) + max_index = mid_index - 1; + else if (((const uint32_t*)next_char)[mid_index + 1] <= c) + min_index = mid_index + 1; + else + return (value == c || (value & XCL_CHAR_END) == 0) == not_negated; + } +} + + + +/************************************************* +* Match character against an ECLASS * +*************************************************/ + +/* This function is called to match a character against an extended class +used for describing characters using boolean operations on sets. 
+ +Arguments: + c the character + data_start points to the start of the ECLASS data + data_end points one-past-the-last of the ECLASS data + utf TRUE if in UTF mode + +Returns: TRUE if character matches, else FALSE +*/ + +BOOL +PRIV(eclass)(uint32_t c, PCRE2_SPTR data_start, PCRE2_SPTR data_end, + const uint8_t *char_lists_end, BOOL utf) +{ +PCRE2_SPTR ptr = data_start; +PCRE2_UCHAR flags; +uint32_t stack = 0; +int stack_depth = 0; + +PCRE2_ASSERT(data_start < data_end); +flags = *ptr++; +PCRE2_ASSERT((flags & ECL_MAP) == 0 || + (data_end - ptr) >= 32 / (int)sizeof(PCRE2_UCHAR)); + +/* Code points < 256 are matched against a bitmap, if one is present. +Otherwise all codepoints are checked later. */ + +if ((flags & ECL_MAP) != 0) + { + if (c < 256) + return (((const uint8_t *)ptr)[c/8] & (1u << (c&7))) != 0; + + /* Skip the bitmap. */ + ptr += 32 / sizeof(PCRE2_UCHAR); + } + +/* Do a little loop, until we reach the end of the ECLASS. */ +while (ptr < data_end) + { + switch (*ptr) + { + case ECL_AND: + ++ptr; + stack = (stack >> 1) & (stack | ~(uint32_t)1u); + PCRE2_ASSERT(stack_depth >= 2); + --stack_depth; + break; + + case ECL_OR: + ++ptr; + stack = (stack >> 1) | (stack & (uint32_t)1u); + PCRE2_ASSERT(stack_depth >= 2); + --stack_depth; + break; + + case ECL_XOR: + ++ptr; + stack = (stack >> 1) ^ (stack & (uint32_t)1u); + PCRE2_ASSERT(stack_depth >= 2); + --stack_depth; + break; + + case ECL_NOT: + ++ptr; + stack ^= (uint32_t)1u; + PCRE2_ASSERT(stack_depth >= 1); + break; + + case ECL_XCLASS: + { + uint32_t matched = PRIV(xclass)(c, ptr + 1 + LINK_SIZE, char_lists_end, utf); + + ptr += GET(ptr, 1); + stack = (stack << 1) | matched; + ++stack_depth; + break; + } + + /* This should never occur, but compilers may mutter if there is no + default. */ + + default: + PCRE2_DEBUG_UNREACHABLE(); + return FALSE; + } + } + +PCRE2_ASSERT(stack_depth == 1); +(void)stack_depth; /* Ignore unused variable, if assertions are disabled. 
*/ + +/* The final bit left on the stack now holds the match result. */ +return (stack & 1u) != 0; } /* End of pcre2_xclass.c */ diff --git a/libpcre/src/pcre2posix.c b/libpcre/src/pcre2posix.c index 9fe3199d8..f9dcbceba 100644 --- a/libpcre/src/pcre2posix.c +++ b/libpcre/src/pcre2posix.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2022 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -94,11 +94,13 @@ changed. This #define is a copy of the one in pcre2_internal.h. */ #include "pcre2.h" #include "pcre2posix.h" +#include "pcre2_util.h" /* Table to translate PCRE2 compile time error codes into POSIX error codes. Only a few PCRE2 errors with a value greater than 23 turn into special POSIX codes: most go to REG_BADPAT. The second table lists, in pairs, those that -don't. */ +don't, even though some of them cannot currently be provoked from within the +POSIX wrapper. 
*/ static const int eint1[] = { 0, /* No error */ @@ -137,7 +139,9 @@ static const int eint2[] = { 37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */ 56, REG_INVARG, /* internal error: unknown newline setting */ 92, REG_INVARG, /* invalid option bits with PCRE2_LITERAL */ - 99, REG_EESCAPE /* \K in lookaround */ + 98, REG_EESCAPE, /* missing digit after \0 in NO_BS0 mode */ + 99, REG_EESCAPE, /* \K in lookaround */ + 102, REG_EESCAPE /* \ddd octal > \377 in PYTHON_OCTAL mode */ }; /* Table of texts corresponding to POSIX error codes */ @@ -191,7 +195,7 @@ if (preg != NULL && (int)preg->re_erroffset != -1) /* no need to deal with UB in snprintf */ if (errbuf_size > INT_MAX) errbuf_size = INT_MAX; - /* there are 11 charactes between message and offset, + /* there are 11 characters between message and offset; update message_len() if changed */ ret = snprintf(errbuf, errbuf_size, "%s at offset %d", message, (int)preg->re_erroffset); @@ -207,6 +211,8 @@ else ret = (int)len; } +PCRE2_ASSERT(len > 0 || preg != NULL); + do { if (ret < 0) { diff --git a/libpcre/src/pcre2posix_test.c b/libpcre/src/pcre2posix_test.c deleted file mode 100644 index c9c03a48c..000000000 --- a/libpcre/src/pcre2posix_test.c +++ /dev/null @@ -1,209 +0,0 @@ -/************************************************* -* PCRE2 POSIX interface test program * -*************************************************/ - -/* -Written by Philip Hazel, December 2022 -Copyright (c) 2022 -File last edited: December 2022 - -This program tests the POSIX wrapper to the PCRE2 regular expression library. -The main PCRE2 test program is pcre2test, which also tests these function -calls. This little program is needed to test the case where the client includes -pcre2posix.h but not pcre2.h, mainly to make sure that it builds successfully. -However, the code is written as a flexible test program to which extra tests -can be added. 
- -Compile with -lpcre2-posix -lpcre2-8 - -If run with no options, there is no output on success, and the return code is -zero. If any test fails there is output to stderr, and the return code is 1. - -For testing purposes, the "-v" option causes verification output to be written -to stdout. */ - -#include -#include -#include - -#define CAPCOUNT 5 /* Number of captures supported */ -#define PRINTF if (v) printf /* Shorthand for testing output */ - -/* This vector contains compiler flags for each pattern that is tested. */ - -static int cflags[] = { - 0, /* Test 0 */ - REG_ICASE, /* Test 1 */ - 0, /* Test 2 */ - REG_NEWLINE, /* Test 3 */ - 0 /* Test 4 */ -}; - -/* This vector contains match flags for each pattern that is tested. */ - -static int mflags[] = { - 0, /* Test 0 */ - 0, /* Test 1 */ - 0, /* Test 2 */ - REG_NOTBOL, /* Test 3 */ - 0 /* Test 4 */ -}; - -/* Automate the number of patterns */ - -#define count (int)(sizeof(cflags)/sizeof(int)) - -/* The data for each pattern consists of a pattern string, followed by any -number of subject strings, terminated by NULL. Some tests share data, but use -different flags. */ - -static const char *data0_1[] = { "posix", "lower posix", "upper POSIX", NULL }; -static const char *data2_3[] = { "(*LF)^(cat|dog)", "catastrophic\ncataclysm", - "dogfight", "no animals", NULL }; -static const char *data4[] = { "*badpattern", NULL }; - -/* Index the data strings */ - -static char **data[] = { - (char **)(&data0_1), - (char **)(&data0_1), - (char **)(&data2_3), - (char **)(&data2_3), - (char **)(&data4) -}; - -/* The expected results for each pattern consist of a compiler return code, -optionally followed, for each subject string, by a match return code and, for a -successful match, up to CAPCOUNT pairs of returned match data. 
*/ - -static int results0[] = { - 0, /* Compiler rc */ - 0, 6, 11, /* 1st match */ - REG_NOMATCH /* 2nd match */ -}; - -static int results1[] = { - 0, /* Compiler rc */ - 0, 6, 11, /* 1st match */ - 0, 6, 11 /* 2nd match */ -}; - -static int results2[] = { - 0, /* Compiler rc */ - 0, 0, 3, 0, 3, /* 1st match */ - 0, 0, 3, 0, 3, /* 2nd match */ - REG_NOMATCH /* 3rd match */ -}; - -static int results3[] = { - 0, /* Compiler rc */ - 0, 13, 16, 13, 16, /* 1st match */ - REG_NOMATCH, /* 2nd match */ - REG_NOMATCH /* 3rd match */ -}; - -static int results4[] = { - REG_BADRPT /* Compiler rc */ -}; - -/* Index the result vectors */ - -static int *results[] = { - (int *)(&results0), - (int *)(&results1), - (int *)(&results2), - (int *)(&results3), - (int *)(&results4) -}; - -/* And here is the program */ - -int main(int argc, char **argv) -{ -regex_t re; -regmatch_t match[CAPCOUNT]; -int v = argc > 1 && strcmp(argv[1], "-v") == 0; - -PRINTF("Test of pcre2posix.h without pcre2.h\n"); - -for (int i = 0; i < count; i++) - { - char *pattern = data[i][0]; - char **subjects = data[i] + 1; - int *rd = results[i]; - int rc = regcomp(&re, pattern, cflags[i]); - - PRINTF("Pattern: %s flags=0x%02x\n", pattern, cflags[i]); - - if (rc != *rd) - { - fprintf(stderr, "Unexpected compile error %d (expected %d)\n", rc, *rd); - fprintf(stderr, "Pattern is: %s\n", pattern); - return 1; - } - - if (rc != 0) - { - if (v) - { - char buffer[256]; - (void)regerror(rc, &re, buffer, sizeof(buffer)); - PRINTF("Compile error %d: %s (expected)\n", rc, buffer); - } - continue; - } - - for (; *subjects != NULL; subjects++) - { - rc = regexec(&re, *subjects, CAPCOUNT, match, mflags[i]); - - PRINTF("Subject: %s\n", *subjects); - PRINTF("Return: %d", rc); - - if (rc != *(++rd)) - { - PRINTF("\n"); - fprintf(stderr, "Unexpected match error %d (expected %d)\n", rc, *rd); - fprintf(stderr, "Pattern is: %s\n", pattern); - fprintf(stderr, "Subject is: %s\n", *subjects); - return 1; - } - - if (rc == 0) - { - for 
(int j = 0; j < CAPCOUNT; j++) - { - regmatch_t *m = match + j; - if (m->rm_so < 0) continue; - if (m->rm_so != *(++rd) || m->rm_eo != *(++rd)) - { - PRINTF("\n"); - fprintf(stderr, "Mismatched results for successful match\n"); - fprintf(stderr, "Pattern is: %s\n", pattern); - fprintf(stderr, "Subject is: %s\n", *subjects); - fprintf(stderr, "Result %d: expected %d %d received %d %d\n", - j, rd[-1], rd[0], m->rm_so, m->rm_eo); - return 1; - } - PRINTF(" (%d %d %d)", j, m->rm_so, m->rm_eo); - } - } - - else if (v) - { - char buffer[256]; - (void)regerror(rc, &re, buffer, sizeof(buffer)); - PRINTF(": %s (expected)", buffer); - } - - PRINTF("\n"); - } - - regfree(&re); - } - -PRINTF("End of test\n"); -return 0; -} - -/* End of pcre2posix_test.c */ diff --git a/libpcre/vms/configure.com b/libpcre/vms/configure.com new file mode 100644 index 000000000..831f38ff7 --- /dev/null +++ b/libpcre/vms/configure.com @@ -0,0 +1,1152 @@ +$! Configure procedure +$! (c) Alexey Chupahin 11-APR-2024 +$! alexey@vaxman.de, alexey_chupahin@mail.ru +$! +$! +$ SET NOON +$SET NOVER +$WRITE SYS$OUTPUT " " +$WRITE SYS$OUTPUT "Configuring PCRE2 library for OpenVMS " +$WRITE SYS$OUTPUT "(c) Alexey Chupahin CHAPG" +$WRITE SYS$OUTPUT " " +$! Checking architecture +$DECC = F$SEARCH("SYS$SYSTEM:DECC$COMPILER.EXE") .NES. "" +$ IF F$GETSYI("ARCH_TYPE").EQ.1 THEN CPU = "VAX" +$ IF F$GETSYI("ARCH_TYPE").EQ.2 THEN CPU = "Alpha" +$ IF F$GETSYI("ARCH_TYPE").EQ.3 THEN CPU = "I64" +$ IF F$GETSYI("ARCH_TYPE").EQ.4 THEN CPU = "x86" +$WRITE SYS$OUTPUT "Checking architecture ... ", CPU +$IF ( (CPU.EQS."Alpha").OR.(CPU.EQS."I64").OR(CPU.EQS."x86") ) +$ THEN +$ SHARED=64 +$ ELSE +$ SHARED=32 +$ENDIF +$! +$IF (DECC) THEN $WRITE SYS$OUTPUT "Compiler ... DEC C" +$IF (.NOT. DECC) THEN $WRITE SYS$OUTPUT "BAD compiler" GOTO EXIT +$MMS = F$SEARCH("SYS$SYSTEM:MMS.EXE") .NES. "" +$MMK = F$TYPE(MMK) +$IF (MMS .OR. MMK.NES."") THEN GOTO TEST_LIBRARIES +$! 
I cannot find any make tool +$ WRITE SYS$OUTPUT "Install MMS or MMK" +$GOTO EXIT +$!PERL = F$TYPE(MMK) +$!IF (PERL.NES."") THEN GOTO TEST_LIBRARIES +$!WRITE SYS$OUTPUT "Install PERL" +$!GOTO EXIT +$! +$! +$! Is this the package root directory? If not, go to [-] +$ IF (F$SEARCH("[]VMS.DIR").EQS."") .AND. (F$SEARCH("[]vms.dir").EQS."") +$ THEN +$ SET DEF [-] +$ ENDIF +$! +$TEST_LIBRARIES: +$! Select MMS or MMK as the MAKE utility. MMS is preferred when both are present. +$IF (MMK.NES."") THEN MAKE="MMK" +$IF (MMS) THEN MAKE="MMS" +$WRITE SYS$OUTPUT "Checking build utility ... ''MAKE'" +$!WRITE SYS$OUTPUT "Checking PERL ... found" +$WRITE SYS$OUTPUT " " +$! +$! +$! Check files and ODS-2. Unzip names FILE.H.GENERIC as FILE_H.GENERIC. It should be renamed to FILE.H_GENERIC +$IF F$SEARCH("[.SRC]PCRE2_H.GENERIC") .NES. "" +$ THEN +$ REN [.SRC]PCRE2_H.GENERIC [.SRC]PCRE2.H_GENERIC +$ ELSE +$ IF F$SEARCH("[.SRC]PCRE2.H_GENERIC") .EQS. "" +$ THEN +$ WRITE SYS$OUTPUT "Not ODS-2 volume, or PCRE2_H.GENERIC not found" +$ EXIT +$ ENDIF +$ENDIF +$IF F$SEARCH("[.SRC]PCRE2_CHARTABLES_C.DIST") .NES. "" +$ THEN +$ REN [.SRC]PCRE2_CHARTABLES_C.DIST [.SRC]PCRE2_CHARTABLES.C_DIST +$ ELSE +$ IF F$SEARCH("[.SRC]PCRE2_CHARTABLES.C_DIST") .EQS. "" +$ THEN +$ WRITE SYS$OUTPUT "Not ODS-2 volume, or PCRE2_CHARTABLES_C.DIST not found" +$ EXIT +$ ENDIF +$ENDIF +$WRITE SYS$OUTPUT "Source Files OK" +$! +$! +$I18 = F$SEARCH("SYS$I18N_ICONV:ISO8859-1_UTF-8.ICONV") .NES. 
"" +$IF (I18) +$ THEN +$ WRITE SYS$OUTPUT "Found I18 extension ICONV codes" +$!"Checking for iconv " +$ DEFINE SYS$ERROR _NLA0: +$ DEFINE SYS$OUTPUT _NLA0: +$ CC/OBJECT=TEST.OBJ SYS$INPUT +#include +#include +#include +#include + +int main () +{ + /* */ + /* Declare variables to be used */ + /* */ + char fromcodeset[30]; + char tocodeset[30]; + int iconv_opened; + iconv_t iconv_struct; /* Iconv descriptor */ + + /* */ + /* Initialize variables */ + /* */ + sprintf(fromcodeset,"UTF-8"); + sprintf(tocodeset,"ISO8859-1"); + iconv_opened = FALSE; + + /* */ + /* Attempt to create a conversion descriptor for the codesets */ + /* specified. If the return value from iconv_open is -1 then */ + /* an error has occurred. Check value of errno. */ + /* */ + if ((iconv_struct = iconv_open (tocodeset, fromcodeset)) == (iconv_t)-1) + { + /* */ + /* Check the value of errno */ + /* */ + switch (errno) + { + case EMFILE: + case ENFILE: + printf("Too many iconv conversion files open\n"); + exit(2); + break; + + case ENOMEM: + printf("Not enough memory\n"); + printf("Checking iconv ..... no\n"); + exit(2); + break; + + case EINVAL: + printf("Unsupported conversion\n"); + exit(2); + break; + + default: + printf("Unexpected error from iconv_open\n"); + exit(2); + break; + } + } + else + /* */ + /* Successfully allocated a conversion descriptor */ + /* */ + iconv_opened = TRUE; + + /* */ + /* Was a conversion descriptor allocated */ + /* */ + if (iconv_opened) + { + /* */ + /* Attempt to deallocate the conversion descriptor. If */ + /* iconv_close returns -1 then an error has occurred. */ + /* */ + if (iconv_close (iconv_struct) == -1) + { + /* */ + /* An error occurred. Check the value of errno */ + /* */ + switch (errno) + { + case EBADF: + printf("Conversion descriptor is invalid\n"); + exit(2); + break; + default: + break; + } + } + else + printf("Checking iconv ..... yes\n"); + } + return(1); +} +$! 
+$TMP = $STATUS +$DEASS SYS$ERROR +$DEAS SYS$OUTPUT +$!WRITE SYS$OUTPUT TMP +$IF (TMP .NE. %X10B90001) +$ THEN +$ HAVE_ICONV=0 +$ GOTO NEXT0 +$ENDIF +$DEFINE SYS$ERROR _NLA0: +$DEFINE SYS$OUTPUT _NLA0: +$LINK/EXE=TEST TEST +$TMP = $STATUS +$DEAS SYS$ERROR +$DEAS SYS$OUTPUT +$!WRITE SYS$OUTPUT TMP +$IF (TMP .NE. %X10000001) +$ THEN +$ HAVE_ICONV=0 +$ GOTO NEXT0 +$ ELSE +$ HAVE_ICONV=1 +$ENDIF +$NEXT0: +$IF (HAVE_ICONV.EQ.1) +$ THEN +$ WRITE SYS$OUTPUT "Checking for iconv ... Yes" +$ ELSE +$ WRITE SYS$OUTPUT "Checking for iconv ... No" +$ENDIF +$! +$! +$! Checking for BZIP2 library +$! +$ DEFINE SYS$ERROR _NLA0: +$ DEFINE SYS$OUTPUT _NLA0: +$ CC/OBJECT=TEST.OBJ/INCLUDE=(BZ2LIB) SYS$INPUT + #include + #include + #include + int main() + { + printf("checking version bzip2 library: %s\n",BZ2_bzlibVersion()); + } +$TMP = $STATUS +$DEASS SYS$ERROR +$DEAS SYS$OUTPUT +$!WRITE SYS$OUTPUT TMP +$IF (TMP .NE. %X10B90001) +$ THEN +$ HAVE_BZIP2=0 +$ GOTO ERR0 +$ENDIF +$DEFINE SYS$ERROR _NLA0: +$DEFINE SYS$OUTPUT _NLA0: +$!Testing for CHAPG BZIP2 +$! +$LINK/EXE=TEST TEST,BZ2LIB:BZIP2/OPT +$TMP = $STATUS +$DEAS SYS$ERROR +$DEAS SYS$OUTPUT +$IF (TMP .NE. %X10000001) +$ THEN +$ HAVE_BZIP2=0 +$ GOTO ERR0 +$ ELSE +$ HAVE_BZIP2=1 +$ENDIF +$ERR0: +$IF (HAVE_BZIP2.EQ.1) +$ THEN +$ WRITE SYS$OUTPUT "Checking for CHAPG bzip2 library ... Yes" +$ RUN TEST +$ GOTO NEXT4 +$ ELSE +$ WRITE SYS$OUTPUT "Checking for correct bzip2 library ... No" +$ WRITE SYS$OUTPUT "To get bzip2 archives support, please download" +$ WRITE SYS$OUTPUT "and install good library ported by Alexey Chupahin" +$ WRITE SYS$OUTPUT "from openvms clamav site http://vaxvms.org/clamav/" +$ WRITE SYS$OUTPUT "" +$ GOTO EXIT +$ENDIF +$NEXT4: +$! +$! 
+$!"Checking for CHAPG zlib library " +$DEFINE SYS$ERROR _NLA0: +$DEFINE SYS$OUTPUT _NLA0: +$ CC/OBJECT=TEST.OBJ/INCLUDE=(ZLIB) SYS$INPUT + #include + #include + #include + #include + int main() + { + printf("checking version zlib: %s\n",zlibVersion()); + // printf("checking zlib is correct "); + } + +$TMP = $STATUS +$DEASS SYS$ERROR +$DEAS SYS$OUTPUT +$IF (TMP .NE. %X10B90001) +$ THEN +$ HAVE_ZLIB=0 +$ GOTO ERR4 +$ENDIF +$DEFINE SYS$ERROR _NLA0: +$DEFINE SYS$OUTPUT _NLA0: +$! +$LINK/EXE=TEST TEST,ZLIB:ZLIB.OPT/OPT +$TMP = $STATUS +$DEAS SYS$ERROR +$DEAS SYS$OUTPUT +$IF (TMP .NE. %X10000001) +$ THEN +$ HAVE_ZLIB=0 +$ GOTO ERR4 +$ ELSE +$ HAVE_ZLIB=1 +$ENDIF +$ERR4: +$IF (HAVE_ZLIB.EQ.1) +$ THEN +$ WRITE SYS$OUTPUT "Checking for CHAPG zlib library ... Yes" +$ RUN TEST +$ GOTO NEXT5 +$ ELSE +$ WRITE SYS$OUTPUT "Checking for CHAPG zlib library ... No" +$ WRITE SYS$OUTPUT "Please install ZLIB from" +$ WRITE SYS$OUTPUT "http://vaxvms.org/libsdl/required.html" +$ GOTO EXIT +$ENDIF +$! +$NEXT5: + +$! +$!WRITING BUILD FILES +$OPEN/WRITE OUT BUILD.COM +$ WRITE OUT "$","SET DEF [.SRC]" +$ WRITE OUT "$",MAKE +$ WRITE OUT "$ CURRENT = F$ENVIRONMENT (""DEFAULT"") " +$ WRITE OUT "$","SET DEF [-]" +$ WRITE OUT "$CLAM=CURRENT" +$ WRITE OUT "$OPEN/WRITE OUTT PCRE2$STARTUP.COM" +$ WRITE OUT "$WRITE OUTT ""DEFINE PCRE2 ","'","'","CLAM'"" " +$ WRITE OUT "$WRITE OUTT ""DEFINE PCRE2$SHR ","'","'","CLAM'PCRE2$SHR.EXE"" " +$ WRITE OUT "$WRITE OUTT ""PCRE2GREP:==$", "'","'","CLAM'PCRE2GREP.EXE""" +$ WRITE OUT "$CLOSE OUTT" +$ WRITE OUT "$WRITE SYS$OUTPUT "" "" " +$ WRITE OUT "$WRITE SYS$OUTPUT ""***************************************************************************** "" " +$ WRITE OUT "$WRITE SYS$OUTPUT ""Compilation is completed."" " +$ WRITE OUT "$WRITE SYS$OUTPUT ""PCRE2$STARTUP.COM is created. "" " +$ WRITE OUT "$WRITE SYS$OUTPUT ""This file setups all logicals needed."" " +$ WRITE OUT "$WRITE SYS$OUTPUT ""It should be executed before using PCRE2 Library. 
"" " +$ WRITE OUT "$WRITE SYS$OUTPUT ""Use PCRE2:PCRE2.OPT to link you program"" " +$ WRITE OUT "$WRITE SYS$OUTPUT ""PCRE2GREP grep utility is installed here for your needs "" " +$ WRITE OUT "$WRITE SYS$OUTPUT ""***************************************************************************** "" " +$CLOSE OUT +$! BUILD.COM finished +$ WRITE SYS$OUTPUT "BUILD.COM has been created" +$! +$!Creating OPT.OPT file containig external libraries for linker +$OPEN/WRITE OUT [.SRC]PCRE2.OPT +$IF (SHARED.GT.0) THEN WRITE OUT "PCRE2:PCRE2$SHR/SHARE" +$IF (SHARED.EQ.0) +$ THEN +$ WRITE OUT "PCRE2:PCRE2/LIB" +$ENDIF +$CLOSE OUT +$WRITE SYS$OUTPUT "PCRE2.OPT has been created" +$IF (SHARED.EQ.64) +$ THEN +$ COPY SYS$INPUT [.SRC]PCRE2$DEF.OPT +! +case_sensitive=NO +symbol_vector = (PCRE2_CONFIG_8 = PROCEDURE) +symbol_vector = (PCRE2_MAKETABLES_8 = PROCEDURE) +symbol_vector = (PCRE2_MAKETABLES_FREE_8 = PROCEDURE) +symbol_vector = (PCRE2_CODE_COPY_8 = PROCEDURE) +symbol_vector = (PCRE2_CODE_FREE_8 = PROCEDURE) +symbol_vector = (_PCRE2_CHECK_ESCAPE_8 = PROCEDURE) +symbol_vector = (PCRE2_COMPILE_8 = PROCEDURE) +symbol_vector = (PCRE2_CODE_COPY_WITH_TABLES_8 = PROCEDURE) +symbol_vector = (PCRE2_GET_ERROR_MESSAGE_8 = PROCEDURE) +symbol_vector = (PCRE2_MATCH_DATA_CREATE_8 = PROCEDURE) +symbol_vector = (VMS_PCRE2_GET_M_D_HPFRAM_S_8 = PROCEDURE) +symbol_vector = (PCRE2_GET_MATCH_DATA_SIZE_8 = PROCEDURE) +symbol_vector = (PCRE2_GET_STARTCHAR_8 = PROCEDURE) +symbol_vector = (PCRE2_GET_OVECTOR_COUNT_8 = PROCEDURE) +symbol_vector = (PCRE2_GET_OVECTOR_POINTER_8 = PROCEDURE) +symbol_vector = (PCRE2_GET_MARK_8 = PROCEDURE) +symbol_vector = (PCRE2_MATCH_DATA_FREE_8 = PROCEDURE) +symbol_vector = (VMS_PCRE2_M_D_CRT_FR_PATT_8 = PROCEDURE) +symbol_vector = (PCRE2_MATCH_8 = PROCEDURE) +symbol_vector = (PCRE2_PATTERN_INFO_8 = PROCEDURE) +symbol_vector = (PCRE2_CALLOUT_ENUMERATE_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_GLOB_ESCAPE_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_GLOB_SEPARATOR_8 = PROCEDURE) 
+symbol_vector = (VMS_PCRE2_SET_RCRS_MEM_MNG_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_DEPTH_LIMIT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_RECURSION_LIMIT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_OFFSET_LIMIT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_MATCH_LIMIT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_HEAP_LIMIT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_SUBSTITUTE_CALLOUT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_CALLOUT_8 = PROCEDURE) +symbol_vector = (VMS_PCRE2_SET_CMPL_RCRS_GRD_8 = PROCEDURE) +symbol_vector = (VMS_PCRE2_SET_CMPL_EXT_OPT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_PARENS_NEST_LIMIT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_MAX_VARLOOKBEHIND_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_NEWLINE_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_MAX_PATTERN_LENGTH_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_BSR_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_CHARACTER_TABLES_8 = PROCEDURE) +symbol_vector = (PCRE2_CONVERT_CONTEXT_FREE_8 = PROCEDURE) +symbol_vector = (PCRE2_MATCH_CONTEXT_FREE_8 = PROCEDURE) +symbol_vector = (PCRE2_COMPILE_CONTEXT_FREE_8 = PROCEDURE) +symbol_vector = (PCRE2_GENERAL_CONTEXT_FREE_8 = PROCEDURE) +symbol_vector = (PCRE2_CONVERT_CONTEXT_COPY_8 = PROCEDURE) +symbol_vector = (PCRE2_MATCH_CONTEXT_COPY_8 = PROCEDURE) +symbol_vector = (PCRE2_COMPILE_CONTEXT_COPY_8 = PROCEDURE) +symbol_vector = (PCRE2_GENERAL_CONTEXT_COPY_8 = PROCEDURE) +symbol_vector = (_PCRE2_MEMCTL_MALLOC_8 = PROCEDURE) +symbol_vector = (PCRE2_CONVERT_CONTEXT_CREATE_8 = PROCEDURE) +symbol_vector = (PCRE2_MATCH_CONTEXT_CREATE_8 = PROCEDURE) +symbol_vector = (PCRE2_COMPILE_CONTEXT_CREATE_8 = PROCEDURE) +symbol_vector = (PCRE2_GENERAL_CONTEXT_CREATE_8 = PROCEDURE) +symbol_vector = (_PCRE2_AUTO_POSSESSIFY_8 = PROCEDURE) +symbol_vector = (_PCRE2_CKD_SMUL = PROCEDURE) +symbol_vector = (_PCRE2_FIND_BRACKET_8 = PROCEDURE) +symbol_vector = (_PCRE2_IS_NEWLINE_8 = PROCEDURE) +symbol_vector = (_PCRE2_WAS_NEWLINE_8 = PROCEDURE) +symbol_vector = (_PCRE2_SCRIPT_RUN_8 = 
PROCEDURE) +symbol_vector = (_PCRE2_STRCMP_8 = PROCEDURE) +symbol_vector = (_PCRE2_STRCPY_C8_8 = PROCEDURE) +symbol_vector = (_PCRE2_STRLEN_8 = PROCEDURE) +symbol_vector = (_PCRE2_STRNCMP_C8_8 = PROCEDURE) +symbol_vector = (_PCRE2_STRNCMP_8 = PROCEDURE) +symbol_vector = (_PCRE2_STRCMP_C8_8 = PROCEDURE) +symbol_vector = (_PCRE2_STUDY_8 = PROCEDURE) +symbol_vector = (_PCRE2_VALID_UTF_8 = PROCEDURE) +symbol_vector = (VMS_PCRE2_DEF_CMPL_CNTXT_8 = DATA) +symbol_vector = (VMS_PCRE2_DEF_CNVRT_CNTXT_8 = DATA) +symbol_vector = (_PCRE2_CALLOUT_END_DELIMS_8 = DATA) +symbol_vector = (_PCRE2_CALLOUT_START_DELIMS_8 = DATA) +symbol_vector = (_PCRE2_DEFAULT_MATCH_CONTEXT_8 = DATA) +symbol_vector = (_PCRE2_DEFAULT_TABLES_8 = DATA) +symbol_vector = (_PCRE2_HSPACE_LIST_8 = DATA) +symbol_vector = (_PCRE2_OP_LENGTHS_8 = DATA) +symbol_vector = (_PCRE2_UCD_CASELESS_SETS_8 = DATA) +symbol_vector = (_PCRE2_UCD_RECORDS_8 = DATA) +symbol_vector = (_PCRE2_UCD_STAGE1_8 = DATA) +symbol_vector = (_PCRE2_UCD_STAGE2_8 = DATA) +symbol_vector = (_PCRE2_VSPACE_LIST_8 = DATA) +! +! ### PSECT list extracted from PCRE2.MAP;1 +! +$ENDIF +$! +$! +COPY SYS$INPUT [.SRC]CONFIG.H +/* src/config.h.in. Generated from configure.ac by autoheader. */ + + +/* PCRE2 is written in Standard C, but there are a few non-standard things it +can cope with, allowing it to run on SunOS4 and other "close to standard" +systems. + +In environments that support the GNU autotools, config.h.in is converted into +config.h by the "configure" script. In environments that use CMake, +config-cmake.in is converted into config.h. If you are going to build PCRE2 "by +hand" without using "configure" or CMake, you should copy the distributed +config.h.generic to config.h, and edit the macro definitions to be the way you +need them. You must then add -DHAVE_CONFIG_H to all of your compile commands, +so that config.h is included at the start of every source. 
+ +Alternatively, you can avoid editing by using -D on the compiler command line +to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H, +but if you do, default values will be taken from config.h for non-boolean +macros that are not defined on the command line. + +Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be +defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All +such macros are listed as a commented #undef in config.h.generic. Macros such +as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are +surrounded by #ifndef/#endif lines so that the value can be overridden by -D. + +PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if +HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make +sure both macros are undefined; an emulation function will then be used. */ + +/* By default, the \R escape sequence matches any Unicode line ending + character or sequence of characters. If BSR_ANYCRLF is defined (to any + value), this is changed so that backslash-R matches only CR, LF, or CRLF. + The build-time default can be overridden by the user of PCRE2 at runtime. + */ +#undef BSR_ANYCRLF + +/* Define to any value to disable the use of the z and t modifiers in + formatting settings such as %zu or %td (this is rarely needed). */ +#undef DISABLE_PERCENT_ZT + +/* If you are compiling for a system that uses EBCDIC instead of ASCII + character codes, define this macro to any value. When EBCDIC is set, PCRE2 + assumes that all input strings are in EBCDIC. If you do not define this + macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It + is not possible to build a version of PCRE2 that supports both EBCDIC and + UTF-8/16/32. */ +#undef EBCDIC + +/* In an EBCDIC environment, define this macro to any value to arrange for the + NL character to be 0x25 instead of the default 0x15. 
NL plays the role that + LF does in an ASCII/Unicode environment. */ +#undef EBCDIC_NL25 + +/* Define this if your compiler supports __attribute__((uninitialized)) */ +#undef HAVE_ATTRIBUTE_UNINITIALIZED + +/* Define to 1 if you have the header file. */ +#define HAVE_ASSERT_H 1 + +/* Define to 1 if you have the 'bcopy' function. */ +#define HAVE_BCOPY 1 + +/* Define this if your compiler provides __builtin_mul_overflow() */ +#undef HAVE_BUILTIN_MUL_OVERFLOW + +/* Define this if your compiler provides __builtin_unreachable() */ +#undef HAVE_BUILTIN_UNREACHABLE + +/* Define to 1 if you have the header file. */ +#define HAVE_DIRENT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you have the header file. */ +#undef HAVE_EDITLINE_READLINE_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_EDIT_READLINE_READLINE_H + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_LIMITS_H 1 + +/* Define to 1 if you have the 'memfd_create' function. */ +#undef HAVE_MEMFD_CREATE + +/* Define to 1 if you have the 'memmove' function. */ +#define HAVE_MEMMOVE 1 + +/* Define to 1 if you have the header file. */ +#undef HAVE_MINIX_CONFIG_H + +/* Define to 1 if you have the 'mkostemp' function. */ +#undef HAVE_MKOSTEMP + +/* Define if you have POSIX threads libraries and header files. */ +#define HAVE_PTHREAD 1 + +/* Have PTHREAD_PRIO_INHERIT. */ +#undef HAVE_PTHREAD_PRIO_INHERIT + +/* Define to 1 if you have the header file. */ +#undef HAVE_READLINE_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_READLINE_HISTORY_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_READLINE_READLINE_H + +/* Define to 1 if you have the `realpath' function. */ +#define HAVE_REALPATH 1 + +/* Define to 1 if you have the 'secure_getenv' function. */ +#undef HAVE_SECURE_GETENV + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDIO_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the 'strerror' function. */ +#define HAVE_STRERROR 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_WAIT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if the compiler supports simple visibility declarations. */ +#undef HAVE_VISIBILITY + +/* Define to 1 if you have the header file. */ +#define HAVE_WCHAR_H 1 + +/* Define to 1 if you have the header file. */ +#undef HAVE_WINDOWS_H + +/* Define to 1 if you have the header file. */ + +/* This limits the amount of memory that may be used while matching a pattern. + It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply + to JIT matching. The value is in kibibytes (units of 1024 bytes). */ +#undef HEAP_LIMIT + +/* The value of LINK_SIZE determines the number of bytes used to store links + as offsets within the compiled regex. The default is 2, which allows for + compiled patterns up to 65535 code units long. This covers the vast + majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes + instead. This allows for longer patterns in extreme cases. */ +#undef LINK_SIZE + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#undef LT_OBJDIR + +/* The value of MATCH_LIMIT determines the default number of times the + pcre2_match() function can record a backtrack position during a single + matching attempt. The value is also used to limit a loop counter in + pcre2_dfa_match(). 
There is a runtime interface for setting a different + limit. The limit exists in order to catch runaway regular expressions that + take forever to determine that they do not match. The default is set very + large so that it does not accidentally catch legitimate cases. */ +#undef MATCH_LIMIT + +/* The above limit applies to all backtracks, whether or not they are nested. + In some environments it is desirable to limit the nesting of backtracking + (that is, the depth of tree that is searched) more strictly, in order to + restrict the maximum amount of heap memory that is used. The value of + MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it + must be less than the value of MATCH_LIMIT. The default is to use the same + value as MATCH_LIMIT. There is a runtime method for setting a different + limit. In the case of pcre2_dfa_match(), this limit controls the depth of + the internal nested function calls that are used for pattern recursions, + lookarounds, and atomic groups. */ +#undef MATCH_LIMIT_DEPTH + +/* This limit is parameterized just in case anybody ever wants to change it. + Care must be taken if it is increased, because it guards against integer + overflow caused by enormously large patterns. */ +#undef MAX_NAME_COUNT + +/* This limit is parameterized just in case anybody ever wants to change it. + Care must be taken if it is increased, because it guards against integer + overflow caused by enormously large patterns. */ +#undef MAX_NAME_SIZE + +/* The value of MAX_VARLOOKBEHIND specifies the default maximum length, in + characters, for a variable-length lookbehind assertion. */ +#undef MAX_VARLOOKBEHIND + +/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */ +#undef NEVER_BACKSLASH_C + +/* The value of NEWLINE_DEFAULT determines the default newline character + sequence. PCRE2 client programs can override this by selecting other values + at run time. 
The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5 + (ANYCRLF), and 6 (NUL). */ +#undef NEWLINE_DEFAULT + +/* Name of package */ +#define PACKAGE "pcre2" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "PCRE2" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "PCRE2 10.43 VMS" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "pcre2" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "10.43" + +/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested + parentheses (of any kind) in a pattern. This limits the amount of system + stack that is used while compiling a pattern. */ +#undef PARENS_NEST_LIMIT + +/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by + pcre2grep to hold parts of the file it is searching. The buffer will be + expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing + very long lines. The actual amount of memory used by pcre2grep is three + times this number, because it allows for the buffering of "before" and + "after" lines. */ +#define PCRE2GREP_BUFSIZE 20480 + +/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer + used by pcre2grep to hold parts of the file it is searching. The actual + amount of memory used by pcre2grep is three times this number, because it + allows for the buffering of "before" and "after" lines. */ +#define PCRE2GREP_MAX_BUFSIZE 1048576 + +/* Define to any value to include debugging code. 
*/ +#undef PCRE2_DEBUG + +/* to make a symbol visible */ +#undef PCRE2_EXPORT + + +/* If you are compiling for a system other than a Unix-like system or + Win32, and it needs some magic to be inserted before the definition + of a function that is exported by the library, define this macro to + contain the relevant magic. If you do not define this macro, a suitable + __declspec value is used for Windows systems; in other environments + a compiler relevant "extern" is used with any "visibility" related + attributes from PCRE2_EXPORT included. + This macro appears at the start of every exported function that is part + of the external API. It does not appear on functions that are "external" + in the C sense, but which are internal to the library. */ +#undef PCRE2_EXP_DEFN + +/* Define to any value if linking statically (TODO: make nice with Libtool) */ +#undef PCRE2_STATIC + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* Define to any non-zero number to enable support for SELinux compatible + executable memory allocator in JIT. Note that this will have no effect + unless SUPPORT_JIT is also defined. */ +#undef SLJIT_PROT_EXECUTABLE_ALLOCATOR + +/* Define to 1 if all of the C89 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ +#define STDC_HEADERS 1 + +/* Define to any value to enable differential fuzzing support. */ +#undef SUPPORT_DIFF_FUZZ + +/* Define to any value to enable support for Just-In-Time compiling. */ +#undef SUPPORT_JIT + +/* Define to any value to allow pcre2grep to be linked with libbz2, so that it + is able to handle .bz2 files. */ + +/* Define to any value to allow pcre2test to be linked with libedit. */ +#undef SUPPORT_LIBEDIT + +/* Define to any value to allow pcre2test to be linked with libreadline. 
*/ +#undef SUPPORT_LIBREADLINE + +/* Define to any value to allow pcre2grep to be linked with libz, so that it + is able to handle .gz files. */ + +/* Define to any value to enable callout script support in pcre2grep. */ +#undef SUPPORT_PCRE2GREP_CALLOUT + +/* Define to any value to enable fork support in pcre2grep callout scripts. + This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined. + */ +#undef SUPPORT_PCRE2GREP_CALLOUT_FORK + +/* Define to any value to enable JIT support in pcre2grep. Note that this will + have no effect unless SUPPORT_JIT is also defined. */ +#undef SUPPORT_PCRE2GREP_JIT + +/* Define to any value to enable the 16 bit PCRE2 library. */ +#undef SUPPORT_PCRE2_16 + +/* Define to any value to enable the 32 bit PCRE2 library. */ +#undef SUPPORT_PCRE2_32 + +/* Define to any value to enable the 8 bit PCRE2 library. */ +#define SUPPORT_PCRE2_8 1 + +/* Define to any value to enable support for Unicode and UTF encoding. This + will work even in an EBCDIC environment, but it is incompatible with the + EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or* + ASCII/Unicode, but not both at once. */ +#undef SUPPORT_UNICODE + +/* Define to any value for valgrind support to find invalid memory reads. */ +#undef SUPPORT_VALGRIND + +/* Enable extensions on AIX, Interix, z/OS. */ +#ifndef _ALL_SOURCE +# undef _ALL_SOURCE +#endif +/* Enable general extensions on macOS. */ +#ifndef _DARWIN_C_SOURCE +# undef _DARWIN_C_SOURCE +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# undef __EXTENSIONS__ +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# undef _GNU_SOURCE +#endif +/* Enable X/Open compliant socket functions that do not require linking + with -lxnet on HP-UX 11.11. */ +#ifndef _HPUX_ALT_XOPEN_SOCKET_API +# undef _HPUX_ALT_XOPEN_SOCKET_API +#endif +/* Identify the host operating system as Minix. + This macro does not affect the system headers' behavior. 
+ A future release of Autoconf may stop defining this macro. */ +#ifndef _MINIX +# undef _MINIX +#endif +/* Enable general extensions on NetBSD. + Enable NetBSD compatibility extensions on Minix. */ +#ifndef _NETBSD_SOURCE +# undef _NETBSD_SOURCE +#endif +/* Enable OpenBSD compatibility extensions on NetBSD. + Oddly enough, this does nothing on OpenBSD. */ +#ifndef _OPENBSD_SOURCE +# undef _OPENBSD_SOURCE +#endif +/* Define to 1 if needed for POSIX-compatible behavior. */ +#ifndef _POSIX_SOURCE +# undef _POSIX_SOURCE +#endif +/* Define to 2 if needed for POSIX-compatible behavior. */ +#ifndef _POSIX_1_SOURCE +# undef _POSIX_1_SOURCE +#endif +/* Enable POSIX-compatible threading on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# undef _POSIX_PTHREAD_SEMANTICS +#endif +/* Enable extensions specified by ISO/IEC TS 18661-5:2014. */ +#ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__ +# undef __STDC_WANT_IEC_60559_ATTRIBS_EXT__ +#endif +/* Enable extensions specified by ISO/IEC TS 18661-1:2014. */ +#ifndef __STDC_WANT_IEC_60559_BFP_EXT__ +# undef __STDC_WANT_IEC_60559_BFP_EXT__ +#endif +/* Enable extensions specified by ISO/IEC TS 18661-2:2015. */ +#ifndef __STDC_WANT_IEC_60559_DFP_EXT__ +# undef __STDC_WANT_IEC_60559_DFP_EXT__ +#endif +/* Enable extensions specified by C23 Annex F. */ +#ifndef __STDC_WANT_IEC_60559_EXT__ +# undef __STDC_WANT_IEC_60559_EXT__ +#endif +/* Enable extensions specified by ISO/IEC TS 18661-4:2015. */ +#ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__ +# undef __STDC_WANT_IEC_60559_FUNCS_EXT__ +#endif +/* Enable extensions specified by C23 Annex H and ISO/IEC TS 18661-3:2015. */ +#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__ +# undef __STDC_WANT_IEC_60559_TYPES_EXT__ +#endif +/* Enable extensions specified by ISO/IEC TR 24731-2:2010. */ +#ifndef __STDC_WANT_LIB_EXT2__ +# undef __STDC_WANT_LIB_EXT2__ +#endif +/* Enable extensions specified by ISO/IEC 24747:2009. 
*/ +#ifndef __STDC_WANT_MATH_SPEC_FUNCS__ +# undef __STDC_WANT_MATH_SPEC_FUNCS__ +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# undef _TANDEM_SOURCE +#endif +/* Enable X/Open extensions. Define to 500 only if necessary + to make mbstate_t available. */ +#ifndef _XOPEN_SOURCE +# undef _XOPEN_SOURCE +#endif + + +/* Version number of package */ +#undef VERSION + +/* Number of bits in a file offset, on hosts where this is settable. */ +#undef _FILE_OFFSET_BITS + +/* Define to 1 on platforms where this makes off_t a 64-bit type. */ +#undef _LARGE_FILES + +/* Number of bits in time_t, on hosts where this is settable. */ +#undef _TIME_BITS + +/* Define to 1 on platforms where this makes time_t a 64-bit type. */ +#undef __MINGW_USE_VC2005_COMPAT + +/* Define to empty if 'const' does not conform to ANSI C. */ +#undef const + +/* Define to the type of a signed integer type of width exactly 64 bits if + such a type exists and the standard includes do not define it. */ +#undef int64_t + +/* Define as 'unsigned int' if doesn't define. 
*/ +#undef size_t + +// VMS +#include +#define PCRE2_EXPORT +#define LINK_SIZE 2 +#define MAX_NAME_COUNT 10000 +#define MAX_NAME_SIZE 32 +#define MATCH_LIMIT 10000000 +#define HEAP_LIMIT 20000000 +#define NEWLINE_DEFAULT 2 +#define PARENS_NEST_LIMIT 250 +#define MATCH_LIMIT_DEPTH MATCH_LIMIT +#define MAX_VARLOOKBEHIND 255 + +/* +#define _pcre2_default_compile_context_ vms_pcre2_def_cmpl_cntxt_ +#define _pcre2_default_convert_context_ vms_pcre2_def_cnvrt_cntxt_ +#define pcre2_set_compile_extra_options_8 vms_pcre2_set_cmpl_ext_opt_8 +#define pcre2_set_compile_recursion_guard_8 vms_pcre2_set_cmpl_rcrs_grd_8 +#define pcre2_set_recursion_memory_management_8 vms_pcre2_set_rcrs_mem_mng_8 +#define pcre2_match_data_create_from_pattern_8 vms_pcre2_m_d_crt_fr_patt_8 +#define pcre2_get_match_data_heapframes_size_8 vms_pcre2_get_m_d_hpfram_s_8 +#define pcre2_serialize_get_number_of_codes_8 vms_pcre2_ser_get_n_of_cod_8 +#define pcre2_substring_nametable_scan_8 vms_pcre2_substr_nmtab_scan_8 +#define pcre2_substring_length_bynumber_8 vms_pcre2_substr_len_bynum_8 +#define pcre2_substring_number_from_name_8 vms_pcre2_substr_num_f_nam_8 +*/ + +#define HAVE_BZLIB_H 1 +#define SUPPORT_LIBBZ2 1 + +#define HAVE_ZLIB_H 1 +#define SUPPORT_LIBZ 1 +$! +$! +$WRITE SYS$OUTPUT "config.h created" +$! +$!Creating Descrip.mms in each directory needed +$! +$! 
+$COPY SYS$INPUT [.SRC]DESCRIP.MMS +# (c) Alexey Chupahin 09-APR-2024 +# OpenVMS 7.3-2, DEC 2000 mod.300 +# OpenVMS 8.3, Digital PW 600au +# OpenVMS 8.4, Compaq DS10L +# OpenVMS 8.3, HP rx1620 + + +.FIRST + DEF PCRE2 [] + + +CC=cc +CFLAGS = /INCLUDE=([],[-],[-.VMS],ZLIB,BZ2LIB) \ + /DEFINE=(HAVE_CONFIG_H,PCRE2_CODE_UNIT_WIDTH=8)\ + /OPTIMIZE=(INLINE=SPEED) \ + /DEB +# Objects archived into PCRE2.OLB; PCRE2_COMPILE_CLASS.OBJ is built by its own rule below. +OBJ=\ +PCRE2POSIX.OBJ,\ +PCRE2_AUTO_POSSESS.OBJ,\ +PCRE2_CHKDINT.OBJ,\ +PCRE2_CHARTABLES.OBJ,\ +PCRE2_COMPILE.OBJ,PCRE2_COMPILE_CLASS.OBJ,\ +PCRE2_CONFIG.OBJ,\ +PCRE2_CONTEXT.OBJ,\ +PCRE2_CONVERT.OBJ,\ +PCRE2_DFA_MATCH.OBJ,\ +PCRE2_ERROR.OBJ,\ +PCRE2_EXTUNI.OBJ,\ +PCRE2_FIND_BRACKET.OBJ,\ +PCRE2_JIT_COMPILE.OBJ,\ +PCRE2_MAKETABLES.OBJ,\ +PCRE2_MATCH.OBJ,\ +PCRE2_MATCH_DATA.OBJ,\ +PCRE2_NEWLINE.OBJ,\ +PCRE2_ORD2UTF.OBJ,\ +PCRE2_PATTERN_INFO.OBJ,\ +PCRE2_SCRIPT_RUN.OBJ,\ +PCRE2_SERIALIZE.OBJ,\ +PCRE2_STRING_UTILS.OBJ,\ +PCRE2_STUDY.OBJ,\ +PCRE2_SUBSTITUTE.OBJ,\ +PCRE2_SUBSTRING.OBJ,\ +PCRE2_TABLES.OBJ,\ +PCRE2_UCD.OBJ,\ +PCRE2_VALID_UTF.OBJ,\ +PCRE2_XCLASS.OBJ + +ALL : PCRE2.H PCRE2.OLB PCRE2$SHR.EXE PCRE2DEMO.EXE PCRE2GREP.EXE + $!
+ +PCRE2$SHR.EXE : PCRE2.OLB + LINK/SHARE=PCRE2$SHR.EXE PCRE2:PCRE2.OLB/LIB,PCRE2:PCRE2$DEF.OPT/OPT + +PCRE2.OLB : $(OBJ) + LIB/CREA PCRE2.OLB $(OBJ) + +PCRE2DEMO.EXE : PCRE2DEMO.OBJ + LINK/EXE=PCRE2DEMO PCRE2DEMO,PCRE2:PCRE2.OPT/OPT + +PCRE2GREP.EXE : PCRE2GREP.OBJ + LINK/EXE=PCRE2GREP PCRE2GREP,PCRE2:PCRE2.OPT/OPT,ZLIB:ZLIB.OPT/OPT,BZ2LIB:BZIP2.OPT/OPT + +PCRE2.H : PCRE2.H_GENERIC + WRITE SYS$OUTPUT "Patching PCRE2.H" + COPY/CONCAT [-.VMS]PCRE2.H_PATCH,[]PCRE2.H_GENERIC PCRE2.H + +PCRE2_CHARTABLES.OBJ : PCRE2_CHARTABLES.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_CHARTABLES.C : PCRE2_CHARTABLES.C_DIST + COPY PCRE2_CHARTABLES.C_DIST PCRE2_CHARTABLES.C + +PCRE2DEMO.OBJ : PCRE2DEMO.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2GREP.OBJ : PCRE2GREP.C + $(CC) $(CFLAGS) /WARN=DIS=ALL $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2POSIX.OBJ : PCRE2POSIX.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2POSIX_TEST.OBJ : PCRE2POSIX_TEST.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2TEST.OBJ : PCRE2TEST.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_AUTO_POSSESS.OBJ : PCRE2_AUTO_POSSESS.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_CHKDINT.OBJ : PCRE2_CHKDINT.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_COMPILE.OBJ : PCRE2_COMPILE.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) +# PCRE2_COMPILE_CLASS.C gets its own object so PCRE2_COMPILE.OBJ has exactly one rule. +PCRE2_COMPILE_CLASS.OBJ : PCRE2_COMPILE_CLASS.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_CONFIG.OBJ : PCRE2_CONFIG.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_CONTEXT.OBJ : PCRE2_CONTEXT.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_CONVERT.OBJ : PCRE2_CONVERT.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_DFA_MATCH.OBJ : PCRE2_DFA_MATCH.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_DFTABLES.OBJ : PCRE2_DFTABLES.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_ERROR.OBJ : PCRE2_ERROR.C + $(CC) $(CFLAGS)
$(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_EXTUNI.OBJ : PCRE2_EXTUNI.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_FIND_BRACKET.OBJ : PCRE2_FIND_BRACKET.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_FUZZSUPPORT.OBJ : PCRE2_FUZZSUPPORT.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_JIT_COMPILE.OBJ : PCRE2_JIT_COMPILE.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_JIT_MATCH.OBJ : PCRE2_JIT_MATCH.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_JIT_MISC.OBJ : PCRE2_JIT_MISC.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_JIT_TEST.OBJ : PCRE2_JIT_TEST.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_MAKETABLES.OBJ : PCRE2_MAKETABLES.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_MATCH.OBJ : PCRE2_MATCH.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_MATCH_DATA.OBJ : PCRE2_MATCH_DATA.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_NEWLINE.OBJ : PCRE2_NEWLINE.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_ORD2UTF.OBJ : PCRE2_ORD2UTF.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_PATTERN_INFO.OBJ : PCRE2_PATTERN_INFO.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_PRINTINT.OBJ : PCRE2_PRINTINT.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_SCRIPT_RUN.OBJ : PCRE2_SCRIPT_RUN.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_SERIALIZE.OBJ : PCRE2_SERIALIZE.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_STRING_UTILS.OBJ : PCRE2_STRING_UTILS.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_STUDY.OBJ : PCRE2_STUDY.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_SUBSTITUTE.OBJ : PCRE2_SUBSTITUTE.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_SUBSTRING.OBJ : PCRE2_SUBSTRING.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_TABLES.OBJ : PCRE2_TABLES.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) 
+ +PCRE2_UCD.OBJ : PCRE2_UCD.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_UCPTABLES.OBJ : PCRE2_UCPTABLES.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_VALID_UTF.OBJ : PCRE2_VALID_UTF.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_XCLASS.OBJ : PCRE2_XCLASS.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +$! +$! +$WRITE SYS$OUTPUT "DESCRIP.MMS's have been created" +$WRITE SYS$OUTPUT " " +$WRITE SYS$OUTPUT " " +$WRITE SYS$OUTPUT "Now you can type @BUILD " +$! +$EXIT: +$DEFINE SYS$ERROR _NLA0: +$DEFINE SYS$OUTPUT _NLA0: +$DEL TEST.C;* +$DEL TEST.OBJ;* +$DEL TEST.EXE;* +$DEL TEST.OPT;* +$DEAS SYS$ERROR +$DEAS SYS$OUTPUT + diff --git a/libpcre/vms/openvms_readme.txt b/libpcre/vms/openvms_readme.txt new file mode 100644 index 000000000..7978a758c --- /dev/null +++ b/libpcre/vms/openvms_readme.txt @@ -0,0 +1,20 @@ +This directory provides OpenVMS support: +a shared and a static library, +and the pcre2grep utility. + +Requires: +bzip2 library : http://vaxvms.org/clamav/ +zlib library : http://vaxvms.org/libsdl/required.html + + +To build the library, run: + +@[.VMS]CONFIGURE.COM +@BUILD + +After the build, PCRE2$STARTUP.COM will have been created; +run it before using the library (LOGIN.COM is a good place to call it). + +Feel free to contact: +alexey@vaxman.de +Alexey Chupahin diff --git a/libpcre/vms/pcre2.h_patch b/libpcre/vms/pcre2.h_patch new file mode 100644 index 000000000..01347343f --- /dev/null +++ b/libpcre/vms/pcre2.h_patch @@ -0,0 +1,12 @@ +#define _pcre2_default_compile_context_ vms_pcre2_def_cmpl_cntxt_ +#define _pcre2_default_convert_context_ vms_pcre2_def_cnvrt_cntxt_ +#define pcre2_set_compile_extra_options_8 vms_pcre2_set_cmpl_ext_opt_8 +#define pcre2_set_compile_recursion_guard_8 vms_pcre2_set_cmpl_rcrs_grd_8 +#define pcre2_set_recursion_memory_management_8 vms_pcre2_set_rcrs_mem_mng_8 +#define pcre2_match_data_create_from_pattern_8 vms_pcre2_m_d_crt_fr_patt_8 +#define pcre2_get_match_data_heapframes_size_8
vms_pcre2_get_m_d_hpfram_s_8 +#define pcre2_serialize_get_number_of_codes_8 vms_pcre2_ser_get_n_of_cod_8 +#define pcre2_substring_nametable_scan_8 vms_pcre2_substr_nmtab_scan_8 +#define pcre2_substring_length_bynumber_8 vms_pcre2_substr_len_bynum_8 +#define pcre2_substring_number_from_name_8 vms_pcre2_substr_num_f_nam_8 +#define pcre2_set_max_pattern_compiled_length_8 vms_pcre2_set_max_pat_cmpl_len /* _8 suffix added to match the other width-8 entries */ diff --git a/libpcre/vms/stdint.h b/libpcre/vms/stdint.h new file mode 100644 index 000000000..3a5a5a295 --- /dev/null +++ b/libpcre/vms/stdint.h @@ -0,0 +1,9 @@ +#ifndef MY_VMS_STDINT +#define MY_VMS_STDINT +#include +#include +#include +#define SIZE_MAX UINT_MAX +#define UINT32_MAX 4294967295u +#define UINT16_MAX (65535) +#endif