blob: 5b5f6424b151c713d18cd9dfc41e0bd94c72965b [file] [log] [blame] [edit]
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the mode-dependent code which is used by pcre2test.c.
It is #included in pcre2test.c at each supported code unit width, with
PCRE2_SUFFIX set appropriately, just like the functions that comprise the
library. */
/* ------- Macros for hiding the bit width of this file's members ---------- */
/* This file is #included once per supported code unit width. Each short name
below is mapped, via PCRE2_SUFFIX (or G for the buffer names), onto a
width-specific identifier, so that the definitions from the separate inclusions
do not collide and the rest of this file can use the short names throughout. */
#define pbuffer PCRE2_SUFFIX(pbuffer)
#define pbuffer_size G(pbuffer,_size)
/* Only the 8-bit and 16-bit widths get a generic utf_to_ord name; it is built
from the code unit width (this file refers to utf8_to_ord and utf16_to_ord by
their explicit names elsewhere). */
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
#define utf_to_ord G(G(utf,PCRE2_CODE_UNIT_WIDTH),_to_ord)
#endif
/* Mode-dependent variables. */
#define compiled_code PCRE2_SUFFIX(compiled_code_)
#define general_context PCRE2_SUFFIX(general_context_)
#define general_context_copy PCRE2_SUFFIX(general_context_copy_)
#define pat_context PCRE2_SUFFIX(pat_context_)
#define default_pat_context PCRE2_SUFFIX(default_pat_context_)
#define con_context PCRE2_SUFFIX(con_context_)
#define default_con_context PCRE2_SUFFIX(default_con_context_)
#define dat_context PCRE2_SUFFIX(dat_context_)
#define default_dat_context PCRE2_SUFFIX(default_dat_context_)
#define match_data PCRE2_SUFFIX(match_data_)
#define jit_stack PCRE2_SUFFIX(jit_stack_)
#define jit_stack_size PCRE2_SUFFIX(jit_stack_size_)
#define patstack PCRE2_SUFFIX(patstack_)
#define patstacknext PCRE2_SUFFIX(patstacknext_)
/* Mode-dependent functions. */
#define jit_callback PCRE2_SUFFIX(jit_callback_)
#define pcre2_strcmp_c8 PCRE2_SUFFIX(pcre2_strcmp_c8_)
#define pcre2_strlen PCRE2_SUFFIX(pcre2_strlen_)
#define pchars PCRE2_SUFFIX(pchars_)
#define ptrunc PCRE2_SUFFIX(ptrunc_)
#define config_str PCRE2_SUFFIX(config_str_)
#define check_modifier PCRE2_SUFFIX(check_modifier_)
#define decode_modifiers PCRE2_SUFFIX(decode_modifiers_)
#define pattern_info PCRE2_SUFFIX(pattern_info_)
#define show_memory_info PCRE2_SUFFIX(show_memory_info_)
#define show_framesize PCRE2_SUFFIX(show_framesize_)
#define show_heapframes_size PCRE2_SUFFIX(show_heapframes_size_)
#define print_error_message_file PCRE2_SUFFIX(print_error_message_file_)
#define print_error_message PCRE2_SUFFIX(print_error_message_)
#define callout_enumerate_function PCRE2_SUFFIX(callout_enumerate_function_)
#define callout_enumerate_function_void PCRE2_SUFFIX(callout_enumerate_function_void_)
#define callout_enumerate_function_fail PCRE2_SUFFIX(callout_enumerate_function_fail_)
#define show_pattern_info PCRE2_SUFFIX(show_pattern_info_)
#define serial_error PCRE2_SUFFIX(serial_error_)
#define process_command PCRE2_SUFFIX(process_command_)
#define process_pattern PCRE2_SUFFIX(process_pattern_)
#define have_active_pattern PCRE2_SUFFIX(have_active_pattern_)
#define free_active_pattern PCRE2_SUFFIX(free_active_pattern_)
#define check_match_limit PCRE2_SUFFIX(check_match_limit_)
#define substitute_callout_function PCRE2_SUFFIX(substitute_callout_function_)
#define substitute_case_callout_function PCRE2_SUFFIX(substitute_case_callout_function_)
#define callout_function PCRE2_SUFFIX(callout_function_)
#define copy_and_get PCRE2_SUFFIX(copy_and_get_)
#define process_data PCRE2_SUFFIX(process_data_)
#define init_globals PCRE2_SUFFIX(init_globals_)
#define free_globals PCRE2_SUFFIX(free_globals_)
#define unittest PCRE2_SUFFIX(unittest_)
/* ---------------------- Mode-dependent variables ------------------------- */
/* The pattern that the pattern_info() wrapper and the callout enumeration
callback in this file inspect. */
static pcre2_code *compiled_code = NULL;
/* PCRE2 context objects. The pattern (compile) and data (match) contexts each
have a "default" counterpart; check_modifier() selects between them according
to its ctx argument. */
static pcre2_general_context *general_context = NULL, *general_context_copy = NULL;
static pcre2_compile_context *pat_context = NULL, *default_pat_context = NULL;
static pcre2_convert_context *con_context = NULL, *default_con_context = NULL;
static pcre2_match_context *dat_context = NULL, *default_dat_context = NULL;
/* Match data block; show_heapframes_size() reports its heapframes size. */
static pcre2_match_data *match_data = NULL;
/* JIT stack and its size. NOTE(review): a size of 0 presumably means that no
stack has been allocated yet - confirm against the code that sets it. */
static pcre2_jit_stack *jit_stack = NULL;
static size_t jit_stack_size = 0;
/* Fixed-capacity stack of compiled patterns; patstacknext is presumably the
index of the next free slot - confirm against the push/pop code. */
static pcre2_code *patstack[PATSTACKSIZE];
static int patstacknext = 0;
/*************************************************
* JIT memory callback *
*************************************************/
/* JIT memory callback. Sets the jit_was_used flag and returns its argument
reinterpreted as a JIT stack pointer.

Argument:   arg   opaque pointer, treated as a pcre2_jit_stack pointer
Returns:    the same pointer, typed as pcre2_jit_stack *
*/
static pcre2_jit_stack*
jit_callback(void *arg)
{
pcre2_jit_stack *stack = (pcre2_jit_stack *)arg;
jit_was_used = TRUE;
return stack;
}
/*************************************************
* Compare zero-terminated PCRE2 & 8-bit strings *
*************************************************/
/* Compare a zero-terminated PCRE2 string with a zero-terminated 8-bit string,
code unit by code unit. Each 8-bit character is converted to PCRE2_UCHAR before
being compared.

Arguments:
  str1      the PCRE2 string
  str2      the 8-bit string

Returns:    0 if the strings are identical; otherwise +1 if the first
            differing unit of str1 is the greater, or -1 if it is the smaller
*/
static int
pcre2_strcmp_c8(PCRE2_SPTR str1, const char *str2)
{
for (;;)
  {
  PCRE2_UCHAR lhs, rhs;

  /* Both strings ended together without any difference. */
  if (*str1 == '\0' && *str2 == '\0') return 0;

  lhs = *str1;
  rhs = *str2;
  if (lhs != rhs) return (lhs > rhs)? 1 : -1;

  str1++;
  str2++;
  }
}
/*************************************************
* Find the length of a PCRE2 string *
*************************************************/
/* Count the code units in a zero-terminated PCRE2 string.

Argument:   str   the string
Returns:    the number of code units before the terminating zero
*/
static size_t
pcre2_strlen(PCRE2_SPTR str)
{
PCRE2_SPTR scan = str;
while (*scan != 0) scan++;
return (size_t)(scan - str);
}
/*************************************************
* Print character string *
*************************************************/
/* Must handle Unicode strings in UTF mode. Yields number of characters printed.
For printing *MARK strings, a negative length is given, indicating that the
length is in the first code unit. If handed a NULL file, this function just
counts chars without printing (because pchar() does that). */
/* Output a string one character at a time through pchar(), and return the sum
of the values pchar() returns.

Arguments:
  p         the string
  length    number of code units; if negative, the real length is taken from
            the first code unit, and the text starts at the next one
  utf       TRUE to decode multi-unit characters (8-bit and 16-bit widths)
  f         the output file, passed through to pchar()

Returns:    the accumulated pchar() result
*/
static int pchars(PCRE2_SPTR p, ptrdiff_t length, BOOL utf, FILE *f)
{
int total = 0;
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_SPTR end;
ptrdiff_t remaining;

if (length < 0) length = *p++;
end = p + length;

for (remaining = length; remaining > 0; )
  {
  uint32_t ch = 0;
  int used = utf? utf_to_ord(p, end, &ch) : 0;

  /* If decoding is off, or it failed, or the character would run over the end,
  output the single code unit as it stands. */

  if (used <= 0 || used > remaining)
    {
    ch = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  total += pchar(ch, utf, f);
  }
#else
if (length < 0) length = *p++;
for (; length > 0; length--)
  {
  uint32_t ch = *p++;
  total += pchar(ch, utf, f);
  }
#endif
return total;
}
/*************************************************
* Print truncated character string *
*************************************************/
/* Must handle Unicode strings in UTF mode. Passed the total input string, and
the offset to print from/to. If left is true, prints up to the offset,
truncated; otherwise prints from the offset to the right, truncated. */
#if PCRE2_CODE_UNIT_WIDTH == 8
/* 8-bit version. Outputs at most 10 characters on one side of the given offset,
with "..." on the side where the string was cut short.

Arguments:
  p         start of the whole string
  p_len     length of the whole string, in code units
  offset    the position to print up to (left) or from (right)
  left      TRUE to print the text before the offset, FALSE for the text after
  utf       TRUE to treat bytes of the form 10xxxxxx as belonging to the
            preceding character rather than counting them separately
  f         the output file
*/
static void ptrunc_8(PCRE2_SPTR p, size_t p_len, size_t offset, BOOL left,
BOOL utf, FILE *f)
{
PCRE2_SPTR limit = p + p_len;
PCRE2_SPTR from = p + offset;
PCRE2_SPTR to = from;
size_t shown;

if (left)
  {
  /* Step backwards, one character per iteration. */
  for (shown = 0; shown < 10 && from > p; shown++)
    {
    from--;
    while (utf && from > p && (*from & 0xc0u) == 0x80u) from--;
    }
  if (from > p) fprintf(f, "...");
  }
else
  {
  /* Step forwards, one character per iteration. */
  for (shown = 0; shown < 10 && to < limit; shown++)
    {
    to++;
    while (utf && to < limit && (*to & 0xc0u) == 0x80u) to++;
    }
  }

while (from < to)
  {
  fprintf(f, "%c", CHAR_OUTPUT(*from));
  from++;
  }

if (!left && to < limit) fprintf(f, "...");
}
#elif PCRE2_CODE_UNIT_WIDTH == 16
/* 16-bit version. Outputs at most 10 characters on one side of the given
offset, with "..." on the side where the string was cut short. Characters above
0xff (or above 0x7f when utf is set) are written as UTF-8 byte sequences.

Arguments:
  p         start of the whole string
  p_len     length of the whole string, in code units
  offset    the position to print up to (left) or from (right)
  left      TRUE to print the text before the offset, FALSE for the text after
  utf       TRUE to treat units of the form 0xdc00-0xdfff as belonging to the
            preceding character rather than counting them separately
  f         the output file
*/
static void ptrunc_16(PCRE2_SPTR p, size_t p_len, size_t offset, BOOL left,
BOOL utf, FILE *f)
{
PCRE2_SPTR limit = p + p_len;
PCRE2_SPTR from = p + offset;
PCRE2_SPTR to = from;
size_t shown;

if (left)
  {
  for (shown = 0; shown < 10 && from > p; shown++)
    {
    from--;
    while (utf && from > p && (*from & 0xfc00u) == 0xdc00u) from--;
    }
  if (from > p) fprintf(f, "...");
  }
else
  {
  for (shown = 0; shown < 10 && to < limit; shown++)
    {
    to++;
    while (utf && to < limit && (*to & 0xfc00u) == 0xdc00u) to++;
    }
  }

while (from < to)
  {
  uint32_t ch;
  int used = utf16_to_ord(from, to, &ch);

  /* A negative result means the decode failed: take the unit as it stands. */
  if (used >= 0) from += used;
  else
    {
    ch = *from;
    from++;
    }

  if (ch <= 0xff && (!utf || ch <= 0x7f))
    {
    fputc((int)ch, f);
    }
  else
    {
    uint8_t encoded[6];
    int enclen = ord_to_utf8(ch, encoded);
    fprintf(f, "%.*s", enclen, encoded);
    }
  }

if (!left && to < limit) fprintf(f, "...");
}
#elif PCRE2_CODE_UNIT_WIDTH == 32
/* 32-bit version. Every code unit is one character, so the window is simply up
to 10 code units on the chosen side of the offset, with "..." on the side where
the string was cut short. Characters above 0xff (or above 0x7f when utf is set)
are written as UTF-8 byte sequences.

Arguments:
  p         start of the whole string
  p_len     length of the whole string, in code units
  offset    the position to print up to (left) or from (right)
  left      TRUE to print the text before the offset, FALSE for the text after
  utf       TRUE in UTF mode
  f         the output file
*/
static void ptrunc_32(PCRE2_SPTR p, size_t p_len, size_t offset, BOOL left,
BOOL utf, FILE *f)
{
PCRE2_SPTR limit = p + p_len;
PCRE2_SPTR from = p + offset;
PCRE2_SPTR to = from;

if (left)
  {
  size_t back = offset;
  if (back > 10) back = 10;
  from -= back;
  if (from > p) fprintf(f, "...");
  }
else
  {
  ptrdiff_t ahead = limit - to;
  if (ahead > 10) ahead = 10;
  to += ahead;
  }

for (; from < to; from++)
  {
  uint32_t ch = *from;
  if (ch <= 0xff && (!utf || ch <= 0x7f))
    {
    fputc((int)ch, f);
    }
  else
    {
    uint8_t encoded[6];
    int enclen = ord_to_utf8(ch, encoded);
    fprintf(f, "%.*s", enclen, encoded);
    }
  }

if (!left && to < limit) fprintf(f, "...");
}
#endif
#if PCRE2_CODE_UNIT_WIDTH == 16
/*************************************************
* Convert string to 16-bit *
*************************************************/
/* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
code values from 0 to 0x7fffffff. However, values greater than the later UTF
limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
UTF-8 if the utf8_input modifier is set, but an error is generated for values
greater than 0xffff.
If all the input bytes are ASCII, the space needed for a 16-bit string is
exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
is no more than double, because each character needs at most as many 16-bit
units as it has input bytes: a value up to 0xffff uses from 1 to 3 bytes in
UTF-8 but only one 16-bit unit, and a higher value uses at least 4 bytes in
UTF-8 and two 16-bit units (a surrogate pair, 4 bytes) in UTF-16. The result is
always left in pbuffer16. Impose a minimum size to save repeated re-sizing.
Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.
Arguments:
p points to a byte string
utf true in UTF mode
lenptr points to number of bytes in the string (excluding trailing zero)
Returns: 0 on success, with the length updated to the number of 16-bit
data items used (excluding the trailing zero)
OR -1 if a UTF-8 string is malformed
OR -2 if a value > 0x10ffff is encountered in UTF mode
OR -3 if a value > 0xffff is encountered when not in UTF mode
*/
/* Convert a byte string into 16-bit code units in pbuffer16, enlarging the
buffer first if it is too small (the process exits if that allocation fails).
The bytes are copied one-for-one unless utf is set or the pattern has the
utf8_input control, in which case they are decoded as UTF-8 and values of
0x10000 or more are written as surrogate pairs.

Arguments:
  p         points to a byte string
  utf       true in UTF mode
  lenptr    on entry, the number of bytes; on success, the number of 16-bit
            units written (excluding the trailing zero)

Returns:    0 on success
            -1 if the UTF-8 decoder rejects the input
            -2 if a value > 0x10ffff is encountered in UTF mode
            -3 if a value > 0xffff is encountered when not in UTF mode
*/
static int
to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
{
PCRE2_SIZE remaining = *lenptr;
PCRE2_SIZE needed = 2*remaining + 2;   /* Bytes, including the trailing zero */
uint16_t *out;

if (pbuffer16_size < needed)
  {
  free(pbuffer16);
  pbuffer16_size = needed;
  if (pbuffer16_size < 4096) pbuffer16_size = 4096;
  pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
  if (pbuffer16 == NULL)
    {
    fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
      pbuffer16_size);
    exit(1);
    }
  }

out = pbuffer16;

if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
  {
  /* Plain bytes: widen each one. */
  while (remaining > 0)
    {
    *out++ = *p++;
    remaining--;
    }
  }
else
  {
  /* p and remaining move in step, so the end of the input is fixed. */
  const uint8_t *end = p + remaining;

  while (remaining > 0)
    {
    uint32_t c;
    int chlen = utf8_to_ord(p, end, &c);

    if (chlen <= 0) return -1;
    if (!utf && c > 0xffff) return -3;
    if (c > 0x10ffff) return -2;

    p += chlen;
    remaining -= chlen;

    if (c >= 0x10000)
      {
      uint32_t v = c - 0x10000;
      *out++ = 0xd800 | (v >> 10);
      *out++ = 0xdc00 | (v & 0x3ff);
      }
    else *out++ = c;
    }
  }

*out = 0;
*lenptr = out - pbuffer16;
return 0;
}
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
#if PCRE2_CODE_UNIT_WIDTH == 32
/*************************************************
* Convert string to 32-bit *
*************************************************/
/* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
code values from 0 to 0x7fffffff. However, values greater than the later UTF
limit of 0x10ffff cause an error.
In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
is set, and no limit is imposed. There is special interpretation of the 0xff
byte (which is illegal in UTF-8) in this case: it causes the top bit of the
next character to be set. This provides a way of generating 32-bit characters
greater than 0x7fffffff.
If all the input bytes are ASCII, the space needed for a 32-bit string is
exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
string is no more than four times, because the number of characters must be
less than the number of bytes. The result is always left in pbuffer32. Impose a
minimum size to save repeated re-sizing.
Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-32 strings that are invalid,
for the purpose of testing that they are correctly faulted.
Arguments:
p points to a byte string
utf true in UTF mode
lenptr points to number of bytes in the string (excluding trailing zero)
Returns: 0 on success, with the length updated to the number of 32-bit
data items used (excluding the trailing zero)
OR -1 if a UTF-8 string is malformed
OR -2 if a value > 0x10ffff is encountered in UTF mode
*/
/* Convert a byte string into 32-bit code units in pbuffer32, enlarging the
buffer first if it is too small (the process exits if that allocation fails).
The bytes are copied one-for-one unless utf is set or the pattern has the
utf8_input control, in which case they are decoded as UTF-8. When decoding
outside UTF mode, a 0xff byte that is not the last byte is consumed and sets
the top bit of the character that follows it.

Arguments:
  p         points to a byte string
  utf       true in UTF mode
  lenptr    on entry, the number of bytes; on success, the number of 32-bit
            units written (excluding the trailing zero)

Returns:    0 on success
            -1 if the UTF-8 decoder rejects the input
            -2 if a value > 0x10ffff is encountered in UTF mode
*/
static int
to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
{
PCRE2_SIZE remaining = *lenptr;
PCRE2_SIZE needed = 4*remaining + 4;   /* Bytes, including the trailing zero */
uint32_t *out;

if (pbuffer32_size < needed)
  {
  free(pbuffer32);
  pbuffer32_size = needed;
  if (pbuffer32_size < 8192) pbuffer32_size = 8192;
  pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
  if (pbuffer32 == NULL)
    {
    fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
      pbuffer32_size);
    exit(1);
    }
  }

out = pbuffer32;

if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
  {
  /* Plain bytes: widen each one. */
  while (remaining > 0)
    {
    *out++ = *p++;
    remaining--;
    }
  }
else
  {
  /* p and remaining move in step, so the end of the input is fixed. */
  const uint8_t *end = p + remaining;

  while (remaining > 0)
    {
    uint32_t c;
    uint32_t high = 0;
    int chlen;

    if (!utf && *p == 0xff && remaining > 1)
      {
      high = 0x80000000u;
      p++;
      remaining--;
      }

    chlen = utf8_to_ord(p, end, &c);
    if (chlen <= 0) return -1;
    if (utf && c > 0x10ffff) return -2;

    p += chlen;
    remaining -= chlen;
    *out++ = c | high;
    }
  }

*out = 0;
*lenptr = out - pbuffer32;
return 0;
}
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
/*************************************************
* Read a string from pcre2_config() *
*************************************************/
/* Read out a version string from pcre2_config(), transcoding it into an
8-bit buffer.
Arguments:
what the item to read
where the 8-bit buffer to receive the string
*/
/* Fetch a string item from pcre2_config() and narrow it, code unit by code
unit, into an 8-bit buffer.

pcre2_config() is called twice: first with a NULL buffer to get the length, then
with a local buffer of VERSION_SIZE code units. The length is checked against
VERSION_SIZE BEFORE the second call, so an over-long item can never be written
into the local buffer. Any failure (negative length, over-long item, or the two
calls disagreeing) is reported on stderr and ends the program with exit(1).

Arguments:
  what      the item to read
  where     the 8-bit buffer to receive the string; the caller must provide
            room for as many characters as pcre2_config() reports
            (NOTE(review): presumably that count includes the terminating
            zero - confirm against the pcre2_config() documentation)
*/
static void
config_str(uint32_t what, char *where)
{
int needed;
int got = -1;
PCRE2_UCHAR buf[VERSION_SIZE];

needed = pcre2_config(what, NULL);
if (needed >= 0 && needed < VERSION_SIZE) got = pcre2_config(what, buf);

if (needed < 0 || needed >= VERSION_SIZE || needed != got)
  {
  fprintf(stderr, "pcre2test: Error in pcre2_config(%d)\n", (int)what);
  exit(1);
  }

while (needed-- > 0) where[needed] = (char)buf[needed];
}
/*************************************************
* Check a modifier and find its field *
*************************************************/
/* This function is called when a modifier has been identified. We check that
it is allowed here and find the field that is to be changed.
Arguments:
m the modifier list entry
ctx CTX_PAT => pattern context
CTX_POPPAT => pattern context for popped pattern
CTX_DEFPAT => default pattern context
CTX_DAT => data context
CTX_DEFDAT => default data context
pctl point to pattern control block
dctl point to data control block
c a single character or 0
Returns: a field pointer or NULL
*/
/* Decide whether a recognized modifier may be used in the current context and,
if so, locate the field it sets. The modifier's class (m->which) selects a base
object - a compile or match context, or the pattern or data control block - and
the result is that base plus m->offset. When the modifier is not acceptable, a
message is written to outfile.

Arguments:
  m         the modifier list entry
  ctx       one of the CTX_xxx values
  pctl      point to pattern control block, or NULL
  dctl      point to data control block, or NULL
  c         a single character to name in the error message, or 0 to use
            the modifier's full name

Returns:    a field pointer or NULL
*/
static void *
check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
{
void *base = NULL;
PCRE2_SIZE offset = m->offset;

/* In a Perl-compatible test, only the classes marked as Perl-safe are
accepted. */

if (restrict_for_perl_test &&
    m->which != MOD_PNDP && m->which != MOD_PATP &&
    m->which != MOD_DATP && m->which != MOD_PDP)
  {
  fprintf(outfile, "** \"%s\" is not allowed in a Perl-compatible test\n",
    m->name);
  return NULL;
  }

switch (m->which)
  {
  case MOD_CTC:  /* Compile context modifier */
  if (ctx == CTX_DEFPAT) base = default_pat_context;
  else if (ctx == CTX_PAT) base = pat_context;
  break;

  case MOD_CTM:  /* Match context modifier */
  if (ctx == CTX_DEFDAT) base = default_dat_context;
  else if (ctx == CTX_DAT) base = dat_context;
  break;

  case MOD_DAT:   /* Data line modifier */
  case MOD_DATP:  /* Allowed for Perl test */
  base = dctl;
  break;

  case MOD_PAT:   /* Pattern modifier */
  case MOD_PATP:  /* Allowed for Perl test */
  base = pctl;
  break;

  case MOD_PD:    /* Pattern or data line modifier */
  case MOD_PDP:   /* Ditto, allowed for Perl test */
  if (dctl != NULL) base = dctl;
  else base = pctl;
  break;

  case MOD_PND:   /* Pattern or data line, but not default pattern */
  case MOD_PNDP:  /* Ditto, allowed for Perl test */
  if (dctl != NULL) base = dctl;
  else if (ctx != CTX_DEFPAT) base = pctl;
  break;
  }

if (base != NULL) return (char *)base + offset;

if (c != 0)
  fprintf(outfile, "** /%c is not valid here\n", c);
else
  fprintf(outfile, "** \"%s\" is not valid here\n", m->name);
return NULL;
}
/*************************************************
* Decode a modifier list *
*************************************************/
/* A pointer to a control block is NULL when called in cases when that block is
not relevant. They are never all relevant in one call. At least one of patctl
and datctl is NULL. The second argument specifies which context to use for
modifiers that apply to contexts.
Arguments:
p point to modifier string
ctx CTX_PAT => pattern context
CTX_POPPAT => pattern context for popped pattern
CTX_DEFPAT => default pattern context
CTX_DAT => data context
CTX_DEFDAT => default data context
pctl point to pattern control block
dctl point to data control block
Returns: TRUE if successful decode, FALSE otherwise
*/
/* Parse a comma-separated modifier string and apply each item to the field
returned by check_modifier(). Diagnostics are written to outfile. Note that the
input string is modified in place: trailing white space after the last item is
cut off by writing a zero. */
static BOOL
decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
{
uint8_t *ep, *pp;
long li;
unsigned long uli;
BOOL first = TRUE;
/* Each iteration handles one comma-separated item. Within an iteration, p is
the start of the item, ep is its end, and pp is the current parse position. */
for (;;)
{
void *field;
modstruct *m;
BOOL off = FALSE;
unsigned int i;
size_t len;
int index;
char *endptr;
/* Skip white space and commas. */
while (isspace(*p) || *p == ',') p++;
if (*p == 0) break;
/* Find the end of the item; lose trailing whitespace at end of line. */
for (ep = p; *ep != 0 && *ep != ','; ep++);
if (*ep == 0)
{
while (ep > p && isspace(ep[-1])) ep--;
*ep = 0;
}
/* Remember if the first character is '-'. */
if (*p == '-')
{
off = TRUE;
p++;
}
/* Find the length of a full-length modifier name, and scan for it. */
pp = p;
while (pp < ep && *pp != '=') pp++;
index = scan_modifiers(p, pp - p);
/* If the first modifier is unrecognized, try to interpret it as a sequence
of single-character abbreviated modifiers. None of these modifiers have any
associated data. They just set options or control bits. */
if (index < 0)
{
uint32_t cc;
uint8_t *mp = p;
if (!first)
{
fprintf(outfile, "** Unrecognized modifier \"%.*s\"\n", (int)(ep-p), p);
if (ep - p == 1)
fprintf(outfile, "** Single-character modifiers must come first\n");
return FALSE;
}
first = FALSE;
/* Handle each character up to the next comma, newline, or end of string. */
for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
{
for (i = 0; i < C1MODLISTCOUNT; i++)
if (cc == c1modlist[i].onechar) break;
if (i >= C1MODLISTCOUNT)
{
fprintf(outfile, "** Unrecognized modifier '%c' in modifier string "
"\"%.*s\"\n", *p, (int)(ep-mp), mp);
return FALSE;
}
/* A non-negative index in the table entry is a cached result from an earlier
lookup of the equivalent full name. */
if (c1modlist[i].index >= 0)
{
index = c1modlist[i].index;
}
else
{
index = scan_modifiers((const uint8_t *)(c1modlist[i].fullname),
strlen(c1modlist[i].fullname));
if (index < 0)
{
fprintf(outfile, "** Internal error: single-character equivalent "
"modifier \"%s\" not found\n", c1modlist[i].fullname);
return FALSE;
}
c1modlist[i].index = index; /* Cache for next time */
}
field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
if (field == NULL) return FALSE;
/* /x is a special case; a second appearance changes PCRE2_EXTENDED to
PCRE2_EXTENDED_MORE. */
if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
{
*((uint32_t *)field) &= ~PCRE2_EXTENDED;
*((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
}
else
*((uint32_t *)field) |= modlist[index].value;
}
continue; /* With the next (fullname) modifier */
}
/* We have a match on a full-name modifier. Check for the existence of data
when needed. */
m = modlist + index; /* Save typing */
/* MOD_CTL, MOD_OPT and MOD_OPTMZ never take data; MOD_IND takes data only when
an '=' is present. Every other type requires "=value" and cannot be negated
with a leading '-'. */
if (m->type != MOD_CTL && m->type != MOD_OPT && m->type != MOD_OPTMZ &&
(m->type != MOD_IND || *pp == '='))
{
if (*pp++ != '=')
{
fprintf(outfile, "** '=' expected after \"%s\"\n", m->name);
return FALSE;
}
if (off)
{
fprintf(outfile, "** '-' is not valid for \"%s\"\n", m->name);
return FALSE;
}
}
/* These on/off types have no data. */
else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
{
fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
return FALSE;
}
/* Set the data length for those types that have data. Then find the field
that is to be set. If check_modifier() returns NULL, it has already output an
error message. */
len = ep - pp;
field = check_modifier(m, ctx, pctl, dctl, 0);
if (field == NULL) return FALSE;
/* Process according to data type. */
switch (m->type)
{
case MOD_CTL:
case MOD_OPT:
if (off) *((uint32_t *)field) &= ~m->value;
else *((uint32_t *)field) |= m->value;
break;
case MOD_OPTMZ:
pcre2_set_optimize(field, m->value);
break;
/* The value must be exactly "default", "anycrlf" or "unicode" (compared with
strncmpic). An explicit choice sets CTL2_BSR_SET in the relevant control block;
"default" clears it. */
case MOD_BSR:
if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
{
#ifdef BSR_ANYCRLF
*((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
#else
*((uint16_t *)field) = PCRE2_BSR_UNICODE;
#endif
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
else dctl->control2 &= ~CTL2_BSR_SET;
}
else
{
if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
*((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
*((uint16_t *)field) = PCRE2_BSR_UNICODE;
else goto INVALID_VALUE;
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
else dctl->control2 |= CTL2_BSR_SET;
}
pp = ep;
break;
case MOD_CHR: /* A single character */
*((uint32_t *)field) = *pp++;
break;
/* The value is a colon-separated list of names from convertlist. The first
name replaces a CONVERT_UNSET value; later names are ORed in. Only the first
len characters of each table name are compared. */
case MOD_CON: /* A convert type/options list */
for (;; pp++)
{
uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
len = ((colon != NULL && colon < ep)? colon:ep) - pp;
for (i = 0; i < convertlistcount; i++)
{
if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
{
if (*((uint32_t *)field) == CONVERT_UNSET)
*((uint32_t *)field) = convertlist[i].option;
else
*((uint32_t *)field) |= convertlist[i].option;
break;
}
}
if (i >= convertlistcount) goto INVALID_VALUE;
pp += len;
if (*pp != ':') break;
}
break;
/* Stored as two consecutive uint32_t values; the second is set to zero when
no ":n" part is given. */
case MOD_IN2: /* One or two unsigned integers */
if (!isdigit(*pp)) goto INVALID_VALUE;
uli = strtoul((const char *)pp, &endptr, 10);
if (U32OVERFLOW(uli)) goto INVALID_VALUE;
((uint32_t *)field)[0] = (uint32_t)uli;
if (*endptr == ':')
{
uli = strtoul((const char *)endptr+1, &endptr, 10);
if (U32OVERFLOW(uli)) goto INVALID_VALUE;
((uint32_t *)field)[1] = (uint32_t)uli;
}
else ((uint32_t *)field)[1] = 0;
pp = (uint8_t *)endptr;
break;
/* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
less than ULONG_MAX. So first test for overflowing the long int, and then
test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
case MOD_SIZ: /* PCRE2_SIZE value */
if (!isdigit(*pp)) goto INVALID_VALUE;
uli = strtoul((const char *)pp, &endptr, 10);
if (uli == ULONG_MAX) goto INVALID_VALUE;
#if ULONG_MAX > PCRE2_SIZE_MAX
if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
#endif
*((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
pp = (uint8_t *)endptr;
break;
/* With no data at all, the field is set to the default held in m->value. */
case MOD_IND: /* Unsigned integer with default */
if (len == 0)
{
*((uint32_t *)field) = (uint32_t)(m->value);
break;
}
PCRE2_FALLTHROUGH /* Fall through */
case MOD_INT: /* Unsigned integer */
if (!isdigit(*pp)) goto INVALID_VALUE;
uli = strtoul((const char *)pp, &endptr, 10);
if (U32OVERFLOW(uli)) goto INVALID_VALUE;
*((uint32_t *)field) = (uint32_t)uli;
pp = (uint8_t *)endptr;
break;
case MOD_INS: /* Signed integer */
if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
li = strtol((const char *)pp, &endptr, 10);
if (S32OVERFLOW(li)) goto INVALID_VALUE;
*((int32_t *)field) = (int32_t)li;
pp = (uint8_t *)endptr;
break;
/* The value is compared with each entry of the newlines table. Index 0 selects
NEWLINE_DEFAULT and clears CTL2_NL_SET; any other index is passed to
pcre2_set_newline() as it stands and sets CTL2_NL_SET. */
case MOD_NL:
for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
if (len == strlen(newlines[i]) &&
strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
if (i == 0)
{
pcre2_set_newline(field, NEWLINE_DEFAULT);
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
else dctl->control2 &= ~CTL2_NL_SET;
}
else
{
pcre2_set_newline(field, i);
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
else dctl->control2 |= CTL2_NL_SET;
}
pp = ep;
break;
case MOD_NN: /* Name or (signed) number; may be several */
if (isdigit(*pp) || *pp == '-')
{
int ct = MAXCPYGET - 1;
int32_t value;
li = strtol((const char *)pp, &endptr, 10);
if (S32OVERFLOW(li)) goto INVALID_VALUE;
value = (int32_t)li;
/* For numbers, m->value (not m->offset) is the offset of the number list, so
rebase the field pointer onto that list. */
field = (char *)field - m->offset + m->value; /* Adjust field ptr */
/* The list is terminated by a negative entry. A non-negative number is
appended after the existing entries; a negative number is written to the first
slot. */
if (value >= 0) /* Add new number */
{
while (*((int32_t *)field) >= 0 && ct-- > 0) /* Skip previous */
field = (char *)field + sizeof(int32_t);
if (ct <= 0)
{
fprintf(outfile, "** Too many numeric \"%s\" modifiers\n", m->name);
return FALSE;
}
}
*((int32_t *)field) = value;
if (ct > 0) ((int32_t *)field)[1] = -1;
pp = (uint8_t *)endptr;
}
/* Multiple strings are put end to end. */
else
{
char *nn = (char *)field;
if (len > 0) /* Add new name */
{
if (len > MAX_NAME_SIZE)
{
fprintf(outfile, "** Group name in \"%s\" is too long\n", m->name);
return FALSE;
}
/* Each stored name is zero-terminated; an empty name marks the end of the
list, which is where the new name goes. */
while (*nn != 0) nn += strlen(nn) + 1;
if (nn + len + 2 - (char *)field > LENCPYGET)
{
fprintf(outfile, "** Too many characters in named \"%s\" modifiers\n",
m->name);
return FALSE;
}
memcpy(nn, pp, len);
}
/* Terminate the name and then the list. With an empty value (len == 0) this
writes two zeros at the very start, which empties the list. */
nn[len] = 0 ;
nn[len+1] = 0;
pp = ep;
}
break;
/* m->value is the size of the destination, including the terminating zero. */
case MOD_STR:
if (len + 1 > m->value)
{
fprintf(outfile, "** Overlong value for \"%s\" (max %d code units)\n",
m->name, m->value - 1);
return FALSE;
}
memcpy(field, pp, len);
((uint8_t *)field)[len] = 0;
pp = ep;
break;
}
/* Whatever the type, nothing but a separator or the end may follow the data
that was consumed. */
if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
{
fprintf(outfile, "** Comma expected after modifier item \"%s\"\n", m->name);
return FALSE;
}
p = pp;
/* For a popped pattern, reject the item if the pattern control block now has
any option, a tables id, a locale, or any of the NOTPOP_CONTROLS bits set. */
if (ctx == CTX_POPPAT &&
(pctl->options != 0 ||
pctl->tables_id != 0 ||
pctl->locale[0] != 0 ||
(pctl->control & NOTPOP_CONTROLS) != 0))
{
fprintf(outfile, "** \"%s\" is not valid here\n", m->name);
return FALSE;
}
}
return TRUE;
/* Common exit for a bad value: report the whole item (p to ep). */
INVALID_VALUE:
fprintf(outfile, "** Invalid value in \"%.*s\"\n", (int)(ep-p), p);
return FALSE;
}
/*************************************************
* Get info from a pattern *
*************************************************/
/* A wrapped call to pcre2_pattern_info(), applied to the current compiled
pattern.
Arguments:
what code for the required information
where where to put the answer
unsetok PCRE2_ERROR_UNSET is an "expected" result
Returns: the return from pcre2_pattern_info()
*/
/* Query the current compiled pattern through pcre2_pattern_info(). The library
function is first called with a NULL result pointer, purely to exercise that
code path, and then with the caller's pointer. A failure is reported on outfile
unless it is PCRE2_ERROR_UNSET and the caller has said that is expected.

Arguments:
  what      code for the required information
  where     where to put the answer
  unsetok   PCRE2_ERROR_UNSET is an "expected" result

Returns:    0 on success, otherwise the negative return from
            pcre2_pattern_info()
*/
static int
pattern_info(int what, void *where, BOOL unsetok)
{
int rc;
BOOL expected;

(void)pcre2_pattern_info(compiled_code, what, NULL);  /* Exercise the code */
rc = pcre2_pattern_info(compiled_code, what, where);
if (rc >= 0) return 0;

expected = unsetok && rc == PCRE2_ERROR_UNSET;
if (!expected)
  {
  fprintf(outfile, "Error %d from "
    "pcre2_pattern_info_" STR(PCRE2_CODE_UNIT_WIDTH) "(%d)\n", rc, what);
  }
return rc;
}
/*************************************************
* Show memory usage info for a pattern *
*************************************************/
static void
show_memory_info(void)
{
uint32_t name_count, name_entry_size;
PCRE2_SIZE size, cblock_size, data_size;
cblock_size = sizeof(pcre2_real_code);
(void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
(void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
(void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
/* The uint32_t variables are cast before multiplying to avoid potential
integer overflow. */
data_size = CU2BYTES((PCRE2_SIZE)name_count * (PCRE2_SIZE)name_entry_size);
fprintf(outfile, "Memory allocation - code size : %" SIZ_FORM "\n", size -
cblock_size - data_size);
if (data_size != 0)
fprintf(outfile, "Memory allocation - data size : %" SIZ_FORM "\n", data_size);
if (pat_patctl.jit != 0)
{
(void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
fprintf(outfile, "Memory allocation - JIT code : %" SIZ_FORM "\n", size);
}
}
/*************************************************
* Show frame size info for a pattern *
*************************************************/
/* Fetch PCRE2_INFO_FRAMESIZE for the current compiled pattern and write it to
outfile. */
static void
show_framesize(void)
{
PCRE2_SIZE bytes_per_frame;

(void)pattern_info(PCRE2_INFO_FRAMESIZE, &bytes_per_frame, FALSE);
fprintf(outfile, "Frame size for pcre2_match(): %" SIZ_FORM "\n",
  bytes_per_frame);
}
/*************************************************
* Show heapframes size info for a match_data *
*************************************************/
/* Write to outfile the heapframes size that
pcre2_get_match_data_heapframes_size() reports for the current match data
block. */
static void
show_heapframes_size(void)
{
PCRE2_SIZE bytes = pcre2_get_match_data_heapframes_size(match_data);

fprintf(outfile, "Heapframes size in match_data: %" SIZ_FORM "\n", bytes);
}
/*************************************************
* Get and output an error message *
*************************************************/
/* Look up the text for an error code with pcre2_get_error_message() and write
it to a file between two caller-supplied strings.

Arguments:
  file        where to write
  errorcode   the error number to look up
  before      text written immediately before the message
  after       text written immediately after the message
  badcode_ok  if TRUE, PCRE2_ERROR_BADDATA from the lookup is shown as a fixed
              "unknown error number" text instead of as an internal error

Returns:    TRUE only if the lookup returned a non-negative length that agrees
            with the actual length of the message; FALSE in every other case
*/
static BOOL
print_error_message_file(FILE *file, int errorcode, const char *before,
const char *after, BOOL badcode_ok)
{
PCRE2_UCHAR buf[128];
int len = pcre2_get_error_message(errorcode, buf, sizeof(buf)/sizeof(*buf));
BOOL ok = len >= 0;

if (len == PCRE2_ERROR_BADDATA && badcode_ok)
  {
  fprintf(file, "%sPCRE2_ERROR_BADDATA (unknown error number)%s", before,
    after);
  return ok;
  }

if (len < 0)
  {
  fprintf(file, "\n** pcre2test internal error: cannot interpret error "
    "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len);
  return ok;
  }

/* Cross-check the returned length against the string itself. */

if ((unsigned)len != pcre2_strlen(buf))
  {
  fprintf(file, "\n** pcre2test: unexpected length %d from pcre2_get_error_message()\n", len);
  return FALSE;
  }

fprintf(file, "%s", before);
pchars(buf, len, FALSE, file);
fprintf(file, "%s", after);
return ok;
}
/* Shorthand for print_error_message_file() that writes to outfile and never
accepts an unknown error number.

Arguments:
  errorcode   the error number to look up
  before      text written immediately before the message
  after       text written immediately after the message

Returns:    whatever print_error_message_file() returns
*/
static BOOL
print_error_message(int errorcode, const char *before, const char *after)
{
const BOOL badcode_ok = FALSE;
return print_error_message_file(outfile, errorcode, before, after,
  badcode_ok);
}
/*************************************************
* Callback function for callout enumeration *
*************************************************/
/* Testing function to log data inside callout enumeration callbacks.
Argument:
cb pointer to enumerate block
callout_data user data
Returns: 0
*/
/* Callout enumeration callback that logs one line per callout to outfile:
either the callout number or the callout string between its delimiters,
followed by the pattern item that comes next.

Arguments:
  cb            pointer to enumerate block
  callout_data  user data (ignored)

Returns:    0
*/
static int callout_enumerate_function(pcre2_callout_enumerate_block *cb,
void *callout_data)
{
BOOL utf = (compiled_code->overall_options & PCRE2_UTF) != 0;
PCRE2_SPTR item = pbuffer;
PCRE2_SIZE item_length = cb->next_item_length;

(void)callout_data;  /* Not currently displayed */

fprintf(outfile, "Callout ");

if (cb->callout_string == NULL)
  {
  fprintf(outfile, "%d ", cb->callout_number);
  }
else
  {
  /* The opening delimiter is the code unit just before the string. The closing
  one is found through the parallel delimiter tables; if the opener is not in
  the table, the same character is used to close. */

  uint32_t opener = cb->callout_string[-1];
  uint32_t closer = opener;
  uint32_t k = 0;

  fprintf(outfile, "%c", CHAR_OUTPUT(opener));
  pchars(cb->callout_string, cb->callout_string_length, utf, outfile);

  while (callout_start_delims[k] != 0)
    {
    if (opener == callout_start_delims[k])
      {
      closer = callout_end_delims[k];
      break;
      }
    k++;
    }

  fprintf(outfile, "%c ", CHAR_OUTPUT(closer));
  }

/* When no item length is given, show one code unit, unless the position is at
the terminating zero of the pattern. */

item += cb->pattern_position;
if (item_length == 0 && *item != 0) item_length = 1;

pchars(item, item_length, utf, outfile);
fprintf(outfile, "\n");
return 0;
}
/* Callout enumeration callback that ignores both arguments and always
returns 0. */

static int callout_enumerate_function_void(pcre2_callout_enumerate_block *cb,
  void *callout_data)
{
(void)callout_data;
(void)cb;

return 0;
}
/* Callout enumeration callback that ignores the enumerate block and returns
the int that callout_data points to. */

static int callout_enumerate_function_fail(pcre2_callout_enumerate_block *cb,
  void *callout_data)
{
int *result = (int *)callout_data;

(void)cb;
return *result;
}
/*************************************************
* Show information about a pattern *
*************************************************/
/* This function is called after a pattern has been compiled if any of the
information-requesting controls have been set. It reads the controls in
pat_patctl, queries compiled_code, and writes the requested reports to
outfile. Regardless of the controls, it finishes by running the callout
enumeration over the compiled pattern.
Arguments: none
Returns: PR_OK continue processing next line
PR_SKIP skip to a blank line (callout enumeration failed)
PR_ABEND abort the pcre2test run (an info request or error-message lookup failed)
*/
static int
show_pattern_info(void)
{
int rc;
uint32_t compile_options, overall_options, extra_options;
BOOL utf = (compiled_code->overall_options & PCRE2_UTF) != 0;
/* Memory, frame size and compiled-code listings are independent of the main
"info" report and each has its own control bit. */
if ((pat_patctl.control & CTL_MEMORY) != 0)
show_memory_info();
if ((pat_patctl.control2 & CTL2_FRAMESIZE) != 0)
show_framesize();
if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
{
fprintf(outfile, "------------------------------------------------------------------\n");
/* The last argument is TRUE only when CTL_FULLBINCODE is set. */
pcre2_printint(compiled_code, outfile,
(pat_patctl.control & CTL_FULLBINCODE) != 0);
}
/* The main information report. */
if ((pat_patctl.control & CTL_INFO) != 0)
{
PCRE2_SPTR nametable;
uint8_t *start_bits;
BOOL heap_limit_set, match_limit_set, depth_limit_set;
uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
newline_convention;
/* These info requests may return PCRE2_ERROR_UNSET. A zero return records the
limit as set, PCRE2_ERROR_UNSET records it as not set, and any other return
aborts the run. */
switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
{
case 0:
heap_limit_set = TRUE;
break;
case PCRE2_ERROR_UNSET:
heap_limit_set = FALSE;
break;
default:
return PR_ABEND;
}
switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
{
case 0:
match_limit_set = TRUE;
break;
case PCRE2_ERROR_UNSET:
match_limit_set = FALSE;
break;
default:
return PR_ABEND;
}
switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
{
case 0:
depth_limit_set = TRUE;
break;
case PCRE2_ERROR_UNSET:
depth_limit_set = FALSE;
break;
default:
return PR_ABEND;
}
/* These info requests should always succeed. The return codes are summed, and
a non-zero total aborts the run. */
if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
!= 0)
return PR_ABEND;
fprintf(outfile, "Capture group count = %d\n", capture_count);
if (backrefmax > 0)
fprintf(outfile, "Max back reference = %d\n", backrefmax);
/* NOTE(review): maxlookbehind is not declared or fetched in this function;
presumably it is a file-scope variable set elsewhere - confirm. */
if (maxlookbehind > 0)
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
if (heap_limit_set)
fprintf(outfile, "Heap limit = %u\n", heap_limit);
if (match_limit_set)
fprintf(outfile, "Match limit = %u\n", match_limit);
if (depth_limit_set)
fprintf(outfile, "Depth limit = %u\n", depth_limit);
/* List the name table. Each entry occupies nameentrysize code units: the
group number is read with GET2() from the start of the entry, and the
zero-terminated name begins IMM2_SIZE code units in. */
if (namecount > 0)
{
fprintf(outfile, "Named capture groups:\n");
for (; namecount > 0; namecount--)
{
size_t length = pcre2_strlen(nametable + IMM2_SIZE);
fprintf(outfile, " ");
/* In UTF mode the name may be a UTF string containing non-ASCII
letters and digits. We must output it as a UTF-8 string. In non-UTF mode,
use the normal string printing functions, which use escapes for all
non-ASCII characters. */
if (utf)
{
#if PCRE2_CODE_UNIT_WIDTH == 32
/* 32-bit: each code unit is converted separately to UTF-8. */
PCRE2_SPTR nameptr = nametable + IMM2_SIZE;
while (*nameptr != 0)
{
uint8_t u8buff[6];
int len = ord_to_utf8(*nameptr++, u8buff);
fprintf(outfile, "%.*s", len, u8buff);
}
#endif
#if PCRE2_CODE_UNIT_WIDTH == 16
/* 16-bit: decode with utf16_to_ord(); when that returns a value that is not
positive, the single code unit is output as it stands. */
PCRE2_SPTR nameptr = nametable + IMM2_SIZE;
PCRE2_SPTR nameptr_end = nameptr + pcre2_strlen(nameptr);
while (*nameptr != 0)
{
int len;
uint8_t u8buff[6];
uint32_t c;
int ord_rc = utf16_to_ord(nameptr, nameptr_end, &c);
if (ord_rc > 0) nameptr += ord_rc;
else c = *nameptr++;
len = ord_to_utf8(c, u8buff);
fprintf(outfile, "%.*s", len, u8buff);
}
#endif
#if PCRE2_CODE_UNIT_WIDTH == 8
/* 8-bit: the name is written unchanged. */
fprintf(outfile, "%s", nametable + IMM2_SIZE);
#endif
}
else /* Not UTF mode */
{
pchars(nametable + IMM2_SIZE, length, FALSE, outfile);
}
/* Pad with spaces so the group numbers line up. The padding is computed from
length as returned by pcre2_strlen() above, not from the number of characters
actually displayed. */
while (length++ < nameentrysize - IMM2_SIZE) putc(' ', outfile);
fprintf(outfile, "%3d\n", GET2(nametable, 0));
nametable = nametable + nameentrysize;
}
}
if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
if (match_empty) fprintf(outfile, "May match empty string\n");
/* The return values of these three requests are not checked. */
pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);
/* Remove NEVER_UTF/NEVER_UCP from the displayed options unless they were set
in the pattern's own options (pat_patctl.options), that is, when they can only
have come from forbid_utf. This saves cluttering up the verification output of
non-UTF test files. */
if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
{
compile_options &= ~PCRE2_NEVER_UTF;
overall_options &= ~PCRE2_NEVER_UTF;
}
if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
{
compile_options &= ~PCRE2_NEVER_UCP;
overall_options &= ~PCRE2_NEVER_UCP;
}
/* Show one "Options:" line when the two sets agree, otherwise show the
compile options and the overall options separately. */
if ((compile_options|overall_options) != 0)
{
if (compile_options == overall_options)
show_compile_options(compile_options, "Options:", "\n");
else
{
show_compile_options(compile_options, "Compile options:", "\n");
show_compile_options(overall_options, "Overall options:", "\n");
}
}
if (extra_options != 0)
show_compile_extra_options(extra_options, "Extra options:", "\n");
/* Optimization flags are shown only when they differ from
PCRE2_OPTIMIZATION_ALL. */
if (compiled_code->optimization_flags != PCRE2_OPTIMIZATION_ALL)
show_optimize_flags(compiled_code->optimization_flags, "Optimizations: ", "\n");
if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
/* The \R convention is shown when either the test control or the compiled
pattern flags it as explicitly set. */
if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
(compiled_code->flags & PCRE2_BSR_SET) != 0)
fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
"any Unicode newline" : "CR, LF, or CRLF");
/* The newline convention is shown only when the compiled pattern has
PCRE2_NL_SET in its flags. */
if ((compiled_code->flags & PCRE2_NL_SET) != 0)
{
switch (newline_convention)
{
case PCRE2_NEWLINE_CR:
fprintf(outfile, "Forced newline is CR\n");
break;
case PCRE2_NEWLINE_LF:
fprintf(outfile, "Forced newline is LF\n");
break;
case PCRE2_NEWLINE_CRLF:
fprintf(outfile, "Forced newline is CRLF\n");
break;
case PCRE2_NEWLINE_ANYCRLF:
fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
break;
case PCRE2_NEWLINE_ANY:
fprintf(outfile, "Forced newline is any Unicode newline\n");
break;
case PCRE2_NEWLINE_NUL:
fprintf(outfile, "Forced newline is NUL\n");
break;
default:
break;
}
}
/* First code unit information: type 2 is reported as "at start or follows
newline", type 1 as a specific first code unit; otherwise, if a starting
bitmap exists, the set of possible starting code units is listed. */
if (first_ctype == 2)
{
fprintf(outfile, "First code unit at start or follows newline\n");
}
else if (first_ctype == 1)
{
const char *caseless =
((compiled_code->flags & PCRE2_FIRSTCASELESS) == 0)?
"" : " (caseless)";
/* The value 0xff is always shown as \xff, never as a literal character. */
if (first_cunit != 0xff && PRINTABLE(first_cunit))
fprintf(outfile, "First code unit = \'%c\'%s\n", CHAR_OUTPUT(first_cunit),
caseless);
else
{
fprintf(outfile, "First code unit = ");
if (first_cunit == 0xff)
fprintf(outfile, "\\xff");
else
pchar(first_cunit, FALSE, outfile);
fprintf(outfile, "%s\n", caseless);
}
}
else if (start_bits != NULL)
{
int input;
/* c tracks the output column: it starts at 24 to allow for the heading, and
the list is wrapped onto a new line once it passes 75. */
int c = 24;
fprintf(outfile, "Starting code units:");
for (input = 0; input < 256; input++)
{
int i = CHAR_INPUT_HEX(input);
if ((start_bits[i/8] & (1u << (i&7))) != 0)
{
if (c > 75)
{
fprintf(outfile, "\n ");
c = 2;
}
/* Printable values other than space are shown literally, others in hex. */
if (PRINTABLE(i) && i != CHAR_SPACE)
{
fprintf(outfile, " %c", CHAR_OUTPUT(i));
c += 2;
}
else
{
fprintf(outfile, " \\x%02x", CHAR_OUTPUT_HEX(i));
c += 5;
}
}
}
fprintf(outfile, "\n");
}
/* Last code unit information, shown when its type is non-zero. */
if (last_ctype != 0)
{
const char *caseless =
((compiled_code->flags & PCRE2_LASTCASELESS) == 0)?
"" : " (caseless)";
if (PRINTABLE(last_cunit))
fprintf(outfile, "Last code unit = \'%c\'%s\n", CHAR_OUTPUT(last_cunit),
caseless);
else
{
fprintf(outfile, "Last code unit = ");
pchar(last_cunit, FALSE, outfile);
fprintf(outfile, "%s\n", caseless);
}
}
/* The minimum subject length is shown only when the start-optimize flag is
present in the compiled pattern's optimization flags. */
if ((compiled_code->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
fprintf(outfile, "Subject length lower bound = %d\n", minlength);
/* Report the outcome of JIT compilation when JIT was requested together with
the jitverify control. */
if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
{
#ifdef SUPPORT_JIT
if (compiled_code->executable_jit != NULL)
fprintf(outfile, "JIT compilation was successful\n");
else
{
fprintf(outfile, "JIT compilation was not successful");
/* A non-zero jitrc is appended in parentheses as an error message. */
if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
return PR_ABEND;
fprintf(outfile, "\n");
}
#else
fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
#endif
}
}
/* Enumerate the callouts. This happens whether or not CTL_INFO is set; only
the choice of callback depends on CTL_CALLOUT_INFO. */
rc = pcre2_callout_enumerate(compiled_code,
((pat_patctl.control & CTL_CALLOUT_INFO) != 0)? callout_enumerate_function :
/* Exercise the callout enumeration code with a dummy callback to make sure
it works. */
callout_enumerate_function_void, NULL);
if (rc != 0)
{
fprintf(outfile, "Callout enumerate failed: error %d: ", rc);
/* Only negative values are looked up as error messages; for a positive rc
nothing more is written after the prefix above. */
if (rc < 0 && !print_error_message(rc, "", "\n"))
return PR_ABEND;
return PR_SKIP;
}
return PR_OK;
}
/*************************************************
* Handle serialization error *
*************************************************/
/* Report a serialization failure on outfile: the caller's description of the
operation, the numeric error code, and then the corresponding error message
text followed by a newline.

Arguments:
  rc          the error code
  msg         an initial message for what failed

Returns:      FALSE if print_error_message() fails
*/

static BOOL
serial_error(int rc, const char *msg)
{
BOOL message_ok;

fprintf(outfile, "%s", msg);
fprintf(outfile, " failed: error %d: ", rc);
message_ok = print_error_message(rc, "", "\n");
return message_ok;
}
/*************************************************
* Process command line *
*************************************************/
/* This function is called for lines beginning with # and a character that is
not ! or whitespace, when encountered between tests, which means that there is
no compiled pattern (compiled_code is NULL). The line is in buffer. The
command name is looked up in cmdlist and the matching action is carried out.

Arguments:  none

Returns:    PR_OK     continue processing next line
            PR_SKIP   skip to a blank line
            PR_ABEND  abort the pcre2test run
            PR_ENDIF  an #if condition was false: skip until #endif
*/

static int
process_command(void)
{
FILE *f;
PCRE2_SIZE serial_size;
size_t i;
int rc, cmd, yield;
uint16_t first_listed_newline;
const char *cmdname;
size_t cmdlen;
uint8_t *argptr, *serial;
BOOL if_inverted;

yield = PR_OK;
cmd = CMD_UNKNOWN;
cmdlen = 0;

/* Identify the command. The name must be followed by the end of the line or
by white space, so that one name cannot match as a prefix of another. */

for (i = 0; i < cmdlistcount; i++)
  {
  cmdname = cmdlist[i].name;
  cmdlen = strlen(cmdname);
  if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
      (buffer[cmdlen+1] == 0 || isspace(buffer[cmdlen+1])))
    {
    cmd = cmdlist[i].value;
    break;
    }
  }

/* When only preprocessing, #if and #endif are the only commands acted on. */

if (preprocess_only && cmd != CMD_IF && cmd != CMD_ENDIF)
  return PR_OK;

argptr = buffer + cmdlen + 1;

/* NOTE(review): when no command matched, cmdname and cmdlen still refer to
the last entry in cmdlist, so an unrecognised command after #perltest is
reported under that entry's name. Behaviour left unchanged here. */

if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT &&
    cmd != CMD_IF && cmd != CMD_ENDIF)
  {
  fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
  return PR_ABEND;
  }

switch(cmd)
  {
  case CMD_UNKNOWN:
  fprintf(outfile, "** Unknown command: %s", buffer);
  break;

  case CMD_FORBID_UTF:
  forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
  break;

  case CMD_PERLTEST:
  restrict_for_perl_test = TRUE;
  break;

  /* Set default pattern modifiers */

  case CMD_PATTERN:
  (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
  if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
    def_patctl.jit = JIT_DEFAULT;
  break;

  /* Set default subject modifiers */

  case CMD_SUBJECT:
  (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
  break;

  /* Check the default newline, and if not one of those listed, set up the
  first one to be forced. An empty list unsets. */

  case CMD_NEWLINE_DEFAULT:
  local_newline_default = 0;   /* Unset */
  first_listed_newline = 0;
  for (;;)
    {
    while (isspace(*argptr)) argptr++;
    if (*argptr == 0) break;
    for (uint16_t j = 1; j < sizeof(newlines)/sizeof(char *); j++)
      {
      size_t nlen = strlen(newlines[j]);
      if (strncmpic(argptr, (const uint8_t *)newlines[j], nlen) == 0 &&
          isspace(argptr[nlen]))
        {
        if (j == NEWLINE_DEFAULT) return PR_OK;  /* Default is valid */
        if (first_listed_newline == 0) first_listed_newline = j;
        }
      }
    while (*argptr != 0 && !isspace(*argptr)) argptr++;
    }
  local_newline_default = first_listed_newline;
  break;

  /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
  the compiled pattern (e.g. to give information) are permitted. The default
  pattern modifiers are ignored. */

  case CMD_POP:
  case CMD_POPCOPY:
  if (patstacknext <= 0)
    {
    fprintf(outfile, "** Can't pop off an empty stack\n");
    return PR_SKIP;
    }
  memset(&pat_patctl, 0, sizeof(patctl));   /* Completely unset */
  if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
    return PR_SKIP;

  if (cmd == CMD_POP)
    {
    compiled_code = patstack[--patstacknext];
    }
  else
    {
    compiled_code = pcre2_code_copy(patstack[patstacknext - 1]);
    }

  if (pat_patctl.jit != 0)
    {
    jitrc = pcre2_jit_compile(compiled_code, pat_patctl.jit);
    }
  rc = show_pattern_info();
  if (rc != PR_OK) return rc;
  break;

  /* Save the stack of compiled patterns to a file, then empty the stack. */

  case CMD_SAVE:
  if (patstacknext <= 0)
    {
    fprintf(outfile, "** No stacked patterns to save\n");
    return PR_OK;
    }

  rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save");
  if (rc != PR_OK) return rc;

  rc = pcre2_serialize_encode((const pcre2_code **)patstack, patstacknext,
    &serial, &serial_size, general_context);
  if (rc < 0)
    {
    fclose(f);
    if (!serial_error(rc, "Serialization")) return PR_ABEND;
    break;
    }

  /* Write the length at the start of the file to make it straightforward to
  get the right memory when re-loading. This saves having to read the file size
  in different operating systems. To allow for different endianness (even
  though reloading with the opposite endianness does not work), write the
  length byte-by-byte. */

  for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
  if (fwrite(serial, 1, serial_size, f) != serial_size)
    {
    fprintf(outfile, "** Wrong return from fwrite()\n");
    fclose(f);
    pcre2_serialize_free(serial);   /* Do not leak the encoded data */
    return PR_ABEND;
    }

  fclose(f);
  pcre2_serialize_free(serial);
  while(patstacknext > 0)
    {
    compiled_code = patstack[--patstacknext];
    pcre2_code_free(compiled_code);
    }
  compiled_code = NULL;
  break;

  /* Load a set of compiled patterns from a file onto the stack */

  case CMD_LOAD:
  rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load");
  if (rc != PR_OK) return rc;

  /* Read the 4-byte length, least significant byte first. Each byte is
  checked for EOF and widened before shifting, so that a truncated file is
  diagnosed and a top byte of 0x80 or more does not overflow a signed int. */

  serial_size = 0;
  for (i = 0; i < 4; i++)
    {
    int byte = fgetc(f);
    if (byte == EOF)
      {
      fprintf(outfile, "** Unexpected EOF reading length for #load\n");
      fclose(f);
      return PR_ABEND;
      }
    serial_size |= (PCRE2_SIZE)byte << (i*8);
    }

  serial = malloc(serial_size);
  if (serial == NULL)
    {
    fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
      serial_size);
    fclose(f);
    return PR_ABEND;
    }

  i = fread(serial, 1, serial_size, f);
  fclose(f);

  if (i != serial_size)
    {
    fprintf(outfile, "** Wrong return from fread()\n");
    yield = PR_ABEND;
    }
  else
    {
    rc = pcre2_serialize_get_number_of_codes(serial);
    if (rc < 0)
      {
      if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
      }
    else
      {
      /* Decode only as many patterns as there is room for on the stack. */

      if (rc + patstacknext > PATSTACKSIZE)
        {
        fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
          rc, (rc == 1)? "" : "s");
        rc = PATSTACKSIZE - patstacknext;
        fprintf(outfile, "** Decoding %d pattern%s\n", rc,
          (rc == 1)? "" : "s");
        }
      rc = pcre2_serialize_decode(patstack + patstacknext, rc, serial,
        general_context);
      if (rc < 0)
        {
        if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
        }
      else patstacknext += rc;
      }
    }

  free(serial);
  break;

  /* Load a set of binary tables into tables3. */

  case CMD_LOADTABLES:
  rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables");
  if (rc != PR_OK) return rc;

  /* The buffer is obtained on first use only; its size comes from
  pcre2_config(). */

  if (tables3 == NULL)
    {
    int r;
    r = pcre2_config(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length);
    if (r >= 0) tables3 = malloc(loadtables_length);
    }

  if (tables3 == NULL)
    {
    fprintf(outfile, "** Failed: malloc/config for #loadtables\n");
    yield = PR_ABEND;
    }
  else if (fread(tables3, 1, loadtables_length, f) != loadtables_length)
    {
    fprintf(outfile, "** Wrong return from fread()\n");
    yield = PR_ABEND;
    }

  fclose(f);
  break;

  /* Conditional processing: "#if [!] <name>", where <name> must be one of the
  CONF_FIX entries in coptlist and nothing but white space may follow it. */

  case CMD_IF:
  if (inside_if)
    {
    fprintf(outfile, "** Nested #if not supported\n");
    return PR_ABEND;
    }

  while (isspace(*argptr)) argptr++;
  if_inverted = FALSE;
  if (*argptr == '!')
    {
    argptr++;
    if_inverted = TRUE;
    }
  while (isspace(*argptr)) argptr++;

  for (i = 0; i < COPTLISTCOUNT; i++)
    {
    size_t optlen = strlen(coptlist[i].name);
    const uint8_t *argptr_trail;

    if (coptlist[i].type != CONF_FIX)
      continue;
    if (strncmp((const char*)argptr, coptlist[i].name, optlen) != 0)
      continue;

    argptr_trail = argptr + optlen;
    while (isspace(*argptr_trail)) argptr_trail++;
    if (*argptr_trail == 0 || *argptr_trail == '\n')
      break;
    }

  if (i == COPTLISTCOUNT)
    {
    fprintf(outfile, "** Unknown condition: %s\n", buffer);
    return PR_ABEND;
    }

  /* Condition FALSE - skip this line and everything until #endif. */

  if ((coptlist[i].value != 0) == if_inverted)
    yield = PR_ENDIF;
  inside_if = TRUE;
  break;

  case CMD_ENDIF:
  if (!inside_if)
    {
    fprintf(outfile, "** Unexpected #endif\n");
    return PR_ABEND;
    }
  inside_if = FALSE;
  break;
  }

return yield;
}
/*************************************************
* Process pattern line *
*************************************************/
/* This function is called when the input buffer contains the start of a
pattern. The first character is known to be a valid delimiter. The pattern is
read, modifiers are interpreted, and a suitable local context is set up for
this test. The pattern is then compiled.
Arguments: none
Returns: PR_OK continue processing next line
PR_SKIP skip to a blank line
PR_ABEND abort the pcre2test run
*/
static int
process_pattern(void)
{
BOOL utf;
uint32_t k;
uint8_t *p = buffer;
unsigned int delimiter = *p++;
int rc, errorcode;
pcre2_compile_context *use_pat_context;
PCRE2_SPTR use_pbuffer = NULL;
uint32_t use_forbid_utf = forbid_utf;
PCRE2_SIZE patlen;
PCRE2_SIZE valgrind_access_length;
PCRE2_SIZE erroroffset;
/* The perltest.sh script supports only / as a delimiter. */
if (restrict_for_perl_test && delimiter != '/')
{
fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n");
return PR_ABEND;
}
/* Initialize the context and pattern/data controls for this test from the
defaults. */
memcpy(pat_context, default_pat_context, sizeof(pcre2_compile_context));
memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
/* Find the end of the pattern, reading more lines if necessary. */
for(;;)
{
while (*p != 0)
{
if (*p == '\\' && p[1] != 0) p++;
else if (*p == delimiter) break;
p++;
}
if (*p != 0) break;
if ((p = extend_inputline(infile, p, " > ")) == NULL)
{
fprintf(outfile, "** Unexpected EOF\n");
return PR_ABEND;
}
if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
}
/* If the first character after the delimiter is backslash, make the pattern
end with backslash. This is purely to provide a way of testing for the error
message when a pattern ends with backslash. */
if (p[1] == '\\') *p++ = '\\';
/* Terminate the pattern at the delimiter, and compute the length. */
*p++ = 0;
patlen = p - buffer - 2;
/* Look for modifiers and options after the final delimiter. */
if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
/* Note that the match_invalid_utf option also sets utf when passed to
pcre2_compile(). */
utf = (pat_patctl.options & (PCRE2_UTF|PCRE2_MATCH_INVALID_UTF)) != 0;
/* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
exclusive with the utf modifier. */
if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
return PR_SKIP;
#else
if (utf)
{
fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
return PR_SKIP;
}
#endif
}
/* The convert and posix modifiers are mutually exclusive. */
if (pat_patctl.convert_type != CONVERT_UNSET &&
(pat_patctl.control & CTL_POSIX) != 0)
{
fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
return PR_SKIP;
}
/* Check for mutually exclusive control modifiers. At present, these are all in
the first control word. */
for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
{
uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
if (c != 0 && c != (c & (~c+1)))
{
show_controls(c, 0, "** Not allowed together:");
fprintf(outfile, "\n");
return PR_SKIP;
}
}
/* Assume full JIT compile for jitverify and/or jitfast if nothing else was
specified. */
if (pat_patctl.jit == 0 &&
(pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
pat_patctl.jit = JIT_DEFAULT;
/* Now copy the pattern to pbuffer8 for use in 8-bit testing. Convert from hex
if requested (literal strings in quotes may be present within the hexadecimal
pairs). The result must necessarily be fewer characters so will always fit in
pbuffer8. */
if ((pat_patctl.control & CTL_HEXPAT) != 0)
{
uint8_t *pp, *pt;
uint32_t c, d;
pt = pbuffer8;
for (pp = buffer + 1; *pp != 0; pp++)
{
if (isspace(*pp)) continue;
c = *pp++;
/* Handle a literal substring */
if (c == '\'' || c == '"')
{
uint8_t *pq = pp;
for (;; pp++)
{
d = *pp;
if (d == 0)
{
fprintf(outfile, "** Missing closing quote in hex pattern: "
"opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
return PR_SKIP;
}
if (d == c) break;
*pt++ = d;
}
}
/* Expect a hex pair */
else
{
if (!isxdigit(c))
{
fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
return PR_SKIP;
}
if (*pp == 0)
{
fprintf(outfile, "** Odd number of digits in hex pattern\n");
return PR_SKIP;
}
d = *pp;
if (!isxdigit(d))
{
fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
return PR_SKIP;
}
c = toupper(c);
d = toupper(d);
c = isdigit(c)? (c - '0') : (c - 'A' + 10);
d = isdigit(d)? (d - '0') : (d - 'A' + 10);
*pt++ = CHAR_OUTPUT(CHAR_INPUT_HEX((c << 4) + d));
}
}
*pt = 0;
patlen = pt - pbuffer8;
}
/* If not a hex string, process for repetition expansion if requested. */
else if ((pat_patctl.control & CTL_EXPAND) != 0)
{
uint8_t *pp, *pt;
pt = pbuffer8;
for (pp = buffer + 1; *pp != 0; pp++)
{
uint8_t *pc = pp;
uint32_t count = 1;
size_t length = 1;
/* Check for replication syntax; if not found, the defaults just set will
prevail and one character will be copied. */
if (pp[0] == '\\' && pp[1] == '[')
{
uint8_t *pe;
for (pe = pp + 2; *pe != 0; pe++)
{
if (pe[0] == ']' && pe[1] == '{')
{
size_t clen = pe - pc - 2;
uint32_t i = 0;
unsigned long uli;
char *endptr;
pe += 2;
uli = strtoul((const char *)pe, &endptr, 10);
if (U32OVERFLOW(uli))
{
fprintf(outfile, "** Pattern repeat count too large\n");
return PR_SKIP;
}
i = (uint32_t)uli;
pe = (uint8_t *)endptr;
if (*pe == '}')
{
if (i == 0)
{
fprintf(outfile, "** Zero repeat not allowed\n");
return PR_SKIP;
}
pc += 2;
count = i;
length = clen;
pp = pe;
break;
}
}
}
}
/* Add to output. If the buffer is too small expand it. The function for
expanding buffers always keeps buffer and pbuffer8 in step as far as their
size goes. */
while (pt + count * length > pbuffer8 + pbuffer8_size)
{
size_t pc_offset = pc - buffer;
size_t pp_offset = pp - buffer;
size_t pt_offset = pt - pbuffer8;
expand_input_buffers();
pc = buffer + pc_offset;
pp = buffer + pp_offset;
pt = pbuffer8 + pt_offset;
}
for (; count > 0; count--)
{
memcpy(pt, pc, length);
pt += length;
}
}
*pt = 0;
patlen = pt - pbuffer8;
if ((pat_patctl.control & CTL_INFO) != 0)
fprintf(outfile, "Expanded: %s\n", pbuffer8);
}
/* Neither hex nor expanded, just copy the input verbatim. */
else
{
strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
}
/* Sort out character tables */
if (pat_patctl.locale[0] != 0)
{
if (pat_patctl.tables_id != 0)
{
fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
return PR_SKIP;
}
if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
{
fprintf(outfile, "** Failed to set locale \"%s\"\n", pat_patctl.locale);
return PR_SKIP;
}
if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
{
snprintf((char *)locale_name, sizeof(locale_name), "%s", (char *)pat_patctl.locale);
if (locale_tables != NULL)
{
pcre2_maketables_free(general_context, locale_tables);
}
locale_tables = pcre2_maketables(general_context);
}
use_tables = locale_tables;
}
else switch (pat_patctl.tables_id)
{
case 0: use_tables = NULL; break;
case 1: use_tables = tables1; break;
case 2: use_tables = tables2; break;
case 3:
if (tables3 == NULL)
{
fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not "
"been loaded\n");
return PR_SKIP;
}
use_tables = tables3;
break;
default:
fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n");
return PR_SKIP;
}
pcre2_set_character_tables(pat_context, use_tables);
/* Set up for the stackguard test. */
if (pat_patctl.stackguard_test != 0)
{
pcre2_set_compile_recursion_guard(pat_context, stack_guard, NULL);
}
/* Handle compiling via the POSIX interface, which doesn't support the
timing, showing, or debugging options, nor the ability to pass over
local character tables. Neither does it have 16-bit or 32-bit support. */
if ((pat_patctl.control & CTL_POSIX) != 0)
{
#if PCRE2_CODE_UNIT_WIDTH != 8
fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
return PR_SKIP;
#else
int cflags = 0;
const char *msg = "** Ignored with POSIX interface:";
/* Check for features that the POSIX interface does not support. */
if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
if (timeit > 0) prmsg(&msg, "timing");
if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
{
show_compile_options(
pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS),
msg, "");
msg = "";
}
if ((pat_context->extra_options &
(uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0)
{
show_compile_extra_options(
pat_context->extra_options &
(uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, "");
msg = "";
}
if ((pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS)) != 0 ||
(pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2)) != 0)
{
show_controls(
pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS),
pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2),
msg);
msg = "";
/* Remove ignored options so as not to get a repeated message for those
that are actually subject controls. */
pat_patctl.control &= (uint32_t)(POSIX_SUPPORTED_COMPILE_CONTROLS);
pat_patctl.control2 &= (uint32_t)(POSIX_SUPPORTED_COMPILE_CONTROLS2);
}
if (local_newline_default != 0) prmsg(&msg, "#newline_default");
if (pat_context->max_pattern_length != PCRE2_UNSET)
prmsg(&msg, "max_pattern_length");
if (pat_context->max_pattern_compiled_length != PCRE2_UNSET)
prmsg(&msg, "max_pattern_compiled_length");
if (pat_context->parens_nest_limit != PARENS_NEST_DEFAULT)
prmsg(&msg, "parens_nest_limit");
if (msg[0] == 0) fprintf(outfile, "\n");
/* Translate PCRE2 options to POSIX options and then compile. */
if (utf) cflags |= REG_UTF;
if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
{
preg.re_endp = (char *)pbuffer8 + patlen;
cflags |= REG_PEND;
}
#if defined(EBCDIC) && !EBCDIC_IO
ascii_to_ebcdic_str(pbuffer8, patlen);
#endif
rc = regcomp(&preg, (char *)pbuffer8, cflags);
/* Compiling failed */
if (rc != 0)
{
char *regbuffer;
size_t bsize, usize, strsize;
preg.re_pcre2_code = NULL; /* In case something was left in there */
preg.re_match_data = NULL;
bsize = (pat_patctl.regerror_buffsize >= 0 &&
(unsigned)pat_patctl.regerror_buffsize <= pbuffer8_size)?
(unsigned)pat_patctl.regerror_buffsize : pbuffer8_size;
regbuffer = (char *)pbuffer8 + (pbuffer8_size - bsize);
usize = regerror(rc, &preg, regbuffer, bsize);
strsize = ((usize > bsize)? bsize : usize) - 1;
fprintf(outfile, "Failed: POSIX code %d: ", rc);
if (bsize > 0) pchars((PCRE2_SPTR8)regbuffer, strsize, utf, outfile);
fputs("\n", outfile);
if (usize > bsize)
{
fprintf(outfile, "** regerror() message truncated\n");
}
if (bsize > 0 && strlen(regbuffer) != strsize)
{
fprintf(outfile, "** regerror() strlen incorrect\n");
return PR_ABEND;
}
return PR_SKIP;
}
/* Compiling succeeded. Check that the values in the preg block are sensible.
It can happen that pcre2test is accidentally linked with a different POSIX
library which succeeds, but of course puts different things into preg. In
this situation, calling regfree() may cause a segfault (or invalid free() in
valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
calling of regfree() on exit. */
if (preg.re_pcre2_code == NULL ||
((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
preg.re_match_data == NULL ||
preg.re_cflags != cflags)
{
fprintf(outfile,
"** The regcomp() function returned zero (success), but the values set\n"
"** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
"** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
"** some other POSIX regex library.\n**\n");
preg.re_pcre2_code = NULL;
return PR_ABEND;
}
return PR_OK;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
}
/* Handle compiling via the native interface. Controls that act later are
ignored with "push". Replacements are locked out. */
if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
{
if (pat_patctl.replacement[0] != 0)
{
fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
return PR_OK;
}
if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
(pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
{
show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
"** Ignored when compiled pattern is stacked with 'push':");
fprintf(outfile, "\n");
}
if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
(pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
{
show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
"** Applies only to compile when pattern is stacked with 'push':");
fprintf(outfile, "\n");
}
}
/* Convert the input in non-8-bit modes. */
errorcode = 0;
#if defined(EBCDIC) && !EBCDIC_IO
ascii_to_ebcdic_str(pbuffer8, patlen);
#endif
#if PCRE2_CODE_UNIT_WIDTH != 8
errorcode = G(to,PCRE2_CODE_UNIT_WIDTH)(pbuffer8, utf, &patlen);
switch(errorcode)
{
case -1:
fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
"converted to " STR(PCRE2_CODE_UNIT_WIDTH) "-bit string\n");
return PR_SKIP;
case -2:
fprintf(outfile, "** Failed: character value greater than 0x10ffff "
"cannot be converted to UTF\n");
return PR_SKIP;
case -3:
fprintf(outfile, "** Failed: character value greater than 0xffff "
"cannot be converted to 16-bit in non-UTF mode\n");
return PR_SKIP;
default:
break;
}
#endif
/* When valgrind is supported, detect accesses to the 8-bit buffer now that we
have finished with it. */
#if defined SUPPORT_VALGRIND && PCRE2_CODE_UNIT_WIDTH != 8
VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8, pbuffer8_size);
#endif
/* The pattern is now in pbuffer[8|16|32], with the length in code units in
patlen. If it is to be converted, copy the result back afterwards so that it
ends up back in the usual place. */
if (pat_patctl.convert_type != CONVERT_UNSET)
{
int convert_return = PR_OK;
uint32_t convert_options = pat_patctl.convert_type;
PCRE2_UCHAR *converted_pattern;
PCRE2_SIZE converted_length;
// TODO No valgrind guards for out-of-bounds read in pcre2_pattern_convert(),
// nor do we appear to have a facility for testing zero-terminated patterns here.
if (pat_patctl.convert_length != 0)
{
converted_length = pat_patctl.convert_length;
converted_pattern = malloc(CU2BYTES(converted_length));
if (converted_pattern == NULL)
{
fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
return PR_SKIP;
}
}
else converted_pattern = NULL; /* Let the library allocate */
if (utf) convert_options |= PCRE2_CONVERT_UTF;
if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
memcpy(con_context, default_con_context, sizeof(pcre2_convert_context));
if (pat_patctl.convert_glob_escape != 0)
{
uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
pat_patctl.convert_glob_escape;
rc = pcre2_set_glob_escape(con_context, CHAR_INPUT(escape));
if (rc != 0)
{
fprintf(outfile, "** Invalid glob escape '%c'\n",
pat_patctl.convert_glob_escape);
convert_return = PR_SKIP;
goto CONVERT_FINISH;
}
}
if (pat_patctl.convert_glob_separator != 0)
{
uint32_t separator = pat_patctl.convert_glob_separator;
rc = pcre2_set_glob_separator(con_context, CHAR_INPUT(separator));
if (rc != 0)
{
fprintf(outfile, "** Invalid glob separator '%c'\n",
pat_patctl.convert_glob_separator);
convert_return = PR_SKIP;
goto CONVERT_FINISH;
}
}
rc = pcre2_pattern_convert(pbuffer, patlen, convert_options,
&converted_pattern, &converted_length, con_context);
if (rc != 0)
{
fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
converted_length);
convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
}
/* Output the converted pattern, then copy it. */
else
{
pchars(converted_pattern, converted_length, utf, outfile);
fprintf(outfile, "\n");
if (CU2BYTES(converted_length + 1) > pbuffer_size)
{
// TODO This seems... unfortunate? There must be some patterns that can
// expand when converted from glob to regex, but we aren't allowing for
// that here. Presumably we should expand the buffer rather than moan.
fprintf(outfile, "** Pattern conversion is too long for the buffer\n");
convert_return = PR_SKIP;
}
else
{
memcpy(pbuffer, converted_pattern, CU2BYTES(converted_length + 1));
patlen = converted_length;
}
}
/* Free the converted pattern. */
CONVERT_FINISH:
if (pat_patctl.convert_length != 0)
free(converted_pattern);
else
pcre2_converted_pattern_free(converted_pattern);
/* Return if conversion was unsuccessful. */
if (convert_return != PR_OK) return convert_return;
}
/* By default we pass a zero-terminated pattern, but a length is passed if
"use_length" was specified or this is a hex pattern (which might contain binary
zeros). When valgrind is supported, arrange for the unused part of the buffer
to be marked as no-access. */
valgrind_access_length = patlen;
if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
{
patlen = PCRE2_ZERO_TERMINATED;
valgrind_access_length += 1; /* For the terminating zero */
}
#ifdef SUPPORT_VALGRIND
VALGRIND_MAKE_MEM_NOACCESS(pbuffer + valgrind_access_length,
pbuffer_size - CU2BYTES(valgrind_access_length));
#else /* Valgrind not supported */
(void)valgrind_access_length; /* Avoid compiler warning */
#endif
/* If #newline_default has been used and the library was not compiled with an
appropriate default newline setting, local_newline_default will be non-zero. We
use this if there is no explicit newline modifier. */
if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
{
pcre2_set_newline(pat_context, local_newline_default);
}
/* The null_context modifier is used to test calling pcre2_compile() with a
NULL context. */
use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
NULL : pat_context;
/* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
and PCRE2_NEVER_UCP are invalid with it. */
if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
/* Set use_pbuffer to the input buffer, or leave it as NULL if requested. */
if ((pat_patctl.control2 & CTL2_NULL_PATTERN) == 0)
{
use_pbuffer = pbuffer;
}
/* Compile many times when timing. */
if (timeit > 0)
{
int i;
clock_t time_taken = 0;
for (i = 0; i < timeit; i++)
{
clock_t start_time = clock();
compiled_code = pcre2_compile(use_pbuffer, patlen,
pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
use_pat_context);
time_taken += clock() - start_time;
if (compiled_code != NULL)
pcre2_code_free(compiled_code);
}
total_compile_time += time_taken;
fprintf(outfile, "Compile time %8.4f microseconds\n",
((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeit);
}
/* A final compile that is used "for real". */
mallocs_called = 0;
compiled_code = pcre2_compile(use_pbuffer, patlen,
pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset, use_pat_context);
/* For malloc testing, we repeat the compilation. */
if (malloc_testing)
{
for (int i = 0, target_mallocs = mallocs_called; i <= target_mallocs; i++)
{
if (compiled_code != NULL)
pcre2_code_free(compiled_code);
errorcode = 0;
erroroffset = 0;
mallocs_until_failure = i;
compiled_code = pcre2_compile(use_pbuffer, patlen,
pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset, use_pat_context);
mallocs_until_failure = INT_MAX;
if (i < target_mallocs &&
!(compiled_code == NULL && errorcode == PCRE2_ERROR_HEAP_FAILED))
{
fprintf(outfile, "** malloc() compile test did not fail as expected (%d)\n",
errorcode);
return PR_ABEND;
}
}
}
/* If valgrind is supported, mark the pbuffer as accessible again. We leave the
pattern in the test-mode's buffer defined because it may be read from a callout
during matching. */
#ifdef SUPPORT_VALGRIND
VALGRIND_MAKE_MEM_UNDEFINED(pbuffer + valgrind_access_length,
pbuffer_size - CU2BYTES(valgrind_access_length));
#endif
/* Call the JIT compiler if requested. When timing, or testing malloc failures,
we must free and recompile the pattern each time because that is the only way to
free the JIT compiled code. We know that compilation will always succeed. */
if (compiled_code != NULL && pat_patctl.jit != 0)
{
if (timeit > 0)
{
int i;
clock_t time_taken = 0;
for (i = 0; i < timeit; i++)
{
clock_t start_time = clock();
jitrc = pcre2_jit_compile(compiled_code, pat_patctl.jit);
time_taken += clock() - start_time;
pcre2_code_free(compiled_code);
compiled_code = pcre2_compile(use_pbuffer, patlen,
pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
use_pat_context);
if (compiled_code == NULL)
{
fprintf(outfile, "** Unexpected - pattern compilation not successful\n");
return PR_ABEND;
}
if (jitrc != 0)
{
fprintf(outfile, "JIT compilation was not successful");
if (!print_error_message(jitrc, " (", ")\n")) return PR_ABEND;
break;
}
}
total_jit_compile_time += time_taken;
if (jitrc == 0)
fprintf(outfile, "JIT compile %8.4f microseconds\n",
((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeit);
}
mallocs_called = 0;
jitrc = pcre2_jit_compile(compiled_code, pat_patctl.jit);
/* For malloc testing, we repeat the compilation. */
if (malloc_testing)
{
for (int i = 0, target_mallocs = mallocs_called; i <= target_mallocs; i++)
{
pcre2_code_free(compiled_code);
compiled_code = pcre2_compile(use_pbuffer, patlen,
pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
use_pat_context);
if (compiled_code == NULL)
{
fprintf(outfile, "** Unexpected - pattern compilation not successful\n");
return PR_ABEND;
}
mallocs_until_failure = i;
jitrc = pcre2_jit_compile(compiled_code, pat_patctl.jit);
mallocs_until_failure = INT_MAX;
if (i < target_mallocs && jitrc != PCRE2_ERROR_NOMEMORY)
{
fprintf(outfile, "** malloc() JIT compile test did not fail as expected (%d)\n",
jitrc);
return PR_ABEND;
}
}
}
/* Check whether JIT compilation failed; but continue with an error message
if not. */
if (jitrc != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
{
fprintf(outfile, "JIT compilation was not successful");
if (!print_error_message(jitrc, " (", ")\n")) return PR_ABEND;
}
}
/* Compilation failed; go back for another re, skipping to blank line
if non-interactive. */
if (compiled_code == NULL)
{
int direction = error_direction(errorcode, erroroffset);
fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
(int)erroroffset);
if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
/* It's important that the erroroffset doesn't slice halfway through a UTF-8
or UTF-16 character. We can verify this by checking that the input left of the
erroroffset is valid. Note that if the input is invalid (which is exercised in
some tests) then the offset will be positioned with the valid part to the left
of erroroffset. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
if (utf)
{
uint32_t cc;
int n = 1;
for (PCRE2_UCHAR *q = pbuffer, *q_end = q + erroroffset; q < q_end && n > 0; q += n)
n = utf_to_ord(q, q_end, &cc);
if (n <= 0)
{
fprintf(outfile, "** Erroroffset %d splits a UTF character\n", (int)erroroffset);
return PR_ABEND;
}
}
#endif
/* Print the surrounding context around the erroroffset. */
if (direction < 0)
{
fprintf(outfile, "** Error code %d not implemented in error_direction().\n", errorcode);
fprintf(outfile, " error_direction() should usually return '1' for newly-added errors,\n");
fprintf(outfile, " and the offset should be just to the right of the bad character.\n");
return PR_ABEND;
}
else if (direction != 0)
{
PCRE2_SIZE full_patlen = (patlen != PCRE2_ZERO_TERMINATED)? patlen :
pcre2_strlen(pbuffer);
fprintf(outfile, " here: ");
if (erroroffset > 0)
{
ptrunc(pbuffer, full_patlen, erroroffset, TRUE, utf, outfile);
fprintf(outfile, " ");
}
fprintf(outfile, (direction == 1)? "|<--|" : (direction == 2)? "|-->|" : "|<-->|");
if (erroroffset < full_patlen)
{
fprintf(outfile, " ");
ptrunc(pbuffer, full_patlen, erroroffset, FALSE, utf, outfile);
}
fprintf(outfile, "\n");
}
else if (erroroffset != 0)
{
fprintf(outfile, "** Unexpected non-zero erroroffset %d for error code %d\n",
(int)erroroffset, errorcode);
return PR_ABEND;
}
return PR_SKIP;
}
/* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
locked out at compile time, but we must also check for occurrences of \P, \p,
and \X, which are only supported when Unicode is supported. */
if (forbid_utf != 0)
{
if ((compiled_code->flags & PCRE2_HASBKPORX) != 0)
{
fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
"#forbid_utf command\n");
return PR_SKIP;
}
}
/* Remember the maximum lookbehind, for partial matching. */
if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
return PR_ABEND;
/* Remember the number of captures. */
if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
return PR_ABEND;
/* If an explicit newline modifier was given, set the information flag in the
pattern so that it is preserved over push/pop. */
if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
{
compiled_code->flags |= PCRE2_NL_SET;
}
/* Output code size and other information if requested. */
rc = show_pattern_info();
if (rc != PR_OK) return rc;
/* The "push" control requests that the compiled pattern be remembered on a
stack. This is mainly for testing the serialization functionality. */
if ((pat_patctl.control & CTL_PUSH) != 0)
{
if (patstacknext >= PATSTACKSIZE)
{
fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
return PR_ABEND;
}
patstack[patstacknext++] = compiled_code;
compiled_code = NULL;
}
/* The "pushcopy" and "pushtablescopy" controls are similar, but push a
copy of the pattern, the latter with a copy of its character tables. This tests
the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
{
if (patstacknext >= PATSTACKSIZE)
{
fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
return PR_ABEND;
}
if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
{
patstack[patstacknext++] = pcre2_code_copy(compiled_code);
}
else
{
patstack[patstacknext++] = pcre2_code_copy_with_tables(compiled_code);
}
}
return PR_OK;
}
/* Helper to test for an active pattern. */
static BOOL
have_active_pattern(void)
{
return compiled_code != NULL;
}
/* Helper to free (and null-out) the active pattern. Safe to call even if there
is no active pattern. */
static void
free_active_pattern(void)
{
/* Nothing to release when there is no active pattern; otherwise free it and
forget the pointer so that it cannot be used or freed a second time. */
if (compiled_code != NULL)
  {
  pcre2_code_free(compiled_code);
  compiled_code = NULL;
  }
}
/*************************************************
* Check heap, match or depth limit *
*************************************************/
/* This is used for DFA, normal, and JIT fast matching. For DFA matching it
should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
Arguments:
pp the subject string
ulen length of subject or PCRE2_ZERO_TERMINATED
errnumber defines which limit to test
msg string to include in final message
Returns: the return from the final match function call
*/
static int
check_match_limit(PCRE2_SPTR pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
{
/* Finds the smallest value of one limit (heap, match, or depth, selected by
errnumber) at which the match no longer fails with that limit's error, and
prints it, labelled with msg.

Search state: min is the largest trial value known to hit the limit error, max
is the smallest trial value known not to hit it (UINT32_MAX until one has been
seen), and mid is the value being tried. While no non-failing value has been
seen, mid is doubled; after that the interval [min, max] is bisected.

Returns the result of the last match call, or PCRE2_ERROR_NOMEMORY if the DFA
workspace cannot be allocated. */

int capcount;
uint32_t min = 0;
uint32_t mid = 64;
uint32_t max = UINT32_MAX;

/* Open up all three limits; only the one under test is tightened below. */

pcre2_set_match_limit(dat_context, max);
pcre2_set_depth_limit(dat_context, max);
pcre2_set_heap_limit(dat_context, max);

for (;;)
  {
  uint32_t stack_start = 0;

  /* If we are checking the heap limit, free any frames vector that is cached
  in the match_data so we always start without one. */

  if (errnumber == PCRE2_ERROR_HEAPLIMIT)
    {
    pcre2_set_heap_limit(dat_context, mid);
    match_data->memctl.free(match_data->heapframes,
      match_data->memctl.memory_data);
    match_data->heapframes = NULL;
    match_data->heapframes_size = 0;
    }

  /* No need to mess with the frames vector for match or depth limits. */

  else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
    {
    pcre2_set_match_limit(dat_context, mid);
    }
  else
    {
    pcre2_set_depth_limit(dat_context, mid);
    }

  /* Do the appropriate match */

  if ((dat_datctl.control & CTL_DFA) != 0)
    {
    stack_start = DFA_START_RWS_SIZE/1024;

    /* The workspace is allocated on first use. Check the allocation before
    writing to it, so that running out of memory gives an error return instead
    of a NULL dereference. */

    if (dfa_workspace == NULL)
      {
      dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
      if (dfa_workspace == NULL)
        {
        fprintf(outfile, "** Failed: malloc failed for DFA workspace\n");
        return PCRE2_ERROR_NOMEMORY;
        }
      }

    if (dfa_matched++ == 0)
      dfa_workspace[0] = -1;  /* To catch bad restart */
    capcount = pcre2_dfa_match(compiled_code, pp, ulen, dat_datctl.offset,
      dat_datctl.options, match_data,
      dat_context, dfa_workspace, DFA_WS_DIMENSION);
    }
  else if ((pat_patctl.control & CTL_JITFAST) != 0)
    capcount = pcre2_jit_match(compiled_code, pp, ulen, dat_datctl.offset,
      dat_datctl.options, match_data, dat_context);
  else
    {
    capcount = pcre2_match(compiled_code, pp, ulen, dat_datctl.offset,
      dat_datctl.options, match_data, dat_context);
    }

  /* The limit was hit: the trial value is too small. */

  if (capcount == errnumber)
    {
    /* Once the top bit is set there is no larger value worth trying. */

    if ((mid & 0x80000000u) != 0)
      {
      fprintf(outfile, "Can't find minimum %s limit: check pattern for "
        "restriction\n", msg);
      break;
      }

    min = mid;
    mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
    }

  /* The match ran to a normal conclusion: the trial value is big enough. */

  else if (capcount >= 0 ||
           capcount == PCRE2_ERROR_NOMATCH ||
           capcount == PCRE2_ERROR_PARTIAL)
    {
    /* If we've not hit the error with a heap limit less than the size of the
    initial stack frame vector (for pcre2_match()) or the initial stack
    workspace vector (for pcre2_dfa_match()), the heap is not being used, so
    the minimum limit is zero; there's no need to go on. The other limits are
    always greater than zero. */

    if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
      {
      fprintf(outfile, "Minimum %s limit = 0\n", msg);
      break;
      }

    /* min fails and min + 1 succeeds, so mid is the answer. It is an unsigned
    value, so print it as one. */

    if (mid == min + 1)
      {
      fprintf(outfile, "Minimum %s limit = %u\n", msg, (unsigned int)mid);
      break;
      }

    max = mid;
    mid = (min + max)/2;
    }

  else break;    /* Some other error */
  }

return capcount;
}
/*************************************************
* Substitute callout function *
*************************************************/
/* Called from pcre2_substitute() when the substitute_callout modifier is set.
Print out the data that is passed back.
Arguments:
scb pointer to substitute callout block
data_ptr callout data
Returns: 0 normally, -1 when the substitution count equals substitute_stop,
or +1 when it equals substitute_skip
*/
static int
substitute_callout_function(pcre2_substitute_callout_block *scb,
  void *data_ptr)
{
/* Prints one line per substitution: the substitution count and ovector count,
then the offsets and text of the matched part of the input ("Old") and of the
replacement in the output ("New"). The value returned is 0 unless the
substitution count equals the substitute_stop (-1) or substitute_skip (+1)
setting. */

PCRE2_SIZE old_start = scb->ovector[0];
PCRE2_SIZE old_end = scb->ovector[1];
PCRE2_SIZE new_start = scb->output_offsets[0];
PCRE2_SIZE new_end = scb->output_offsets[1];
BOOL utf = (compiled_code->overall_options & PCRE2_UTF) != 0;
int yield;

(void)data_ptr;   /* Not used */

/* The matched text in the subject. */

fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"",
  scb->subscount, scb->oveccount,
  old_start, old_end);
pchars(scb->input + old_start, old_end - old_start, utf, outfile);

/* The text that replaces it in the output. */

fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"",
  new_start, new_end);
pchars(scb->output + new_start, new_end - new_start, utf, outfile);

/* Stopping takes precedence over skipping when both settings name this
substitution. */

if (scb->subscount == dat_datctl.substitute_stop)
  {
  fprintf(outfile, " STOPPED");
  yield = -1;
  }
else if (scb->subscount == dat_datctl.substitute_skip)
  {
  fprintf(outfile, " SKIPPED");
  yield = +1;
  }
else
  {
  yield = 0;
  }

fprintf(outfile, "\"\n");
return yield;
}
/*************************************************
* Substitute case callout function *
*************************************************/
/* Called from pcre2_substitute() when the substitute_case_callout
modifier is set. The substitute callout block is not identical for all code unit
widths, so we have to duplicate the function for each supported width.
Arguments:
input the input character
input_len the length of the input
output the output buffer
output_cap the output buffer capacity
to_case the case conversion type
data_ptr callout data (unused)
Returns: the number of code units of the output, which may exceed output_cap
when the output buffer is too small, or ~(PCRE2_SIZE)0 on failure
*/
static PCRE2_SIZE
substitute_case_callout_function(
  PCRE2_SPTR input, PCRE2_SIZE input_len,
  PCRE2_UCHAR *output, PCRE2_SIZE output_cap,
  int to_case, void *data_ptr)
{
PCRE2_UCHAR stack_copy[16];
PCRE2_UCHAR *heap_copy = NULL;
PCRE2_SPTR src;
PCRE2_SIZE out_len = 0;
PCRE2_SIZE pos = 0;

(void)data_ptr;   /* Not used */

/* Work from a private copy of the input: a small one fits in the local array,
a longer one is put on the heap. */

if (input_len <= sizeof(stack_copy)/sizeof(*stack_copy))
  {
  memcpy(stack_copy, input, CU2BYTES(input_len));
  src = stack_copy;
  }
else
  {
  heap_copy = malloc(CU2BYTES(input_len));
  if (heap_copy == NULL) return ~(PCRE2_SIZE)0;
  memcpy(heap_copy, input, CU2BYTES(input_len));
  src = heap_copy;
  }

/* Offer case_transform() up to two code units at a time; it reports how many
it read and how many it wants written. */

while (pos < input_len)
  {
  BOOL has_next = pos + 1 < input_len;
  uint32_t first = src[pos];
  uint32_t second = has_next? src[pos + 1] : 0;
  int consumed;
  int produced;

  if (!case_transform(to_case, has_next? 2 : 1, &consumed, &produced,
        &first, &second))
    {
    out_len = ~(PCRE2_SIZE)0;
    break;
    }

  pos += consumed;

  /* Title-casing applies to the first transformation only; the rest is
  lower-cased. */

  if (to_case == PCRE2_SUBSTITUTE_CASE_TITLE_FIRST)
    to_case = PCRE2_SUBSTITUTE_CASE_LOWER;

  /* Store the result while it fits. Once it does not, just keep counting so
  that the required length is returned. */

  if (out_len + produced <= output_cap)
    {
    if (produced > 0) output[out_len++] = first;
    if (produced > 1) output[out_len++] = second;
    }
  else
    {
    out_len += produced;
    }
  }

/* heap_copy is still NULL when the local array was used. */

free(heap_copy);

/* Let's be maximally cruel. The case callout is allowed to leave the output
buffer in any state at all if it overflows, so let's use random garbage. */

if (out_len > output_cap)
  memset(output, time(NULL) & 1 ? 0xcd : 0xdc,
    CU2BYTES(output_cap));

return out_len;
}
/*************************************************
* Callout function *
*************************************************/
/* Called from a PCRE2 library as a result of the (?C) item. We print out where
we are in the match (unless suppressed). Yield zero unless more callouts than
the fail count, or the callout data is not zero. The only differences in the
callout block for different code unit widths are that the pointers to the
subject, the most recent MARK, and a callout argument string point to strings
of the appropriate width. Casts can be used to deal with this.
Arguments:
cb a pointer to a callout block
callout_data_ptr the provided callout data
Returns: 0 or 1 or an error, as determined by settings
*/
static int
callout_function(pcre2_callout_block *cb, void *callout_data_ptr)
{
/* Overview of what is written to outfile, in order: any "extra" heading for
the callout flags, the callout string (if there is one), the captured strings
(if requested), the subject line with position markers (unless suppressed), and
the latest mark if it has changed. The return value is then chosen from the
callout data and the cerror/cfail settings. */
FILE *f, *fdefault;
uint32_t i, pre_start, post_start, subject_length;
PCRE2_SIZE current_position;
/* utf reflects the PCRE2_UTF bit of the active pattern; the other two flags
come from the data line controls. Note that callout_where is TRUE when the
"no where" control is NOT set. */
BOOL utf = (compiled_code->overall_options & PCRE2_UTF) != 0;
BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
/* The FILE f is used for echoing the subject string if it is non-NULL. This
happens only once in simple cases, but we want to repeat after any additional
output caused by CALLOUT_EXTRA. */
/* fdefault is NULL only when this is not the first callout, captures are not
being shown, and the callout has no string argument. */
fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
NULL : outfile;
if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
{
f = outfile;
/* Only these three flag combinations produce a heading; for any other value
f reverts to fdefault. */
switch (cb->callout_flags)
{
case PCRE2_CALLOUT_BACKTRACK:
fprintf(f, "Backtrack\n");
break;
case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
fprintf(f, "Backtrack\nNo other matching paths\n");
PCRE2_FALLTHROUGH /* Fall through */
case PCRE2_CALLOUT_STARTMATCH:
fprintf(f, "New match attempt\n");
break;
default:
f = fdefault;
break;
}
}
else f = fdefault;
/* For a callout with a string argument, show the string first because there
isn't a tidy way to fit it in the rest of the data. */
if (cb->callout_string != NULL)
{
/* The code unit just before the string is taken as its opening delimiter. */
uint32_t delimiter = cb->callout_string[-1];
fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
cb->callout_string_offset, CHAR_OUTPUT(delimiter));
pchars(cb->callout_string, cb->callout_string_length, utf, outfile);
/* Map the opening delimiter to its closing counterpart via the parallel
start/end delimiter tables; if it is not found, the same character is used. */
for (i = 0; callout_start_delims[i] != 0; i++)
if (delimiter == callout_start_delims[i])
{
delimiter = callout_end_delims[i];
break;
}
fprintf(outfile, "%c", CHAR_OUTPUT(delimiter));
/* When captures follow, the line is completed by the "last capture" output
below instead. */
if (!callout_capture) fprintf(outfile, "\n");
}
/* Show captured strings if required */
if (callout_capture)
{
if (cb->callout_string == NULL)
fprintf(outfile, "Callout %d:", cb->callout_number);
fprintf(outfile, " last capture = %d\n", cb->capture_last);
/* Walk the offset pairs from pair 1 up to (but not including) capture_top;
pair 0 is not listed. */
for (i = 2; i < cb->capture_top * 2; i += 2)
{
fprintf(outfile, "%2d: ", i/2);
if (cb->offset_vector[i] == PCRE2_UNSET)
fprintf(outfile, "<unset>");
else
{
/* NOTE(review): this writes to f while the surrounding output goes to
outfile; with callout_capture set, fdefault (and hence f) is outfile, so the
two appear to be the same stream here. */
pchars(cb->subject + cb->offset_vector[i],
cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
}
fprintf(outfile, "\n");
}
}
/* Unless suppressed, re-print the subject in canonical form (with escapes for
non-printing characters), the first time, or if giving full details. On
subsequent calls in the same match, we use pchars() just to find the printed
lengths of the substrings. */
if (callout_where)
{
if (f != NULL) fprintf(f, "--->");
/* The subject before the match start. */
pre_start = pchars(cb->subject, cb->start_match, utf, f);
/* If a lookbehind is involved, the current position may be earlier than the
match start. If so, use the match start instead. */
current_position = (cb->current_position >= cb->start_match)?
cb->current_position : cb->start_match;
/* The subject between the match start and the current position. */
post_start = pchars(cb->subject + cb->start_match,
current_position - cb->start_match, utf, f);
/* Print from the current position to the end. */
pchars(cb->subject + current_position, cb->subject_length - current_position,
utf, f);
/* Calculate the total subject printed length (no print). */
subject_length = pchars(cb->subject, cb->subject_length, utf, NULL);
if (f != NULL) fprintf(f, "\n");
/* For automatic callouts, show the pattern offset. Otherwise, for a
numerical callout whose number has not already been shown with captured
strings, show the number here. A callout with a string argument has been
displayed above. */
if (cb->callout_number == 255)
{
fprintf(outfile, "%+3d ", (int)cb->pattern_position);
/* An offset above 99 does not fit the column, so the markers start on a new
line. */
if (cb->pattern_position > 99) fprintf(outfile, "\n ");
}
else
{
if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
else fprintf(outfile, "%3d ", cb->callout_number);
}
/* Now show position indicators */
/* pre_start and post_start are printed widths returned by pchars(), so the
first caret lands under the match start and the second under the current
position. */
for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
fprintf(outfile, "^");
if (post_start > 0)
{
for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
fprintf(outfile, "^");
}
/* Pad past the end of the printed subject before showing the next pattern
item. */
for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
fprintf(outfile, " ");
if (cb->next_item_length != 0)
{
pchars(pbuffer + cb->pattern_position, cb->next_item_length, utf, outfile);
}
else
fprintf(outfile, "End of pattern");
fprintf(outfile, "\n");
}
first_callout = FALSE;
/* Show any mark info */
/* The mark is reported only when the pointer differs from the one seen at the
previous callout. */
if (cb->mark != last_callout_mark)
{
if (cb->mark == NULL)
fprintf(outfile, "Latest Mark: <unset>\n");
else
{
fprintf(outfile, "Latest Mark: ");
/* NOTE(review): presumably a negative length tells pchars() that the string
is preceded by its length, which is why the pointer is backed up by one code
unit - confirm against pchars(). */
pchars(cb->mark - 1, -1, utf, outfile);
putc('\n', outfile);
}
last_callout_mark = cb->mark;
}
/* Show callout data */
/* Non-zero callout data is printed and returned as the result straight away,
bypassing the count-based checks below (and without counting this callout). */
if (callout_data_ptr != NULL)
{
int callout_data = *((int32_t *)callout_data_ptr);
if (callout_data != 0)
{
fprintf(outfile, "Callout data = %d\n", callout_data);
return callout_data;
}
}
/* Keep count and give the appropriate return code */
/* For both cerror and cfail, element [0] is compared with the callout number
and element [1] is the callout count from which the setting takes effect. The
error setting is tested before the fail setting. */
callout_count++;
if (cb->callout_number == dat_datctl.cerror[0] &&
callout_count >= dat_datctl.cerror[1])
return PCRE2_ERROR_CALLOUT;
if (cb->callout_number == dat_datctl.cfail[0] &&
callout_count >= dat_datctl.cfail[1])
return 1;
return 0;
}
/*************************************************
* Handle *MARK and copy/get tests *
*************************************************/
/* This function is called after complete and partial matches. It runs the
tests for substring extraction.
Arguments:
utf TRUE for utf
capcount return from pcre2_match()
Returns: FALSE if print_error_message() fails
*/
static BOOL
copy_and_get(BOOL utf, int capcount)
{
int i;
uint8_t *nptr;

/* Stage 1: copy substrings by number. The list of numbers ends at the first
negative entry, or after MAXCPYGET entries. Each copy is cross-checked against
the length reported by the separate length function. */

i = 0;
while (i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0)
  {
  PCRE2_UCHAR copybuffer[256];
  PCRE2_SIZE copied_len = sizeof(copybuffer)/sizeof(*copybuffer);
  PCRE2_SIZE reported_len;
  uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
  int copy_rc = pcre2_substring_copy_bynumber(match_data, n, copybuffer,
    &copied_len);
  int len_rc;

  if (copy_rc >= 0)
    {
    fprintf(outfile, "%2dC ", n);
    pchars(copybuffer, copied_len, utf, outfile);
    fprintf(outfile, " (%" SIZ_FORM ")\n", copied_len);
    }
  else
    {
    fprintf(outfile, "Copy substring %d failed (%d): ", n, copy_rc);
    if (!print_error_message(copy_rc, "", "\n")) return FALSE;
    }

  len_rc = pcre2_substring_length_bynumber(match_data, n, &reported_len);
  if (len_rc < 0)
    {
    fprintf(outfile, "Get substring %d length failed (%d): ", n, len_rc);
    if (!print_error_message(len_rc, "", "\n")) return FALSE;
    }
  else if (copy_rc >= 0 && reported_len != copied_len)
    {
    fprintf(outfile, "Mismatched substring lengths: %"
      SIZ_FORM " %" SIZ_FORM "\n", copied_len, reported_len);
    }

  i++;
  }

/* Stage 2: copy substrings by name. The names are stored one after another,
each zero-terminated, and an empty name ends the list. Each name is put into
pbuffer in the code unit width under test before it is looked up. */

nptr = dat_datctl.copy_names;
while (*nptr != 0)
  {
  PCRE2_UCHAR copybuffer[256];
  PCRE2_SIZE copied_len = sizeof(copybuffer)/sizeof(*copybuffer);
  PCRE2_SIZE reported_len;
  int copy_rc;
  int len_rc;
  int groupnumber;
  size_t namelen = strlen((const char *)nptr);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  PCRE2_SIZE cnl = namelen;
#endif

#if PCRE2_CODE_UNIT_WIDTH == 8
  strcpy((char *)pbuffer8, (char *)nptr);
#endif
#if defined(EBCDIC) && !EBCDIC_IO
  ascii_to_ebcdic_str(pbuffer8, namelen);
#endif
#if PCRE2_CODE_UNIT_WIDTH == 16
  (void)to16(nptr, utf, &cnl);
#endif
#if PCRE2_CODE_UNIT_WIDTH == 32
  (void)to32(nptr, utf, &cnl);
#endif

  /* A name that maps to more than one group is not an error here; it is
  labelled as non-unique in the output below. */

  groupnumber = pcre2_substring_number_from_name(compiled_code, pbuffer);
  if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
    fprintf(outfile, "Number not found for group \"%s\"\n", nptr);

  copy_rc = pcre2_substring_copy_byname(match_data, pbuffer, copybuffer,
    &copied_len);
  if (copy_rc >= 0)
    {
    fprintf(outfile, " C ");
    pchars(copybuffer, copied_len, utf, outfile);
    fprintf(outfile, " (%" SIZ_FORM ") %s", copied_len, nptr);
    if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
      else fprintf(outfile, " (non-unique)\n");
    }
  else
    {
    fprintf(outfile, "Copy substring \"%s\" failed (%d): ", nptr, copy_rc);
    if (!print_error_message(copy_rc, "", "\n")) return FALSE;
    }

  len_rc = pcre2_substring_length_byname(match_data, pbuffer, &reported_len);
  if (len_rc < 0)
    {
    fprintf(outfile, "Get substring \"%s\" length failed (%d): ", nptr, len_rc);
    if (!print_error_message(len_rc, "", "\n")) return FALSE;
    }
  else if (copy_rc >= 0 && reported_len != copied_len)
    {
    fprintf(outfile, "Mismatched substring lengths: %"
      SIZ_FORM " %" SIZ_FORM "\n", copied_len, reported_len);
    }

  nptr += namelen + 1;
  }

/* Stage 3: get substrings by number. A successful get hands back memory that
has to be released with pcre2_substring_free(). */

i = 0;
while (i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0)
  {
  PCRE2_UCHAR *gotbuffer;
  PCRE2_SIZE got_len;
  uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
  int get_rc = pcre2_substring_get_bynumber(match_data, n, &gotbuffer,
    &got_len);

  if (get_rc >= 0)
    {
    fprintf(outfile, "%2dG ", n);
    pchars(gotbuffer, got_len, utf, outfile);
    fprintf(outfile, " (%" SIZ_FORM ")\n", got_len);
    pcre2_substring_free(gotbuffer);
    }
  else
    {
    fprintf(outfile, "Get substring %d failed (%d): ", n, get_rc);
    if (!print_error_message(get_rc, "", "\n")) return FALSE;
    }

  i++;
  }

/* Stage 4: get substrings by name, using the same name-list layout and name
conversion as stage 2. */

nptr = dat_datctl.get_names;
while (*nptr != 0)
  {
  PCRE2_UCHAR *gotbuffer;
  PCRE2_SIZE got_len;
  int get_rc;
  int groupnumber;
  size_t namelen = strlen((const char *)nptr);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  PCRE2_SIZE cnl = namelen;
#endif

#if PCRE2_CODE_UNIT_WIDTH == 8
  strcpy((char *)pbuffer8, (char *)nptr);
#endif
#if defined(EBCDIC) && !EBCDIC_IO
  ascii_to_ebcdic_str(pbuffer8, namelen);
#endif
#if PCRE2_CODE_UNIT_WIDTH == 16
  (void)to16(nptr, utf, &cnl);
#endif
#if PCRE2_CODE_UNIT_WIDTH == 32
  (void)to32(nptr, utf, &cnl);
#endif

  groupnumber = pcre2_substring_number_from_name(compiled_code, pbuffer);
  if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
    fprintf(outfile, "Number not found for group \"%s\"\n", nptr);

  get_rc = pcre2_substring_get_byname(match_data, pbuffer, &gotbuffer,
    &got_len);
  if (get_rc >= 0)
    {
    fprintf(outfile, " G ");
    pchars(gotbuffer, got_len, utf, outfile);
    fprintf(outfile, " (%" SIZ_FORM ") %s", got_len, nptr);
    if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
      else fprintf(outfile, " (non-unique)\n");
    pcre2_substring_free(gotbuffer);
    }
  else
    {
    fprintf(outfile, "Get substring \"%s\" failed (%d): ", nptr, get_rc);
    if (!print_error_message(get_rc, "", "\n")) return FALSE;
    }

  nptr += namelen + 1;
  }

/* Stage 5: get the complete list of captured strings, but only when that has
been requested. */

if ((dat_datctl.control & CTL_GETALL) == 0) return TRUE;

  {
  PCRE2_UCHAR **stringlist;
  PCRE2_SIZE *lengths;
  int list_rc = pcre2_substring_list_get(match_data, &stringlist, &lengths);

  if (list_rc < 0)
    {
    fprintf(outfile, "get substring list failed (%d): ", list_rc);
    if (!print_error_message(list_rc, "", "\n")) return FALSE;
    return TRUE;
    }

  for (i = 0; i < capcount; i++)
    {
    fprintf(outfile, "%2dL ", i);
    pchars(stringlist[i], lengths[i], utf, outfile);
    putc('\n', outfile);
    }

  /* The entry following the last string must be the NULL terminator. */

  if (stringlist[i] != NULL)
    fprintf(outfile, "string list not terminated by NULL\n");
  pcre2_substring_list_free(stringlist);
  }

return TRUE;
}
/*************************************************
* Process a data line *
*************************************************/
/* The line is in buffer; it will not be empty.
Arguments: none
Returns: PR_OK continue processing next line
PR_SKIP skip to a blank line
PR_ABEND abort the pcre2test run
*/
static int
process_data(void)
{
PCRE2_SIZE ulen, arg_ulen;
uint32_t gmatched;
uint32_t c, k;
uint32_t g_notempty = 0;
uint8_t *p; /* Position within buffer (raw input line) */
size_t len;
size_t needlen; /* Bytes, for sizing dbuffer */
pcre2_match_context *use_dat_context;
BOOL utf;
BOOL subject_literal;
PCRE2_SIZE *ovector;
PCRE2_SPTR ovecsave[2] = { NULL, NULL };
uint32_t oveccount;
PCRE2_UCHAR *q = NULL; /* Typed pointer within dbuffer */
PCRE2_UCHAR *start_rep; /* Position within dbuffer; stashed value of q */
PCRE2_UCHAR *pp; /* Subject pointer within dbuffer */
subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
/* Copy the default context and data control blocks to the active ones. Then
copy from the pattern the controls that can be set in either the pattern or the
data. This allows them to be overridden in the data line. We do not do this for
options because those that are common apply separately to compiling and
matching. */
memcpy(dat_context, default_dat_context, sizeof(pcre2_match_context));
memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
if (dat_datctl.substitute_skip == 0)
dat_datctl.substitute_skip = pat_patctl.substitute_skip;
if (dat_datctl.substitute_stop == 0)
dat_datctl.substitute_stop = pat_patctl.substitute_stop;
/* Initialize for scanning the data line. */
#if PCRE2_CODE_UNIT_WIDTH == 8
utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
((pcre2_real_code *)preg.re_pcre2_code)->overall_options :
compiled_code->overall_options) & PCRE2_UTF) != 0;
#else
utf = (compiled_code->overall_options & PCRE2_UTF) != 0;
#endif
start_rep = NULL;
len = strlen((const char *)buffer);
while (len > 0 && isspace(buffer[len-1])) len--;
buffer[len] = 0;
p = buffer;
while (isspace(*p))
{
p++;
len--;
}
/* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
if (utf)
{
uint8_t *ptmp;
uint32_t cc;
int n = 1;
uint8_t *ptmp_end = p + len;
for (ptmp = p; n > 0 && *ptmp; ptmp += n)
n = utf8_to_ord(ptmp, ptmp_end, &cc);
if (n <= 0)
{
fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
"in UTF mode\n");
return PR_OK;
}
}
#ifdef SUPPORT_VALGRIND
/* Mark the dbuffer as addressable but undefined again. */
if (dbuffer != NULL)
{
VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
}
#endif
/* Allocate a buffer to hold the data line; len+1 is an upper bound on
the number of code units that will be needed (though the buffer may have to be
extended if replication is involved). */
needlen = CU2BYTES(len+1);
if (dbuffer == NULL || needlen >= dbuffer_size)
{
while (needlen >= dbuffer_size)
{
if (dbuffer_size < SIZE_MAX/2) dbuffer_size *= 2;
else dbuffer_size = needlen + 1;
}
dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
if (dbuffer == NULL)
{
fprintf(stderr, "pcre2test: realloc(%" SIZ_FORM ") failed\n", dbuffer_size);
exit(1);
}
}
q = (PCRE2_UCHAR *)dbuffer;
/* Scan the data line, interpreting data escapes, and put the result into a
buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
*/
while ((c = *p++) != 0)
{
int i = 0;
size_t replen; /* Bytes, for sizing dbuffer */
enum force_encoding encoding = FORCE_NONE;
/* ] may mark the end of a replicated sequence */
if (c == ']' && start_rep != NULL)
{
long li;
char *endptr;
if (*p++ != '{')
{
fprintf(outfile, "** Expected '{' after \\[....]\n");
return PR_OK;
}
li = strtol((const char *)p, &endptr, 10);
if (S32OVERFLOW(li))
{
fprintf(outfile, "** Repeat count too large\n");
return PR_OK;
}
i = (int)li;
p = (uint8_t *)endptr;
if (*p++ != '}')
{
fprintf(outfile, "** Expected '}' after \\[...]{...\n");
return PR_OK;
}
if (i-- <= 0)
{
fprintf(outfile, "** Zero or negative repeat not allowed\n");
return PR_OK;
}
replen = (uint8_t *)q - (uint8_t *)start_rep;
if (i > 0 && replen > (SIZE_MAX - needlen) / i)
{
fprintf(outfile, "** Expanded content too large\n");
return PR_OK;
}
needlen += replen * i;
if (needlen >= dbuffer_size)
{
size_t qoffset = (uint8_t *)q - dbuffer;
size_t rep_offset = (uint8_t *)start_rep - dbuffer;
while (needlen >= dbuffer_size)
{
if (dbuffer_size < SIZE_MAX/2) dbuffer_size *= 2;
else dbuffer_size = needlen + 1;
}
dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
if (dbuffer == NULL)
{
fprintf(stderr, "pcre2test: realloc(%" SIZ_FORM ") failed\n",
dbuffer_size);
exit(1);
}
q = (PCRE2_UCHAR *)(dbuffer + qoffset);
start_rep = (PCRE2_UCHAR *)(dbuffer + rep_offset);
}
while (i-- > 0)
{
memcpy(q, start_rep, replen);
q += BYTES2CU(replen);
}
start_rep = NULL;
continue;
}
/* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
set, do the fudge for setting the top bit. */
if (c != '\\' || subject_literal)
{
uint32_t topbit = 0;
#if PCRE2_CODE_UNIT_WIDTH == 32
if (c == 0xff && *p != 0)
{
topbit = 0x80000000;
c = *p++;
}
#endif
if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
HASUTF8EXTRALEN(c))
{
GETUTF8INC(c, p);
}
c |= topbit;
}
/* Handle backslash escapes */
else switch ((c = *p++))
{
case '\\': break;
case 'a': c = '\a'; break;
case 'b': c = '\b'; break;
#if defined(EBCDIC) && !EBCDIC_IO
/* \e is the odd one out since it's not defined in the C standard,
precisely because of EBCDIC (apparently EBCDIC 'ESC' character isn't
an exact match to Latin-1 'ESC', hence '\e' isn't necessarily
supported by EBCDIC compilers). */
case 'e': c = '\x1b'; break;
#else
case 'e': c = CHAR_ESC; break;
#endif
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case 'v': c = '\v'; break;
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
c -= '0';
while (i++ < 2 && *p >= '0' && *p < '8')
c = c * 8 + (*p++ - '0');
c = CHAR_OUTPUT(CHAR_INPUT_HEX(c));
encoding = (utf && c > 255)? FORCE_UTF : FORCE_RAW;
break;
case 'o':
if (*p == '{')
{
uint8_t *pt = p;
c = 0;
for (pt++; isdigit(*pt) && *pt < '8'; ++i, pt++)
{
if (c >= 0x20000000u)
{
fprintf(outfile, "** \\o{ escape too large\n");
return PR_OK;
}
else c = c * 8 + (*pt - '0');
}
c = CHAR_OUTPUT(CHAR_INPUT_HEX(c));
if (i == 0 || *pt != '}')
{
fprintf(outfile, "** Malformed \\o{ escape\n");
return PR_OK;
}
else p = pt + 1;
}
break;
case 'x':
c = 0;
if (*p == '{')
{
uint8_t *pt = p;
/* We used to have "while (isxdigit(*(++pt)))" here, but it fails
when isxdigit() is a macro that refers to its argument more than
once. This is banned by the C Standard, but apparently happens in at
least one macOS environment. */
for (pt++; isxdigit(*pt); pt++)
{
if (++i == 9)
{
fprintf(outfile, "** Too many hex digits in \\x{...} item; "
"using only the first eight.\n");
while (isxdigit(*pt)) pt++;
break;
}
else c = c * 16 + (tolower(*pt) - (isdigit(*pt)? '0' : 'a' - 10));
}
c = CHAR_OUTPUT(CHAR_INPUT_HEX(c));
if (i == 0 || *pt != '}')
{
fprintf(outfile, "** Malformed \\x{ escape\n");
return PR_OK;
}
else p = pt + 1;
}
else
{
/* \x without {} always defines just one byte in 8-bit mode. This
allows UTF-8 characters to be constructed byte by byte, and also allows
invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
Otherwise, pass it down as data. */
while (i++ < 2 && isxdigit(*p))
{
c = c * 16 + (tolower(*p) - (isdigit(*p)? '0' : 'a' - 10));
p++;
}
c = CHAR_OUTPUT(CHAR_INPUT_HEX(c));
#if PCRE2_CODE_UNIT_WIDTH == 8
if (utf) encoding = FORCE_RAW;
#endif
}
break;
case 'N':
#ifndef EBCDIC
if (memcmp(p, "{U+", 3) == 0 && isxdigit(p[3]))
{
char *endptr;
unsigned long uli;
p += 3;
errno = 0;
uli = strtoul((const char *)p, &endptr, 16);
if (errno == 0 && *endptr == '}' && uli <= UINT32_MAX)
{
c = (uint32_t)uli;
p = (uint8_t *)endptr + 1;
encoding = FORCE_UTF;
break;
}
}
#endif
fprintf(outfile, "** Malformed \\N{U+ escape\n");
return PR_OK;
case 0: /* \ followed by EOF allows for an empty line */
p--;
continue;
case '=': /* \= terminates the data, starts modifiers */
goto ENDSTRING;
case '[': /* \[ introduces a replicated character sequence */
if (start_rep != NULL)
{
fprintf(outfile, "** Nested replication is not supported\n");
return PR_OK;
}
start_rep = q;
continue;
default:
if (isalnum(c))
{
fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
return PR_OK;
}
}
/* We now have a character value in c that may be greater than 255.
Depending on how we got it, the encoding enum could be set to tell
us how to encode it, otherwise follow the utf modifier. */
#if PCRE2_CODE_UNIT_WIDTH == 8
if (encoding == FORCE_RAW || !(utf || encoding == FORCE_UTF))
{
if (c > 0xffu)
{
fprintf(outfile, "** Character \\x{%x} is greater than 255 "
"and UTF-8 mode is not enabled.\n", c);
fprintf(outfile, "** Truncation will probably give the wrong "
"result.\n");
}
*q++ = (uint8_t)c;
}
else
{
if (c > 0x7fffffff)
{
fprintf(outfile, "** Character \\N{U+%x} is greater than 0x7fffffff "
"and therefore cannot be encoded as UTF-8\n", c);
return PR_OK;
}
else if (encoding == FORCE_UTF && c > MAX_UTF_CODE_POINT)
fprintf(outfile, "** Warning: character \\N{U+%x} is greater than "
"0x%x and should not be encoded as UTF-8\n",
c, MAX_UTF_CODE_POINT);
q += ord_to_utf8(c, q);
}
#endif
#if PCRE2_CODE_UNIT_WIDTH == 16
/* Unlike the 8-bit code, there are no forced raw suggestions for the
16-bit mode, so assume raw unless utf is preferred */
if (!(encoding == FORCE_UTF || utf))
{
if (c > 0xffffu)
{
fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
"and UTF-16 mode is not enabled.\n", c);
fprintf(outfile, "** Truncation will probably give the wrong "
"result.\n");
}
*q++ = (uint16_t)c;
}
else
{
if (c > MAX_UTF_CODE_POINT)
{
fprintf(outfile, "** Failed: character \\N{U+%x} is greater than "
"0x%x and therefore cannot be encoded as UTF-16\n",
c, MAX_UTF_CODE_POINT);
return PR_OK;
}
else if (c >= 0x10000u)
{
c -= 0x10000u;
*q++ = 0xd800 | (c >> 10);
*q++ = 0xdc00 | (c & 0x3ff);
}
else
{
if (encoding == FORCE_UTF && 0xe000u > c && c >= 0xd800u)
fprintf(outfile, "** Warning: character \\N{U+%x} is a surrogate "
"and should not be encoded as UTF-16\n", c);
*q++ = c;
}
}
#endif
#if PCRE2_CODE_UNIT_WIDTH == 32
if (encoding == FORCE_UTF && c > MAX_UTF_CODE_POINT)
fprintf(outfile, "** Warning: character \\N{U+%x} is greater than "
"0x%x and should not be encoded as UTF-32\n",
c, MAX_UTF_CODE_POINT);
*q++ = c;
#endif
}
ENDSTRING:
*q = 0;
len = (uint8_t *)q - dbuffer; /* Length in bytes */
ulen = BYTES2CU(len); /* Length in code units */
arg_ulen = ulen; /* Value to use in match arg */
/* If the string was terminated by \= we must now interpret modifiers. */
if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
return PR_OK;
/* Setting substitute_{skip,stop} implies a substitute callout. */
if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0)
dat_datctl.control2 |= CTL2_SUBSTITUTE_CALLOUT;
/* Check for mutually exclusive modifiers. At present, these are all in the
first control word. */
for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
{
c = dat_datctl.control & exclusive_dat_controls[k];
if (c != 0 && c != (c & (~c+1)))
{
show_controls(c, 0, "** Not allowed together:");
fprintf(outfile, "\n");
return PR_OK;
}
}
if (dat_datctl.replacement[0] != 0)
{
if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 &&
(dat_datctl.control & CTL_NULLCONTEXT) != 0)
{
fprintf(outfile, "** Replacement callouts are not supported with null_context.\n");
return PR_OK;
}
if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CASE_CALLOUT) != 0 &&
(dat_datctl.control & CTL_NULLCONTEXT) != 0)
{
fprintf(outfile, "** Replacement case callouts are not supported with null_context.\n");
return PR_OK;
}
if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
}
/* Warn for modifiers that are ignored for DFA. */
if ((dat_datctl.control & CTL_DFA) != 0)
{
if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
fprintf(outfile, "** Ignored for DFA matching: allcaptures\n");
if ((dat_datctl.control2 & CTL2_HEAPFRAMES_SIZE) != 0)
fprintf(outfile, "** Ignored for DFA matching: heapframes_size\n");
}
/* We now have the subject in dbuffer, with len containing the byte length, and
ulen containing the code unit length, with a copy in arg_ulen for use in match
function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
zero_terminate modifier is present).
Move the data to the end of the buffer so that a read over the end can be
caught by valgrind or other means. If we have explicit valgrind support, mark
the unused start of the buffer unaddressable. If we are using the POSIX
interface, or testing zero-termination, we must include the terminating zero in
the usable data. */
c = ((pat_patctl.control & CTL_POSIX) != 0 ||
(dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? CU2BYTES(1) : 0;
pp = memmove(dbuffer + dbuffer_size - (len + c), dbuffer, len + c);
#ifdef SUPPORT_VALGRIND
VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
#endif
#if defined(EBCDIC) && !EBCDIC_IO
ascii_to_ebcdic_str(pp, len);
#endif
/* Now pp points to the subject string, but if null_subject was specified, set
it to NULL to test PCRE2's behaviour. */
if ((dat_datctl.control2 & CTL2_NULL_SUBJECT) != 0) pp = NULL;
/* POSIX matching is only possible in 8-bit mode, and it does not support
timing or other fancy features. Some were checked at compile time, but we need
to check the match-time settings here. */
#if PCRE2_CODE_UNIT_WIDTH == 8
if ((pat_patctl.control & CTL_POSIX) != 0)
{
int rc;
int eflags = 0;
regmatch_t *pmatch = NULL;
regmatch_t startend_buf;
const char *msg = "** Ignored with POSIX interface:";
if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
prmsg(&msg, "callout_error");
if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
prmsg(&msg, "callout_fail");
if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
prmsg(&msg, "copy");
if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
prmsg(&msg, "get");
if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
if (dat_datctl.offset != 0) prmsg(&msg, "offset");
if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
{
fprintf(outfile, "%s", msg);
show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
msg = "";
}
if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
(dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
{
show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
msg = "";
}
if (msg[0] == 0) fprintf(outfile, "\n");
if (dat_datctl.oveccount > 0)
{
pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
if (pmatch == NULL)
{
fprintf(outfile, "** Failed to get memory for recording matching "
"information (size set = %du)\n", dat_datctl.oveccount);
return PR_ABEND;
}
}
if (dat_datctl.startend[0] != CFORE_UNSET)
{
if (pmatch == NULL) pmatch = &startend_buf;
pmatch[0].rm_so = (regoff_t)dat_datctl.startend[0];
pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
(regoff_t)dat_datctl.startend[1] : (regoff_t)len;
eflags |= REG_STARTEND;
}
if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
if (rc != 0)
{
size_t usize = regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
fprintf(outfile, "No match: POSIX code %d: ", rc);
pchars((PCRE2_SPTR8)pbuffer8, usize - 1, utf, outfile);
fputs("\n", outfile);
}
else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
fprintf(outfile, "Matched with REG_NOSUB\n");
else if (dat_datctl.oveccount == 0)
fprintf(outfile, "Matched without capture\n");
else
{
size_t i, j;
size_t last_printed = (size_t)dat_datctl.oveccount;
for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
{
if (pmatch[i].rm_so >= 0)
{
PCRE2_SIZE start = pmatch[i].rm_so;
PCRE2_SIZE end = pmatch[i].rm_eo;
for (j = last_printed + 1; j < i; j++)
fprintf(outfile, "%2d: <unset>\n", (int)j);
last_printed = i;
if (start > end)
{
start = pmatch[i].rm_eo;
end = pmatch[i].rm_so;
fprintf(outfile, "Start of matched string is beyond its end - "
"displaying from end to start.\n");
}
fprintf(outfile, "%2d: ", (int)i);
pchars(pp + start, end - start, utf, outfile);
fprintf(outfile, "\n");
if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
(dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
{
fprintf(outfile, "%2d+ ", (int)i);
/* Note: don't use the start/end variables here because we want to
show the text from what is reported as the end. */
pchars(pp + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
fprintf(outfile, "\n");
}
}
}
}
if (pmatch != &startend_buf) free(pmatch);
return PR_OK;
}
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
/* Handle matching via the native interface. Check for consistency of
modifiers. */
if (dat_datctl.startend[0] != CFORE_UNSET)
fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
/* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
matching, even if the JIT compiler was used. */
if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
compiled_code->executable_jit != NULL)
{
fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
dat_datctl.control &= ~CTL_ALLUSEDTEXT;
}
/* Handle passing the subject as zero-terminated. */
if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
arg_ulen = PCRE2_ZERO_TERMINATED;
/* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
NULL context. */
use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
NULL : dat_context;
/* Enable display of malloc/free if wanted. We can do this only if either the
pattern or the subject is processed with a context. */
show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
if (show_memory &&
(pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
fprintf(outfile, "** \\=memory requires either a pattern or a subject "
"context: ignored\n");
/* Create and assign a JIT stack if requested. */
if (dat_datctl.jitstack != 0)
{
if (dat_datctl.jitstack != jit_stack_size)
{
pcre2_jit_stack_free(jit_stack);
jit_stack = pcre2_jit_stack_create(1, dat_datctl.jitstack * 1024, NULL);
jit_stack_size = dat_datctl.jitstack;
}
pcre2_jit_stack_assign(dat_context, jit_callback, jit_stack);
}
/* Or de-assign */
else if (jit_stack != NULL)
{
pcre2_jit_stack_assign(dat_context, NULL, NULL);
pcre2_jit_stack_free(jit_stack);
jit_stack = NULL;
jit_stack_size = 0;
}
/* When no JIT stack is assigned, we must ensure that there is a JIT callback
if we want to verify that JIT was actually used. */
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
{
pcre2_jit_stack_assign(dat_context, jit_callback, NULL);
}
/* Adjust match_data according to size of offsets required. A size of zero
causes a new match data block to be obtained that exactly fits the pattern. */
if (dat_datctl.oveccount == 0)
{
pcre2_match_data_free(match_data);
match_data = pcre2_match_data_create_from_pattern(compiled_code,
general_context);
max_oveccount = pcre2_get_ovector_count(match_data);
}
else if (dat_datctl.oveccount <= max_oveccount)
{
match_data->oveccount = dat_datctl.oveccount;
}
else
{
max_oveccount = dat_datctl.oveccount;
pcre2_match_data_free(match_data);
match_data = pcre2_match_data_create(max_oveccount, general_context);
}
if (match_data == NULL)
{
fprintf(outfile, "** Failed to get memory for recording matching "
"information (size requested: %d)\n", dat_datctl.oveccount);
max_oveccount = 0;
return PR_ABEND;
}
ovector = match_data->ovector;
oveccount = pcre2_get_ovector_count(match_data);
/* Helper to clear any cached heap frames from the match_data. */
#define CLEAR_HEAP_FRAMES() \
do { \
void *heapframes = (void *)(match_data->heapframes); \
void *memory_data = match_data->memctl.memory_data; \
match_data->memctl.free(heapframes, memory_data); \
match_data->heapframes = NULL; \
match_data->heapframes_size = 0; \
} \
while (0)
/* Replacement processing is ignored for DFA matching. */
if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
{
fprintf(outfile, "** Ignored for DFA matching: replace\n");
dat_datctl.replacement[0] = 0;
}
/* If a replacement string is provided, call pcre2_substitute() instead of or
after one of the matching functions. First we have to convert the replacement
string to the appropriate width. */
if (dat_datctl.replacement[0] != 0)
{
int rc;
uint8_t *pr;
// TODO Move these buffers to the heap and use Valgrind macros to ensure no overread
PCRE2_UCHAR rbuffer[REPLACE_BUFFSIZE];
PCRE2_UCHAR nbuffer[REPLACE_BUFFSIZE];
PCRE2_UCHAR *rbptr;
PCRE2_UCHAR *r;
uint32_t xoptions;
uint32_t emoption; /* External match option */
PCRE2_SIZE j, rlen, nsize, nsize_input, erroroffset;
BOOL badutf = FALSE;
/* Fill the ovector with junk to detect elements that do not get set
when they should be (relevant only when "allvector" is specified). */
for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
if (timeitm)
fprintf(outfile, "** Timing is not supported with replace: ignored\n");
if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
/* Check for a test that does substitution after an initial external match.
If this is set, we run the external match, but leave the interpretation of
its output to pcre2_substitute(). */
emoption = ((dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0)? 0 :
PCRE2_SUBSTITUTE_MATCHED;
if (emoption != 0)
{
if ((pat_patctl.control & CTL_JITFAST) != 0)
{
rc = pcre2_jit_match(compiled_code, pp, arg_ulen, dat_datctl.offset,
dat_datctl.options, match_data, use_dat_context);
}
else
{
rc = pcre2_match(compiled_code, pp, arg_ulen, dat_datctl.offset,
dat_datctl.options, match_data, use_dat_context);
}
}
xoptions = emoption |
(((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
PCRE2_SUBSTITUTE_GLOBAL) |
(((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
PCRE2_SUBSTITUTE_EXTENDED) |
(((dat_datctl.control2 & CTL2_SUBSTITUTE_LITERAL) == 0)? 0 :
PCRE2_SUBSTITUTE_LITERAL) |
(((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
(((dat_datctl.control2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) == 0)? 0 :
PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) |
(((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
(((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
PCRE2_SUBSTITUTE_UNSET_EMPTY);
r = rbuffer;
pr = dat_datctl.replacement;
/* If the replacement starts with '[<number>]' we interpret that as the length
value for the replacement buffer. */
nsize = sizeof(nbuffer)/sizeof(*nbuffer);
if (*pr == '[')
{
PCRE2_SIZE n = 0;
while ((c = *(++pr)) >= '0' && c <= '9') n = n * 10 + (c - '0');
if (*pr++ != ']')
{
fprintf(outfile, "** Bad buffer size in replacement string\n");
return PR_OK;
}
if (n > nsize)
{
fprintf(outfile, "** Replacement buffer setting (%" SIZ_FORM ") is too "
"large (max %" SIZ_FORM ")\n", n, nsize);
return PR_OK;
}
nsize = n;
}
/* Now copy the replacement string to a buffer of the appropriate width. No
escape processing is done for replacements. In UTF mode, check for an invalid
UTF-8 input string, and if it is invalid, just copy its code units without
UTF interpretation. This provides a means of checking that an invalid string
is detected. Otherwise, UTF-8 can be used to include wide characters in a
replacement. */
if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
/* Not UTF or invalid UTF-8: just copy the code units. */
if (!utf || badutf)
{
while ((c = *pr++) != 0)
{
#if defined(EBCDIC) && !EBCDIC_IO
c = ascii_to_ebcdic(c);
#endif
*r++ = c;
}
}
/* Valid UTF-8 replacement string */
else while ((c = *pr++) != 0)
{
if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
#if PCRE2_CODE_UNIT_WIDTH == 8
r += ord_to_utf8(c, r);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (c >= 0x10000u)
{
c-= 0x10000u;
*r++ = 0xd800 | (c >> 10);
*r++ = 0xdc00 | (c & 0x3ff);
}
else *r++ = c;
#elif PCRE2_CODE_UNIT_WIDTH == 32
*r++ = c;
#endif
}
*r = 0;
if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
rlen = PCRE2_ZERO_TERMINATED;
else
rlen = r - rbuffer;
if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
{
pcre2_set_substitute_callout(dat_context, substitute_callout_function, NULL);
}
else
{
pcre2_set_substitute_callout(dat_context, NULL, NULL); /* No callout */
}
if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CASE_CALLOUT) != 0)
{
pcre2_set_substitute_case_callout(dat_context, substitute_case_callout_function, NULL);
}
else
{
pcre2_set_substitute_case_callout(dat_context, NULL, NULL); /* No callout */
}
/* There is a special option to set the replacement to NULL in order to test
that case. */
rbptr = ((dat_datctl.control2 & CTL2_NULL_REPLACEMENT) == 0)? rbuffer : NULL;
if (malloc_testing) CLEAR_HEAP_FRAMES();
mallocs_called = 0;
nsize_input = nsize;
rc = pcre2_substitute(compiled_code, pp, arg_ulen, dat_datctl.offset,
dat_datctl.options|xoptions, match_data, use_dat_context,
rbptr, rlen, nbuffer, &nsize);
/* For malloc testing, we repeat the substitution. */
if (malloc_testing && (dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) == 0)
{
for (int i = 0, target_mallocs = mallocs_called; i <= target_mallocs; i++)
{
CLEAR_HEAP_FRAMES();
mallocs_until_failure = i;
nsize = nsize_input;
rc = pcre2_substitute(compiled_code, pp, arg_ulen, dat_datctl.offset,
dat_datctl.options|xoptions, match_data, use_dat_context,
rbptr, rlen, nbuffer, &nsize);
mallocs_until_failure = INT_MAX;
if (i < target_mallocs && rc != PCRE2_ERROR_NOMEMORY)
{
fprintf(outfile, "** malloc() Substitution test did not fail as expected (%d)\n",
rc);
return PR_ABEND;
}
}
}
if (rc < 0)
{
fprintf(outfile, "Failed: error %d", rc);
if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
fprintf(outfile, ": ");
if (!print_error_message(rc, "", "")) return PR_ABEND;
if (rc == PCRE2_ERROR_NOMEMORY &&
(xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
fprintf(outfile, ": %ld code units are needed", (long int)nsize);
if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
{
// TODO This is delicate here, and in the other places where we do this.
// Instead of re-measuring the length when the input is zero-terminated,
// we should save the original length somewhere.
PCRE2_SIZE full_rlen = (rlen != PCRE2_ZERO_TERMINATED)? rlen :
pcre2_strlen(rbptr);
fprintf(outfile, "\n here: ");
if (nsize > 0)
{
ptrunc(rbptr, full_rlen, nsize, TRUE, utf, outfile);
fprintf(outfile, " ");
}
fprintf(outfile, "|<--|");
if (nsize < full_rlen)
{
fprintf(outfile, " ");
ptrunc(rbptr, full_rlen, nsize, FALSE, utf, outfile);
}
}
}
else
{
fprintf(outfile, "%2d: ", rc);
pchars(nbuffer, nsize, utf, outfile);
}
fprintf(outfile, "\n");
show_memory = FALSE;
/* Show final ovector contents and resulting heapframe size if requested. */
if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
show_ovector(ovector, oveccount);
if ((dat_datctl.control2 & CTL2_HEAPFRAMES_SIZE) != 0 &&
(dat_datctl.control & CTL_DFA) == 0)
show_heapframes_size();
return PR_OK;
} /* End of substitution handling */
/* When a replacement string is not provided, run a loop for global matching
with one of the basic matching functions. */
for (gmatched = 0;; gmatched++)
{
PCRE2_SIZE j;
int capcount;
/* Fill the ovector with junk to detect elements that do not get set
when they should be. */
for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
/* When matching is via pcre2_match(), we will detect the use of JIT via the
stack callback function. */
jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
/* Do timing if required. */
if (timeitm > 0)
{
int i;
clock_t start_time, time_taken;
if ((dat_datctl.control & CTL_DFA) != 0)
{
if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
{
fprintf(outfile, "** Timing DFA restarts is not supported\n");
return PR_ABEND;
}
if (dfa_workspace == NULL)
dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
start_time = clock();
for (i = 0; i < timeitm; i++)
{
capcount = pcre2_dfa_match(compiled_code, pp, arg_ulen,
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
}
}
else if ((pat_patctl.control & CTL_JITFAST) != 0)
{
start_time = clock();
for (i = 0; i < timeitm; i++)
{
capcount = pcre2_jit_match(compiled_code, pp, arg_ulen,
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
use_dat_context);
}
}
else
{
start_time = clock();
for (i = 0; i < timeitm; i++)
{
capcount = pcre2_match(compiled_code, pp, arg_ulen,
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
use_dat_context);
}
}
total_match_time += (time_taken = clock() - start_time);
fprintf(outfile, "Match time %7.4f microseconds\n",
((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeitm);
}
/* Find the heap, match and depth limits if requested. The depth and heap
limits are not relevant for JIT. The return from check_match_limit() is the
return from the final call to pcre2_match() or pcre2_dfa_match(). */
if ((dat_datctl.control & (CTL_FINDLIMITS|CTL_FINDLIMITS_NOHEAP)) != 0)
{
if ((dat_datctl.control & CTL_FINDLIMITS_NOHEAP) == 0 &&
(compiled_code->executable_jit == NULL ||
(dat_datctl.options & PCRE2_NO_JIT) != 0))
{
(void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
}
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
"match");
if (compiled_code->executable_jit == NULL ||
(dat_datctl.options & PCRE2_NO_JIT) != 0 ||
(dat_datctl.control & CTL_DFA) != 0)
{
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
"depth");
}
if (capcount == 0)
{
fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
capcount = dat_datctl.oveccount;
}
}
/* Otherwise just run a single match, setting up a callout if required (the
default). The pattern remains in pbuffer8/16/32 after compilation, for use
by callouts. */
else
{
if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
{
pcre2_set_callout(dat_context, callout_function,
(void *)(&dat_datctl.callout_data));
first_callout = TRUE;
last_callout_mark = NULL;
callout_count = 0;
}
else
{
pcre2_set_callout(dat_context, NULL, NULL); /* No callout */
}
/* Run a single DFA or NFA match. */
if (malloc_testing) CLEAR_HEAP_FRAMES();
mallocs_called = 0;
if ((dat_datctl.control & CTL_DFA) != 0)
{
if (dfa_workspace == NULL)
dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
if (dfa_matched++ == 0)
dfa_workspace[0] = -1; /* To catch bad restart */
capcount = pcre2_dfa_match(compiled_code, pp, arg_ulen,
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
if (capcount == 0)
{
fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
capcount = dat_datctl.oveccount;
}
}
else
{
if ((pat_patctl.control & CTL_JITFAST) != 0)
capcount = pcre2_jit_match(compiled_code, pp, arg_ulen, dat_datctl.offset,
dat_datctl.options | g_notempty, match_data, use_dat_context);
else
capcount = pcre2_match(compiled_code, pp, arg_ulen, dat_datctl.offset,
dat_datctl.options | g_notempty, match_data, use_dat_context);
if (capcount == 0)
{
fprintf(outfile, "Matched, but too many substrings\n");
capcount = dat_datctl.oveccount;
}
}
/* For malloc testing, we repeat the matching. */
if (malloc_testing && (dat_datctl.control & CTL_CALLOUT_NONE) != 0)
{
for (int i = 0, target_mallocs = mallocs_called; i <= target_mallocs; i++)
{
CLEAR_HEAP_FRAMES();
mallocs_until_failure = i;
if ((dat_datctl.control & CTL_DFA) != 0)
{
if (dfa_matched++ == 0)
dfa_workspace[0] = -1; /* To catch bad restart */
capcount = pcre2_dfa_match(compiled_code, pp, arg_ulen,
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
}
else
{
if ((pat_patctl.control & CTL_JITFAST) != 0)
capcount = pcre2_jit_match(compiled_code, pp, arg_ulen, dat_datctl.offset,
dat_datctl.options | g_notempty, match_data, use_dat_context);
else
capcount = pcre2_match(compiled_code, pp, arg_ulen, dat_datctl.offset,
dat_datctl.options | g_notempty, match_data, use_dat_context);
}
mallocs_until_failure = INT_MAX;
if (capcount == 0)
capcount = dat_datctl.oveccount;
if (i < target_mallocs && capcount != PCRE2_ERROR_NOMEMORY)
{
fprintf(outfile, "** malloc() match test did not fail as expected (%d)\n",
capcount);
return PR_ABEND;
}
}
}
}
/* Verify that it's safe to call pcre2_next_match with rc < 0. */
if (capcount < 0 && (dat_datctl.control & CTL_ANYGLOB) != 0)
{
BOOL rc_nextmatch;
PCRE2_SIZE tmp_offset = 0xcd;
uint32_t tmp_options = 0xcd;
rc_nextmatch = pcre2_next_match(match_data, &tmp_offset, &tmp_options);
if (rc_nextmatch || tmp_offset != 0xcd || tmp_options != 0xcd)
{
fprintf(outfile, "** unexpected pcre2_next_match() for rc < 0\n");
return PR_ABEND;
}
}
/* The result of the match is now in capcount. First handle a successful
match. If pp was forced to be NULL (to test NULL handling) it will have been
treated as an empty string if the length was zero. So, re-create that for
outputting, preserving the invariant that pp is a valid pointer to a region
of length len followed by a null. */
if (capcount >= 0)
{
if (pp == NULL)
{
#ifdef SUPPORT_VALGRIND
/* Mark the start of dbuffer addressable again. */
VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, CU2BYTES(1));
#endif
pp = (PCRE2_UCHAR *)dbuffer;
*pp = 0;
}
if ((unsigned)capcount > oveccount) /* Check for lunatic return value */
{
fprintf(outfile,
"** PCRE2 error: returned count %d is too big for ovector count %d\n",
capcount, oveccount);
return PR_ABEND;
}
/* If PCRE2_COPY_MATCHED_SUBJECT was set, check that things are as they
should be, but not for fast JIT, where it isn't supported. */
if ((dat_datctl.options & PCRE2_COPY_MATCHED_SUBJECT) != 0 &&
(pat_patctl.control & CTL_JITFAST) == 0)
{
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) == 0)
fprintf(outfile,
"** PCRE2 error: flag not set after copy_matched_subject\n");
if (match_data->subject == pp)
fprintf(outfile,
"** PCRE2 error: copy_matched_subject has not copied\n");
if (memcmp(match_data->subject, pp, ulen) != 0)
fprintf(outfile,
"** PCRE2 error: copy_matched_subject mismatch\n");
}
/* If this is not the first time round a global loop, check that the
returned string has advanced.
There is one known case where this doesn't happen: when you have a
"badly-behaved" pattern which uses \K in a lookaround, and breaks the core
sanity rule that start_offset <= ovector[0] <= ovector[1]. An example would
be /(?<=\Ka)/g matching "aaa".
* first attempt, start_offset=0: ovector[0]=0, ovector[1]=1
* second attempt, start_offset=1: ovector[0]=0, ovector[1]=1
You can see that even though we *always* ensure that start_offset advances,
this doesn't guarantee to avoid duplicate matches.
The pcre2test behaviour is to return all the matches found, except in the
case where two adjacent matches are an exact duplicate. */
if (gmatched > 0 &&
!(dat_datctl.offset <= ovector[0] && ovector[0] <= ovector[1]) &&
pp + ovector[0] == ovecsave[0] && pp + ovector[1] == ovecsave[1])
{
fprintf(outfile, "global repeat returned the same match as previous\n");
goto NEXT_MATCH;
}
/* Outside of this exceptional case, we check that either we have a
"badly-behaved" match (note that not all badly-behaved matches are caught
above, only *duplicate* ones); or else in the well-behaved case the match
must make progress.
"Progress" is measured as ovector[1] strictly advancing, or, an empty match
after a non-empty match. */
if (gmatched > 0 &&
(dat_datctl.offset <= ovector[0] && ovector[0] <= ovector[1]) &&
!(pp + ovector[1] > ovecsave[1] ||
(ovector[1] == ovector[0] && ovecsave[1] != ovecsave[0] &&
pp + ovector[1] == ovecsave[1])))
{
fprintf(outfile,
"** PCRE2 error: global repeat did not make progress\n");
return PR_ABEND;
}
ovecsave[0] = pp + ovector[0];
ovecsave[1] = pp + ovector[1];
/* "allcaptures" requests showing of all captures in the pattern, to check
unset ones at the end. It may be set on the pattern or the data. Implement
by setting capcount to the maximum. This is not relevant for DFA matching,
so ignore it (warning given above). */
if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES)
{
capcount = maxcapcount + 1; /* Allow for full match */
if ((unsigned)capcount > oveccount) capcount = oveccount;
}
/* "allvector" requests showing the entire ovector. */
if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount;
/* Output the captured substrings. Note that, for the matched string,
the use of \K in an assertion can make the start later than the end. */
for (int i = 0; i < 2*capcount; i += 2)
{
PCRE2_SIZE lleft, lmiddle, lright;
PCRE2_SIZE start = ovector[i];
PCRE2_SIZE end = ovector[i+1];
if (start > end)
{
start = ovector[i+1];
end = ovector[i];
fprintf(outfile, "Start of matched string is beyond its end - "
"displaying from end to start.\n");
}
fprintf(outfile, "%2d: ", i/2);
/* Check for an unset group */
if (start == PCRE2_UNSET && end == PCRE2_UNSET)
{
fprintf(outfile, "<unset>\n");
continue;
}
/* Check for silly offsets, in particular, values that have not been
set when they should have been. However, if we are past the end of the
captures for this pattern ("allvector" causes this), or if we are DFA
matching, it isn't an error if the entry is unchanged. */
if (start > ulen || end > ulen)
{
if (((dat_datctl.control & CTL_DFA) != 0 ||
i >= (int)(2*maxcapcount + 2)) &&
start == JUNK_OFFSET && end == JUNK_OFFSET)
fprintf(outfile, "<unchanged>\n");
else
fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
(unsigned long int)start, (unsigned long int)end);
continue;
}
/* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
JIT, it is disabled above, with a comment.) When the match is done by the
interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
set, and if the leftmost consulted character is before the start of the
match or the rightmost consulted character is past the end of the match,
we want to show all consulted characters for the main matched string, and
indicate which were lookarounds. */
if (i == 0)
{
BOOL showallused;
PCRE2_SIZE leftchar, rightchar;
if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
{
leftchar = match_data->leftchar;
rightchar = match_data->rightchar;
showallused = i == 0 && (leftchar < start || rightchar > end);
}
else showallused = FALSE;
if (showallused)
{
lleft = pchars(pp + leftchar, start - leftchar, utf, outfile);
lmiddle = pchars(pp + start, end - start, utf, outfile);
lright = pchars(pp + end, rightchar - end, utf, outfile);
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
fprintf(outfile, " (JIT)");
fprintf(outfile, "\n ");
for (j = 0; j < lleft; j++) fprintf(outfile, "<");
for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
for (j = 0; j < lright; j++) fprintf(outfile, ">");
}
/* When a pattern contains \K, the start of match position may be
different to the start of the matched string. When this is the case,
show it when requested. */
else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
{
PCRE2_SIZE startchar;
startchar = pcre2_get_startchar(match_data);
lleft = pchars(pp + startchar, start - startchar, utf, outfile);
pchars(pp+start, end - start, utf, outfile);
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
fprintf(outfile, " (JIT)");
if (startchar != start)
{
fprintf(outfile, "\n ");
for (j = 0; j < lleft; j++) fprintf(outfile, "^");
}
}
/* Otherwise, just show the matched string. */
else
{
pchars(pp + start, end - start, utf, outfile);
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
fprintf(outfile, " (JIT)");
}
}
/* Not the main matched string. Just show it unadorned. */
else
{
pchars(pp + start, end - start, utf, outfile);
}
fprintf(outfile, "\n");
/* Note: don't use the start/end variables here because we want to
show the text from what is reported as the end. */
if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
(i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
{
fprintf(outfile, "%2d+ ", i/2);
pchars(pp + ovector[i+1], ulen - ovector[i+1], utf, outfile);
fprintf(outfile, "\n");
}
}
/* Output (*MARK) data if requested */
if ((dat_datctl.control & CTL_MARK) != 0 &&
match_data->mark != NULL)
{
fprintf(outfile, "MK: ");
pchars(match_data->mark - 1, -1, utf, outfile);
fprintf(outfile, "\n");
}
/* Process copy/get strings */
if (!copy_and_get(utf, capcount)) return PR_ABEND;
} /* End of handling a successful match */
/* There was a partial match. The value of ovector[0] is the bumpalong point,
that is, startchar, not any \K point that might have been passed. When JIT is
not in use, "allusedtext" may be set, in which case we indicate the leftmost
consulted character. */
else if (capcount == PCRE2_ERROR_PARTIAL)
{
PCRE2_SIZE leftchar;
int backlength;
int rubriclength = 0;
if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
{
leftchar = match_data->leftchar;
}
else leftchar = ovector[0];
fprintf(outfile, "Partial match");
if ((dat_datctl.control & CTL_MARK) != 0 &&
match_data->mark != NULL)
{
fprintf(outfile, ", mark=");
rubriclength = pchars(match_data->mark - 1, -1, utf, outfile);
rubriclength += 7;
}
fprintf(outfile, ": ");
rubriclength += 15;
backlength = pchars(pp + leftchar, ovector[0] - leftchar, utf, outfile);
pchars(pp + ovector[0], ovector[1] - ovector[0], utf, outfile);
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
fprintf(outfile, " (JIT)");
fprintf(outfile, "\n");
if (backlength != 0)
{
for (int i = 0; i < rubriclength; i++) fprintf(outfile, " ");
for (int i = 0; i < backlength; i++) fprintf(outfile, "<");
fprintf(outfile, "\n");
}
if (ulen != ovector[1])
fprintf(outfile, "** ovector[1] is not equal to the subject length: "
"%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen);
/* Process copy/get strings */
if (!copy_and_get(utf, 1)) return PR_ABEND;
/* "allvector" outputs the entire vector */
if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
show_ovector(ovector, oveccount);
break; /* Out of the /g loop */
} /* End of handling partial match */
/* A "normal" match failure. There will be a negative error number in
capcount. */
else
{
switch(capcount)
{
case PCRE2_ERROR_NOMATCH:
if (gmatched == 0)
{
fprintf(outfile, "No match");
if ((dat_datctl.control & CTL_MARK) != 0 &&
match_data->mark != NULL)
{
fprintf(outfile, ", mark = ");
pchars(match_data->mark - 1, -1, utf, outfile);
}
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
fprintf(outfile, " (JIT)");
fprintf(outfile, "\n");
/* "allvector" outputs the entire vector */
if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
show_ovector(ovector, oveccount);
}
break;
case PCRE2_ERROR_BADUTFOFFSET:
fprintf(outfile, "Error %d (bad UTF-" STR(PCRE2_CODE_UNIT_WIDTH)
" offset)\n", capcount);
break;
default:
fprintf(outfile, "Failed: error %d: ", capcount);
if (!print_error_message(capcount, "", "")) return PR_ABEND;
if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
capcount >= PCRE2_ERROR_UTF32_ERR2)
{
PCRE2_SIZE startchar;
startchar = pcre2_get_startchar(match_data);
fprintf(outfile, " at offset %" SIZ_FORM, startchar);
}
fprintf(outfile, "\n");
break;
}
break; /* Out of the /g loop */
} /* End of failed match handling */
/* Control reaches here after a match. If we are not doing a global search,
we are done. Otherwise, we adjust the parameters for the next match and
continue the matching loop. */
NEXT_MATCH:
if ((dat_datctl.control & CTL_ANYGLOB) == 0)
break;
else
{
PCRE2_SIZE new_start_offset = (PCRE2_SIZE)-1;
BOOL rc_nextmatch;
/* Use pcre2_next_match() to safely advance. This guarantees that the start
offset will advance, except after an empty match, in which case it sets
the PCRE2_NOTEMPTY_ATSTART flag to ensure the next match does not return a
duplicate. */
rc_nextmatch = pcre2_next_match(match_data, &new_start_offset, &g_notempty);
if (!rc_nextmatch) break; /* Out of the /g loop */
/* For a normal global (/g) iteration, update the start offset, leaving
other parameters alone. */
if ((dat_datctl.control & CTL_GLOBAL) != 0)
{
dat_datctl.offset = new_start_offset;
}
/* For altglobal, just update the pointer and length. */
else
{
pp += new_start_offset;
len -= CU2BYTES(new_start_offset);
ulen -= new_start_offset;
if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= new_start_offset;
}
}
} /* End of global loop */
/* All matching is done; show the resulting heapframe size if requested. */
if ((dat_datctl.control2 & CTL2_HEAPFRAMES_SIZE) != 0 &&
(dat_datctl.control & CTL_DFA) == 0)
show_heapframes_size();
show_memory = FALSE;
return PR_OK;
}
/*************************************************
* Initialise the mode-dependent globals *
*************************************************/
/* Create the global contexts and the global match data block used by the
current test mode. The general context is built on my_malloc() and my_free(),
and every other object created here is derived from that general context. */

static void
init_globals(void)
{
/* General context first: everything below is created from it. */

general_context = pcre2_general_context_create(my_malloc, my_free, NULL);
general_context_copy = pcre2_general_context_copy(general_context);

/* For compile, match and convert, create a default context and then a
separate copy of it. */

default_pat_context = pcre2_compile_context_create(general_context);
pat_context = pcre2_compile_context_copy(default_pat_context);

default_dat_context = pcre2_match_context_create(general_context);
dat_context = pcre2_match_context_copy(default_dat_context);

default_con_context = pcre2_convert_context_create(general_context);
con_context = pcre2_convert_context_copy(default_con_context);

/* One match data block, sized for max_oveccount ovector pairs. */

match_data = pcre2_match_data_create(max_oveccount, general_context);

/* Give the default compile context a parentheses nest limit big enough for
the standard tests; calling the setter here also exercises it. Note that
pat_context was copied above, before this call, so only default_pat_context is
modified at this point. */

pcre2_set_parens_nest_limit(default_pat_context, PARENS_NEST_DEFAULT);
}
/* Release the objects held in the global variables of the current test mode:
the locale tables, the match data block, the current and stacked compiled
patterns, any JIT stack, and all of the contexts. */

static void
free_globals(void)
{
pcre2_maketables_free(general_context, locale_tables);
pcre2_match_data_free(match_data);

/* Free the current compiled pattern, then pop every entry that remains on the
pattern stack, making each one the current pattern in turn and freeing it. */

pcre2_code_free(compiled_code);
while (patstacknext-- > 0)
  pcre2_code_free(compiled_code = patstack[patstacknext]);

/* JIT resources: unused executable memory, then the JIT stack if one exists. */

pcre2_jit_free_unused_memory(general_context);
if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);

/* Finally the contexts: general ones first, then compile, match and convert. */

pcre2_general_context_free(general_context);
pcre2_general_context_free(general_context_copy);

pcre2_compile_context_free(pat_context);
pcre2_compile_context_free(default_pat_context);

pcre2_match_context_free(dat_context);
pcre2_match_context_free(default_dat_context);

pcre2_convert_context_free(default_con_context);
pcre2_convert_context_free(con_context);
}
/*************************************************
* Specific function tests *
*************************************************/
/* Tests that deliberately use a mismatched bitmode need the API of a second
code unit width. When two or more widths are supported, BITOTHER is defined as
the first of 8, 16, 32 that is supported and differs from
PCRE2_CODE_UNIT_WIDTH. With fewer than two widths, BITOTHER stays undefined. */

#if (defined SUPPORT_PCRE2_8 + defined SUPPORT_PCRE2_16 + \
     defined SUPPORT_PCRE2_32) > 1
#if defined SUPPORT_PCRE2_8 && PCRE2_CODE_UNIT_WIDTH != 8
#define BITOTHER 8
#elif defined SUPPORT_PCRE2_16 && PCRE2_CODE_UNIT_WIDTH != 16
#define BITOTHER 16
#elif defined SUPPORT_PCRE2_32 && PCRE2_CODE_UNIT_WIDTH != 32
#define BITOTHER 32
#else
#error "One other bit width must be supported"
#endif
#endif  /* two or more widths supported */
/* These are tests of the public API functions in PCRE2, which wouldn't
otherwise be covered by pcre2test. This usually implies they are error cases,
or edge cases that are hard to hit in the standard flow of compile-match or
compile-substitute.
I think of them as perhaps more like unit tests, although they are still testing
the public API, rather than internal modules.
Inside pcre2test, which can be dynamically linked to lib-pcreX.so, we don't
have access to any non-exported functions. */
static void
unittest(void)
{
int rc;
uint32_t uval;
PCRE2_SIZE sizeval;
PCRE2_UCHAR *sptrval;
const char *failure = NULL;
pcre2_general_context *test_gen_context = NULL, *test_gen_context_copy = NULL;
pcre2_compile_context *test_pat_context = NULL, *test_pat_context_copy = NULL;
pcre2_match_context *test_dat_context = NULL, *test_dat_context_copy = NULL;
pcre2_convert_context *test_con_context = NULL, *test_con_context_copy = NULL;
pcre2_match_data *test_match_data = NULL;
pcre2_code *test_compiled_code = NULL;
PCRE2_UCHAR pattern[] = { CHAR_A, CHAR_B, CHAR_C, 0 };
PCRE2_UCHAR callout_int_pattern[] = {
CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK, CHAR_C, CHAR_RIGHT_PARENTHESIS, 0 };
PCRE2_UCHAR callout_str_pattern[] = {
CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK, CHAR_C, CHAR_QUOTATION_MARK,
CHAR_Z, CHAR_QUOTATION_MARK, CHAR_RIGHT_PARENTHESIS, 0 };
PCRE2_UCHAR capture_pattern[] = {
CHAR_A, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK, CHAR_LESS_THAN_SIGN,
CHAR_N, CHAR_GREATER_THAN_SIGN, CHAR_DOT, CHAR_ASTERISK,
CHAR_RIGHT_PARENTHESIS, CHAR_Z, 0 };
PCRE2_UCHAR subject_abcz[] = {
CHAR_A, CHAR_B, CHAR_C, CHAR_Z, 0 };
PCRE2_UCHAR name_n[] = { CHAR_N, 0 };
#ifdef BITOTHER
G(pcre2_code_,BITOTHER) *bitother_code = NULL;
G(PCRE2_,G(UCHAR,BITOTHER)) bitother_pattern[] = { CHAR_A, CHAR_B, CHAR_C, 0 };
#endif
int errorcode;
PCRE2_SIZE erroroffset;
PCRE2_UCHAR errorbuffer[256];
#if PCRE2_CODE_UNIT_WIDTH == 8
char errorbuffer8[256];
regex_t test_preg;
#endif
void *invalid_code = NULL;
const uint8_t *test_tables = NULL;
PCRE2_UCHAR copy_buf[64];
PCRE2_UCHAR **stringlist;
PCRE2_SIZE *lengthslist;
#if PCRE2_CODE_UNIT_WIDTH == 8
memset(&test_preg, 0, sizeof(test_preg));
#endif
#if defined PCRE2_DEBUG && !defined NDEBUG
#define ASSERT(cond, msg) \
do { \
if (!(cond)) { failure = msg " at " __FILE__ ":" STR(__LINE__); goto EXIT; } \
} while (0)
#else
#define ASSERT(cond, msg) \
do { \
if (!(cond)) { failure = msg; goto EXIT; } \
} while (0)
#endif
/* -------------------------- pcre2_config --------------------------------- */
rc = pcre2_config(PCRE2_CONFIG_BSR, NULL);
ASSERT(rc == (int)sizeof(uint32_t), "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_COMPILED_WIDTHS, NULL);
ASSERT(rc == (int)sizeof(uint32_t), "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_DEPTHLIMIT, NULL);
ASSERT(rc == (int)sizeof(uint32_t), "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_EFFECTIVE_LINKSIZE, NULL);
ASSERT(rc == (int)sizeof(uint32_t), "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_HEAPLIMIT, NULL);
ASSERT(rc == (int)sizeof(uint32_t), "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_JIT, NULL);
ASSERT(rc == (int)sizeof(uint32_t), "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_LINKSIZE, NULL);
ASSERT(rc == (int)sizeof(uint32_t), "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_MATCHLIMIT, NULL);
ASSERT(rc == (int)sizeof(uint32_t), "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_NEVER_BACKSLASH_C, NULL);
ASSERT(rc == (int)sizeof(uint32_t), "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_NEWLINE, NULL);
ASSERT(rc == (int)sizeof(uint32_t), "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_PARENSLIMIT, NULL);
ASSERT(rc == (int)sizeof(uint32_t), "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_STACKRECURSE, NULL);
ASSERT(rc == (int)sizeof(uint32_t), "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_TABLES_LENGTH, NULL);
ASSERT(rc == (int)sizeof(uint32_t), "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_UNICODE, NULL);
ASSERT(rc == (int)sizeof(uint32_t), "pcre2_config(NULL)");
#ifdef SUPPORT_JIT
rc = pcre2_config(PCRE2_CONFIG_JITTARGET, NULL);
ASSERT(rc > 0, "pcre2_config(NULL)");
#endif
rc = pcre2_config(PCRE2_CONFIG_UNICODE_VERSION, NULL);
ASSERT(rc > 4, "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
ASSERT(rc > 4, "pcre2_config(NULL)");
rc = pcre2_config(PCRE2_CONFIG_MATCHLIMIT, &uval);
ASSERT(rc == 0, "pcre2_config(PCRE2_CONFIG_MATCHLIMIT)");
rc = pcre2_config(999, NULL);
ASSERT(rc == PCRE2_ERROR_BADOPTION, "pcre2_config(bad option)");
rc = pcre2_config(999, &uval);
ASSERT(rc == PCRE2_ERROR_BADOPTION, "pcre2_config(bad option)");
rc = pcre2_config(PCRE2_CONFIG_STACKRECURSE, &uval);
ASSERT(rc == 0, "pcre2_config(PCRE2_CONFIG_STACKRECURSE)");
rc = pcre2_config(PCRE2_CONFIG_LINKSIZE, &uval);
ASSERT(rc == 0, "pcre2_config(PCRE2_CONFIG_LINKSIZE)");
/* ------------------------ Context functions ------------------------------ */
test_gen_context = pcre2_general_context_create(NULL, NULL, NULL);
ASSERT(test_gen_context != NULL, "pcre2_general_context_create(null)");
pcre2_general_context_free(test_gen_context);
mallocs_until_failure = 0;
test_gen_context = pcre2_general_context_create(&my_malloc, &my_free, NULL);
ASSERT(test_gen_context == NULL, "pcre2_general_context_create(malloc)");
mallocs_until_failure = 1;
test_gen_context = pcre2_general_context_create(&my_malloc, &my_free, NULL);
ASSERT(test_gen_context != NULL, "pcre2_general_context_create(malloc)");
test_pat_context = pcre2_compile_context_create(test_gen_context);
ASSERT(test_pat_context == NULL, "pcre2_compile_context_create()");
test_dat_context = pcre2_match_context_create(test_gen_context);
ASSERT(test_dat_context == NULL, "pcre2_match_context_create()");
test_con_context = pcre2_convert_context_create(test_gen_context);
ASSERT(test_con_context == NULL, "pcre2_convert_context_create()");
test_pat_context = pcre2_compile_context_create(NULL);
ASSERT(test_pat_context != NULL, "pcre2_compile_context_create(null)");
pcre2_compile_context_free(test_pat_context);
test_dat_context = pcre2_match_context_create(NULL);
ASSERT(test_dat_context != NULL, "pcre2_match_context_create(null)");
pcre2_match_context_free(test_dat_context);
test_con_context = pcre2_convert_context_create(NULL);
ASSERT(test_con_context != NULL, "pcre2_convert_context_create(null)");
pcre2_convert_context_free(test_con_context);
mallocs_until_failure = INT_MAX;
test_pat_context = pcre2_compile_context_create(test_gen_context);
ASSERT(test_pat_context != NULL, "pcre2_compile_context_create()");
test_dat_context = pcre2_match_context_create(test_gen_context);
ASSERT(test_dat_context != NULL, "pcre2_match_context_create()");
test_con_context = pcre2_convert_context_create(test_gen_context);
ASSERT(test_con_context != NULL, "pcre2_convert_context_create()");
mallocs_until_failure = 0;
test_gen_context_copy = pcre2_general_context_copy(test_gen_context);
ASSERT(test_gen_context_copy == NULL, "pcre2_general_context_copy()");
test_pat_context_copy = pcre2_compile_context_copy(test_pat_context);
ASSERT(test_pat_context_copy == NULL, "pcre2_compile_context_copy()");
test_dat_context_copy = pcre2_match_context_copy(test_dat_context);
ASSERT(test_dat_context_copy == NULL, "pcre2_match_context_copy()");
test_con_context_copy = pcre2_convert_context_copy(test_con_context);
ASSERT(test_con_context_copy == NULL, "pcre2_convert_context_copy()");
mallocs_until_failure = INT_MAX;
test_gen_context_copy = pcre2_general_context_copy(test_gen_context);
ASSERT(test_gen_context_copy != NULL, "pcre2_general_context_copy()");
test_pat_context_copy = pcre2_compile_context_copy(test_pat_context);
ASSERT(test_pat_context_copy != NULL, "pcre2_compile_context_copy()");
test_dat_context_copy = pcre2_match_context_copy(test_dat_context);
ASSERT(test_dat_context_copy != NULL, "pcre2_match_context_copy()");
test_con_context_copy = pcre2_convert_context_copy(test_con_context);
ASSERT(test_con_context_copy != NULL, "pcre2_convert_context_copy()");
rc = pcre2_set_compile_extra_options(test_pat_context, 0);
ASSERT(rc == 0, "pcre2_set_compile_extra_options()");
rc = pcre2_set_max_pattern_length(test_pat_context, 10);
ASSERT(rc == 0, "pcre2_set_max_pattern_length()");
rc = pcre2_set_max_pattern_compiled_length(test_pat_context, 256);
ASSERT(rc == 0, "pcre2_set_max_pattern_compiled_length()");
rc = pcre2_set_max_varlookbehind(test_pat_context, 0);
ASSERT(rc == 0, "pcre2_set_max_varlookbehind()");
rc = pcre2_set_offset_limit(test_dat_context, 0);
ASSERT(rc == 0, "pcre2_set_offset_limit()");
rc = pcre2_set_bsr(test_pat_context, 999);
ASSERT(rc == PCRE2_ERROR_BADDATA, "pcre2_set_bsr()");
rc = pcre2_set_newline(test_pat_context, 999);
ASSERT(rc == PCRE2_ERROR_BADDATA, "pcre2_set_newline()");
rc = pcre2_set_recursion_limit(test_dat_context, 10);
ASSERT(rc == 0, "pcre2_set_recursion_limit()");
rc = pcre2_set_recursion_memory_management(test_dat_context, NULL, NULL, NULL);
ASSERT(rc == 0, "pcre2_set_recursion_memory_management()");
rc = pcre2_set_optimize(NULL, PCRE2_OPTIMIZATION_NONE);
ASSERT(rc == PCRE2_ERROR_NULL, "pcre2_set_optimize(null)");
rc = pcre2_set_optimize(test_pat_context, PCRE2_AUTO_POSSESS - 1);
ASSERT(rc == PCRE2_ERROR_BADOPTION, "pcre2_set_optimize(bad option)");
rc = pcre2_set_optimize(test_pat_context, PCRE2_START_OPTIMIZE_OFF + 1);
ASSERT(rc == PCRE2_ERROR_BADOPTION, "pcre2_set_optimize(bad option)");
rc = pcre2_set_glob_escape(test_con_context, 0);
ASSERT(rc == 0, "pcre2_set_glob_escape(0)");
rc = pcre2_set_glob_escape(test_con_context, 1);
ASSERT(rc == PCRE2_ERROR_BADDATA, "pcre2_set_glob_escape(1)");
rc = pcre2_set_glob_escape(test_con_context, 256);
ASSERT(rc == PCRE2_ERROR_BADDATA, "pcre2_set_glob_escape(256)");
/* ----------------------- pcre2_compile ----------------------------------- */
test_compiled_code = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED,
0, NULL, &erroroffset, test_pat_context);
ASSERT(test_compiled_code == NULL, "test pattern compilation");
test_compiled_code = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED,
0, &errorcode, NULL, test_pat_context);
ASSERT(test_compiled_code == NULL && errorcode == PCRE2_ERROR_NULL_ERROROFFSET, "test pattern compilation");
test_compiled_code = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED,
0, &errorcode, &erroroffset, test_pat_context);
ASSERT(test_compiled_code != NULL && errorcode == 100 && erroroffset == 0, "test pattern compilation");
#ifdef BITOTHER
bitother_code = G(pcre2_compile_,BITOTHER)(bitother_pattern,
PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL);
ASSERT(bitother_code != NULL, "bitmode mismatch compile");
#endif
/* ---------------------- Match data functions ----------------------------- */
mallocs_until_failure = 0;
test_match_data = pcre2_match_data_create(10, test_gen_context);
ASSERT(test_match_data == NULL, "pcre2_match_data_create()");
test_match_data = pcre2_match_data_create(10, NULL);
ASSERT(test_match_data != NULL, "pcre2_match_data_create()");
ASSERT(pcre2_get_ovector_count(test_match_data) == 10, "pcre2_get_ovector_count()");
sizeval = pcre2_get_match_data_size(test_match_data);
ASSERT(sizeval >= 2, "pcre2_get_match_data_size()");
mallocs_until_failure = INT_MAX;
pcre2_match_data_free(test_match_data);
test_match_data = pcre2_match_data_create(0, test_gen_context);
ASSERT(test_match_data != NULL, "pcre2_match_data_create()");
ASSERT(pcre2_get_ovector_count(test_match_data) == 1, "pcre2_get_ovector_count()");
pcre2_match_data_free(test_match_data);
test_match_data = pcre2_match_data_create_from_pattern(NULL, NULL);
ASSERT(test_match_data == NULL, "pcre2_match_data_create_from_pattern(null)");
test_match_data = pcre2_match_data_create_from_pattern(test_compiled_code, NULL);
ASSERT(test_match_data != NULL, "pcre2_match_data_create_from_pattern()");
ASSERT(pcre2_get_ovector_count(test_match_data) == 1, "pcre2_get_ovector_count()");
mallocs_until_failure = 0;
pcre2_match_data_free(test_match_data);
test_match_data = pcre2_match_data_create_from_pattern(test_compiled_code,
test_gen_context);
ASSERT(test_match_data == NULL, "pcre2_match_data_create_from_pattern()");
mallocs_until_failure = INT_MAX;
pcre2_match_data_free(test_match_data);
test_match_data = pcre2_match_data_create_from_pattern(test_compiled_code,
test_gen_context);
ASSERT(test_match_data != NULL, "pcre2_match_data_create_from_pattern()");
rc = pcre2_match(test_compiled_code, pattern, PCRE2_ZERO_TERMINATED, 0,
PCRE2_COPY_MATCHED_SUBJECT, test_match_data, NULL);
ASSERT(rc == 1, "pcre2_match()");
pcre2_match_data_free(test_match_data);
test_match_data = NULL;
/* ----------------------- pcre2_pattern_info ------------------------------ */
rc = pcre2_pattern_info(NULL, PCRE2_INFO_NEWLINE, &uval);
ASSERT(rc == PCRE2_ERROR_NULL, "pcre2_pattern_info(null)");
rc = pcre2_pattern_info(test_compiled_code, 999, NULL);
ASSERT(rc == PCRE2_ERROR_BADOPTION, "pcre2_pattern_info(bad option)");
rc = pcre2_pattern_info(test_compiled_code, 999, &uval);
ASSERT(rc == PCRE2_ERROR_BADOPTION, "pcre2_pattern_info(bad option)");
invalid_code = malloc(1024);
ASSERT(invalid_code != NULL, "malloc()");
memset(invalid_code, 0, 1024);
rc = pcre2_pattern_info(invalid_code, PCRE2_INFO_NEWLINE, &uval);
ASSERT(rc == PCRE2_ERROR_BADMAGIC, "pcre2_pattern_info(bad magic)");
#ifdef BITOTHER
rc = pcre2_pattern_info((pcre2_code *)bitother_code, PCRE2_INFO_NEWLINE, &uval);
ASSERT(rc == PCRE2_ERROR_BADMODE, "pcre2_pattern_info(bitmode mismatch)");
#endif
#ifdef SUPPORT_JIT
sizeval = 0xcdcdcdcd;
rc = pcre2_pattern_info(test_compiled_code, PCRE2_INFO_JITSIZE, &sizeval);
ASSERT(rc == 0 && sizeval == 0, "pcre2_pattern_info(JIT)");
if (pcre2_jit_compile(test_compiled_code, PCRE2_JIT_COMPLETE) == 0)
{
rc = pcre2_pattern_info(test_compiled_code, PCRE2_INFO_JITSIZE, &sizeval);
ASSERT(rc == 0 && sizeval > 0, "pcre2_pattern_info(JIT after compile)");
}
#endif
/* ----------------------- POSIX functions --------------------------------- */
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined(EBCDIC) && !EBCDIC_IO
#define BUFFER_OUTPUT ebcdic_to_ascii_str((uint8_t *)errorbuffer8, sizeof(errorbuffer8));
#else
#define BUFFER_OUTPUT
#endif
rc = pcre2_regcomp(&test_preg, "abc", 0);
ASSERT(rc == 0, "pcre2_regcomp()");
rc = pcre2_regexec(&test_preg, "zabcz", 0, NULL, 0);
ASSERT(rc == 0, "pcre2_regexec(0)");
rc = pcre2_regexec(&test_preg, "zabcz", 0, NULL, REG_STARTEND);
ASSERT(rc == REG_INVARG, "pcre2_regexec(REG_STARTEND)");
memset(errorbuffer8, 0, sizeof(errorbuffer8));
rc = regerror(REG_ASSERT, NULL, errorbuffer8, sizeof(errorbuffer8));
BUFFER_OUTPUT
ASSERT(rc > 0 && rc <= (int)sizeof(errorbuffer8) && rc == (int)strlen(errorbuffer8) + 1, "regerror()");
rc = regerror(REG_NOMATCH, NULL, errorbuffer8, sizeof(errorbuffer8));
BUFFER_OUTPUT
ASSERT(rc > 0 && rc <= (int)sizeof(errorbuffer8) && rc == (int)strlen(errorbuffer8) + 1, "regerror()");
rc = regerror(REG_ASSERT-1, NULL, errorbuffer8, sizeof(errorbuffer8));
BUFFER_OUTPUT
ASSERT(rc == (int)strlen("unknown error code")+1 && strcmp(errorbuffer8, "unknown error code") == 0, "regerror(bad error code)");
rc = regerror(REG_NOMATCH+1, NULL, errorbuffer8, sizeof(errorbuffer8));
BUFFER_OUTPUT
ASSERT(rc == (int)strlen("unknown error code")+1 && strcmp(errorbuffer8, "unknown error code") == 0, "regerror(bad error code)");
#undef BUFFER_OUTPUT
#endif
/* -------------------- pcre2_get_error_message ---------------------------- */
#if defined(EBCDIC) && !EBCDIC_IO
#define BUFFER_OUTPUT ebcdic_to_ascii_str(errorbuffer, sizeof(errorbuffer));
#else
#define BUFFER_OUTPUT
#endif
rc = pcre2_get_error_message(PCRE2_ERROR_BADDATA, NULL, 0);
ASSERT(rc == PCRE2_ERROR_NOMEMORY, "pcre2_get_error_message(null)");
memset(errorbuffer, 0, sizeof(errorbuffer));
rc = pcre2_get_error_message(PCRE2_ERROR_BADDATA, errorbuffer, 0);
BUFFER_OUTPUT
ASSERT(rc == PCRE2_ERROR_NOMEMORY, "pcre2_get_error_message(null)");
rc = pcre2_get_error_message(PCRE2_ERROR_BADDATA, errorbuffer, 4);
BUFFER_OUTPUT
ASSERT(rc == PCRE2_ERROR_NOMEMORY && pcre2_strcmp_c8(errorbuffer, "bad") == 0, "pcre2_get_error_message(null)");
rc = pcre2_get_error_message(PCRE2_ERROR_BADDATA, errorbuffer, 14);
BUFFER_OUTPUT
ASSERT(rc == PCRE2_ERROR_NOMEMORY && pcre2_strcmp_c8(errorbuffer, "bad data valu") == 0, "pcre2_get_error_message(null)");
rc = pcre2_get_error_message(PCRE2_ERROR_BADDATA, errorbuffer, 15);
BUFFER_OUTPUT
ASSERT(rc == 14 && pcre2_strcmp_c8(errorbuffer, "bad data value") == 0, "pcre2_get_error_message(null)");
#undef BUFFER_OUTPUT
/* ----------------------- pcre2_maketables -------------------------------- */
test_tables = pcre2_maketables(NULL);
ASSERT(test_tables != NULL, "pcre2_maketables(null)");
pcre2_maketables_free(NULL, test_tables);
test_tables = pcre2_maketables(test_gen_context);
ASSERT(test_tables != NULL, "pcre2_maketables()");
pcre2_maketables_free(test_gen_context, test_tables);
mallocs_until_failure = 0;
test_tables = pcre2_maketables(test_gen_context);
ASSERT(test_tables == NULL, "pcre2_maketables()");
mallocs_until_failure = INT_MAX;
/* -------------------- pcre2_callout_enumerate ---------------------------- */
rc = pcre2_callout_enumerate(NULL, callout_enumerate_function_void, NULL);
ASSERT(rc == PCRE2_ERROR_NULL, "pcre2_callout_enumerate(null)");
rc = pcre2_callout_enumerate(invalid_code, callout_enumerate_function_void, NULL);
ASSERT(rc == PCRE2_ERROR_BADMAGIC, "pcre2_callout_enumerate(invalid)");
#ifdef BITOTHER
rc = pcre2_callout_enumerate((pcre2_code *)bitother_code, callout_enumerate_function_void, NULL);
ASSERT(rc == PCRE2_ERROR_BADMODE, "pcre2_callout_enumerate(bitmode mismatch)");
#endif
pcre2_code_free(test_compiled_code);
test_compiled_code = pcre2_compile(callout_int_pattern, PCRE2_ZERO_TERMINATED,
0, &errorcode, &erroroffset, NULL);
ASSERT(test_compiled_code != NULL, "test pattern compilation");
rc = pcre2_callout_enumerate(test_compiled_code, callout_enumerate_function_void, &errorcode);
ASSERT(rc == 0, "pcre2_callout_enumerate(void)");
errorcode = -12;
rc = pcre2_callout_enumerate(test_compiled_code, callout_enumerate_function_fail, &errorcode);
ASSERT(rc == -12, "pcre2_callout_enumerate(fail)");
pcre2_code_free(test_compiled_code);
test_compiled_code = pcre2_compile(callout_str_pattern, PCRE2_ZERO_TERMINATED,
0, &errorcode, &erroroffset, NULL);
ASSERT(test_compiled_code != NULL, "test pattern compilation");
errorcode = -123;
rc = pcre2_callout_enumerate(test_compiled_code, callout_enumerate_function_fail, &errorcode);
ASSERT(rc == -123, "pcre2_callout_enumerate(fail)");
/* ---------------------- Substring functions ------------------------------ */
/* Must handle NULL without crashing. */
pcre2_substring_free(NULL);
pcre2_substring_list_free(NULL);
pcre2_code_free(test_compiled_code);
test_compiled_code = pcre2_compile(capture_pattern, PCRE2_ZERO_TERMINATED,
0, &errorcode, &erroroffset, NULL);
ASSERT(test_compiled_code != NULL, "test pattern compilation");
pcre2_match_data_free(test_match_data);
test_match_data = pcre2_match_data_create_from_pattern(
test_compiled_code, test_gen_context);
ASSERT(test_match_data != NULL, "pcre2_match_data_create()");
rc = pcre2_match(test_compiled_code, subject_abcz, PCRE2_ZERO_TERMINATED, 0,
0, test_match_data, NULL);
ASSERT(rc == 2, "pcre2_match()");
/* Test the functions with insufficient buffer size. It hardly seems worth
adding controls to the pcre2test input file format to exercise this case. */
sizeval = 2;
rc = pcre2_substring_copy_byname(test_match_data, name_n, copy_buf, &sizeval);
ASSERT(rc == PCRE2_ERROR_NOMEMORY && sizeval == 2, "pcre2_substring_copy_byname(small buffer)");
sizeval = 3;
rc = pcre2_substring_copy_byname(test_match_data, name_n, copy_buf, &sizeval);
ASSERT(rc == 0 && sizeval == 2, "pcre2_substring_copy_byname(small buffer)");
sizeval = 4;
rc = pcre2_substring_copy_byname(test_match_data, name_n, copy_buf, &sizeval);
ASSERT(rc == 0 && sizeval == 2, "pcre2_substring_copy_byname(small buffer)");
sizeval = 2;
rc = pcre2_substring_copy_bynumber(test_match_data, 1, copy_buf, &sizeval);
ASSERT(rc == PCRE2_ERROR_NOMEMORY && sizeval == 2, "pcre2_substring_copy_bynumber(small buffer)");
sizeval = 3;
rc = pcre2_substring_copy_bynumber(test_match_data, 1, copy_buf, &sizeval);
ASSERT(rc == 0 && sizeval == 2, "pcre2_substring_copy_bynumber(small buffer)");
mallocs_until_failure = 0;
sizeval = 0;
sptrval = NULL;
rc = pcre2_substring_get_byname(test_match_data, name_n, &sptrval, &sizeval);
ASSERT(rc == PCRE2_ERROR_NOMEMORY && sptrval == NULL, "pcre2_substring_get_byname(small buffer)");
sizeval = 0;
rc = pcre2_substring_get_bynumber(test_match_data, 1, &sptrval, &sizeval);
ASSERT(rc == PCRE2_ERROR_NOMEMORY && sptrval == NULL, "pcre2_substring_get_bynumber(small buffer)");
mallocs_until_failure = INT_MAX;
/* Test some unusual conditions, for which again it doesn't seem worth adding
pcre2test controls. */
sizeval = 0;
rc = pcre2_substring_length_bynumber(test_match_data, 1, &sizeval);
ASSERT(rc == 0 && sizeval == 2, "pcre2_substring_length_bynumber()");
rc = pcre2_substring_length_bynumber(test_match_data, 1, NULL);
ASSERT(rc == 0, "pcre2_substring_length_bynumber()");
sizeval = 0;
rc = pcre2_substring_length_byname(test_match_data, name_n, &sizeval);
ASSERT(rc == 0 && sizeval == 2, "pcre2_substring_length_byname()");
rc = pcre2_substring_length_byname(test_match_data, name_n, NULL);
ASSERT(rc == 0, "pcre2_substring_length_byname()");
/* Test pcre2_substring_list_get() with some NULL inputs. */
rc = pcre2_substring_list_get(test_match_data, &stringlist, &lengthslist);
ASSERT(rc == 0 && stringlist != NULL && lengthslist != NULL, "pcre2_substring_list_get()");
pcre2_substring_list_free(stringlist);
stringlist = NULL;
rc = pcre2_substring_list_get(test_match_data, &stringlist, NULL);
ASSERT(rc == 0 && stringlist != NULL, "pcre2_substring_list_get()");
pcre2_substring_list_free(stringlist);
mallocs_until_failure = 0;
stringlist = NULL;
rc = pcre2_substring_list_get(test_match_data, &stringlist, &lengthslist);
ASSERT(rc == PCRE2_ERROR_NOMEMORY && stringlist == NULL, "pcre2_substring_list_get()");
mallocs_until_failure = INT_MAX;
/* Test after an unsuccessful match. */
rc = pcre2_match(test_compiled_code, subject_abcz, PCRE2_ZERO_TERMINATED, 2,
0, test_match_data, NULL);
ASSERT(rc == PCRE2_ERROR_NOMATCH, "pcre2_match()");
sizeval = 4;
rc = pcre2_substring_copy_byname(test_match_data, name_n, copy_buf, &sizeval);
ASSERT(rc == PCRE2_ERROR_NOMATCH, "pcre2_substring_copy_byname(no match)");
rc = pcre2_substring_copy_bynumber(test_match_data, 1, copy_buf, &sizeval);
ASSERT(rc == PCRE2_ERROR_NOMATCH, "pcre2_substring_copy_bynumber(no match)");
rc = pcre2_substring_get_byname(test_match_data, name_n, &sptrval, &sizeval);
ASSERT(rc == PCRE2_ERROR_NOMATCH && sptrval == NULL, "pcre2_substring_get_byname(no match)");
rc = pcre2_substring_get_bynumber(test_match_data, 1, &sptrval, &sizeval);
ASSERT(rc == PCRE2_ERROR_NOMATCH && sptrval == NULL, "pcre2_substring_get_bynumber(no match)");
/* ------------------------------------------------------------------------- */
#undef ASSERT
EXIT:
mallocs_until_failure = INT_MAX;
#if PCRE2_CODE_UNIT_WIDTH == 8
pcre2_regfree(&test_preg);
#endif
if (test_compiled_code != NULL) pcre2_code_free(test_compiled_code);
#ifdef BITOTHER
if (bitother_code != NULL) G(pcre2_code_free_,BITOTHER)(bitother_code);
#endif
if (test_match_data != NULL) pcre2_match_data_free(test_match_data);
if (test_con_context_copy != NULL) pcre2_convert_context_free(test_con_context_copy);
if (test_dat_context_copy != NULL) pcre2_match_context_free(test_dat_context_copy);
if (test_pat_context_copy != NULL) pcre2_compile_context_free(test_pat_context_copy);
if (test_gen_context_copy != NULL) pcre2_general_context_free(test_gen_context_copy);
if (test_con_context != NULL) pcre2_convert_context_free(test_con_context);
if (test_dat_context != NULL) pcre2_match_context_free(test_dat_context);
if (test_pat_context != NULL) pcre2_compile_context_free(test_pat_context);
if (test_gen_context != NULL) pcre2_general_context_free(test_gen_context);
free(invalid_code);
if (failure != NULL)
{
fprintf(stderr, "pcre2test: Unit test error in %s\n", failure);
exit(1);
}
}
/* BITOTHER is tested with #ifdef in the unit test code above. Remove it here
so that it is no longer defined once this file has been processed. */
#undef BITOTHER
/* -------------------- Undo the macro definitions --------------------------*/
/* Each #undef below removes one macro name, so that none of these names is
still defined after the end of this file. NOTE(review): presumably this is what
allows the file to be included again with a different PCRE2_CODE_UNIT_WIDTH and
a fresh set of definitions - confirm against the including source file. If a
macro is added to or removed from the corresponding definitions, this list
should be kept in step with them. */
#undef pbuffer
#undef pbuffer_size
#undef utf_to_ord
#undef compiled_code
#undef general_context
#undef general_context_copy
#undef pat_context
#undef default_pat_context
#undef con_context
#undef default_con_context
#undef dat_context
#undef default_dat_context
#undef match_data
#undef jit_stack
#undef jit_stack_size
#undef patstack
#undef patstacknext
#undef jit_callback
#undef pcre2_strcmp_c8
#undef pcre2_strlen
#undef pchars
#undef ptrunc
#undef config_str
#undef check_modifier
#undef decode_modifiers
#undef pattern_info
#undef show_memory_info
#undef show_framesize
#undef show_heapframes_size
#undef print_error_message_file
#undef print_error_message
#undef callout_enumerate_function
#undef callout_enumerate_function_void
#undef callout_enumerate_function_fail
#undef show_pattern_info
#undef serial_error
#undef process_command
#undef process_pattern
#undef have_active_pattern
#undef free_active_pattern
#undef check_match_limit
#undef substitute_callout_function
#undef substitute_case_callout_function
#undef callout_function
#undef copy_and_get
#undef process_data
#undef init_globals
#undef free_globals
#undef unittest
/* End of pcre2test_inc.h */