/* src/pcre2_compile_class.c - third_party/github.com/PCRE2Project/pcre2 - Git at Google */

 /*************************************************
 *      Perl-Compatible Regular Expressions       *
 *************************************************/

 /* PCRE is a library of functions to support regular expressions whose syntax
 and semantics are as close as possible to those of the Perl 5 language.

                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
           New API code Copyright (c) 2016-2024 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

     * Redistributions of source code must retain the above copyright notice,
       this list of conditions and the following disclaimer.

     * Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.

     * Neither the name of the University of Cambridge nor the names of its
       contributors may be used to endorse or promote products derived from
       this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 -----------------------------------------------------------------------------
 */


 #include "pcre2_compile.h"


 /* Context data for eclass (presumably "extended character class")
 processing. It bundles the option bits with the error code pointer and the
 compile block, and records whether a bitmap is needed. */
 typedef struct {
   /* Option bits for eclass. */
   uint32_t options;
   uint32_t xoptions;
   /* Rarely used members. */
   int *errorcodeptr;
   compile_block *cb;
   /* Bitmap is needed. */
   BOOL needs_bitmap;
 } eclass_context;

 /* Checks the allowed tokens at the end of a class structure in debug mode.
 When a new token is not processed by all loops, and the token is equal to
 a) one of the cases here:
    the compiler will complain about a duplicated case value.
 b) none of the cases here:
    the loop without the handler will stop with an assertion failure.

 The macro is placed where the last labels of a switch statement are
 expected. In debug mode it expands to a default label, which asserts that
 the value is not greater than META_END, followed by the case labels of the
 tokens listed below. Otherwise it expands to a plain default label. */

 #ifdef PCRE2_DEBUG
 #define CLASS_END_CASES(meta) \
   default: \
   PCRE2_ASSERT((meta) <= META_END); \
   PCRE2_FALLTHROUGH /* Fall through */ \
   case META_CLASS: \
   case META_CLASS_NOT: \
   case META_CLASS_EMPTY: \
   case META_CLASS_EMPTY_NOT: \
   case META_CLASS_END: \
   case META_ECLASS_AND: \
   case META_ECLASS_OR: \
   case META_ECLASS_SUB: \
   case META_ECLASS_XOR: \
   case META_ECLASS_NOT:
 #else
 /* No checks without PCRE2_DEBUG: the meta argument is not used. */
 #define CLASS_END_CASES(meta) \
   default:
 #endif

 #ifdef SUPPORT_WIDE_CHARS

 /* Sift-down step of the heapsort which orders the range list.

 The buffer contains pairs of uint32_t items, and a pair is identified by
 the (always even) index of its first item. Only the first items of the
 pairs are compared. The children of the pair at index i are the pairs at
 2 * i + 2 and 2 * i + 4, and the pair with the largest first item is moved
 towards the root.

 Arguments:
   buffer        the pairs
   size          number of uint32_t items which belong to the heap
   i             index of the pair which may violate the heap property
 */

 static void do_heapify(uint32_t *buffer, size_t size, size_t i)
 {
 for (;;)
   {
   const size_t first_child = (i << 1) + 2;
   const size_t second_child = first_child + 2;
   size_t largest = i;
   uint32_t saved_first;
   uint32_t saved_second;

   if (first_child < size && buffer[first_child] > buffer[largest])
     largest = first_child;
   if (second_child < size && buffer[second_child] > buffer[largest])
     largest = second_child;

   /* The heap property holds for this pair, nothing more to do. */
   if (largest == i) break;

   /* Exchange the pair with its largest child, and continue from there. */
   saved_first = buffer[i];
   saved_second = buffer[i + 1];
   buffer[i] = buffer[largest];
   buffer[i + 1] = buffer[largest + 1];
   buffer[largest] = saved_first;
   buffer[largest + 1] = saved_second;
   i = largest;
   }
 }

 #ifdef SUPPORT_UNICODE

 /* Option bits passed to parse_class() and its helper functions. They are
 computed from the compile options by compile_optimize_class(). */

 /* PCRE2_UTF is set. */
 #define PARSE_CLASS_UTF               0x1
 /* PCRE2_CASELESS is set together with PCRE2_UTF or PCRE2_UCP. */
 #define PARSE_CLASS_CASELESS_UTF      0x2
 /* PCRE2_EXTRA_CASELESS_RESTRICT is set. */
 #define PARSE_CLASS_RESTRICTED_UTF    0x4
 /* PCRE2_EXTRA_TURKISH_CASING is set. */
 #define PARSE_CLASS_TURKISH_UTF       0x8

 /* Get the range of nocase characters which includes the 'c' character
 passed as argument, or directly follows 'c'.

 The PRIV(ucd_nocase_ranges) table is searched by binary search. The
 returned pointer refers to the first pair of the table whose second item
 is greater than 'c'. When 'c' is above MAX_UTF_CODE_POINT, the position
 PRIV(ucd_nocase_ranges_size) of the table is returned without searching. */

 static const uint32_t*
 get_nocase_range(uint32_t c)
 {
 const uint32_t *table = PRIV(ucd_nocase_ranges);
 uint32_t low = 0;
 uint32_t high = PRIV(ucd_nocase_ranges_size);

 if (c > MAX_UTF_CODE_POINT) return table + high;

 for (;;)
   {
   /* Always an odd index: the second item of the pair in the middle. */
   const uint32_t probe = ((low + high) >> 1) | 0x1;

   if (table[probe] <= c)
     {
     /* The whole pair is before 'c'. */
     low = probe + 1;
     continue;
     }

   /* Found, unless the previous pair also has a second item above 'c'. */
   if (probe <= 1 || table[probe - 2] <= c)
     return table + (probe - 1);

   high = probe - 1;
   }
 }

 /* Get the list of other case characters which belong to the characters of
 the passed range. Create ranges from these characters, and append them to
 the buffer argument.

 Other case characters which are directly before or after the range extend
 the range itself, those which are inside the range are ignored, and every
 other character is appended as a single character range. The (possibly
 extended) input range is appended last.

 Arguments:
   start         first character of the range
   end           last character of the range
   options       PARSE_CLASS_* option bits
   buffer        destination of the (start, end) pairs, or NULL when only
                 the required size is computed

 Returns:        the number of uint32_t items appended to the buffer, or
                 the number of items needed when buffer is NULL
 */

 static size_t
 utf_caseless_extend(uint32_t start, uint32_t end, uint32_t options,
   uint32_t *buffer)
 {
 uint32_t new_start = start;
 uint32_t new_end = end;
 uint32_t c = start;
 const uint32_t *list;
 uint32_t tmp[3];
 /* The input range itself always needs two items. */
 size_t result = 2;
 const uint32_t *skip_range = get_nocase_range(c);
 uint32_t skip_start = skip_range[0];

 #if PCRE2_CODE_UNIT_WIDTH == 8
 PCRE2_ASSERT(options & PARSE_CLASS_UTF);
 #endif

 /* Only the scanned part is limited here: new_end keeps the original end. */
 #if PCRE2_CODE_UNIT_WIDTH == 32
 if (end > MAX_UTF_CODE_POINT) end = MAX_UTF_CODE_POINT;
 #endif

 while (c <= end)
   {
   uint32_t co;

   /* When c is above the first item of the current nocase pair, continue
   from the second item of the pair, and load the next pair. */
   if (c > skip_start)
     {
     c = skip_range[1];
     skip_range += 2;
     skip_start = skip_range[0];
     continue;
     }

   /* Compute caseless set. */

   if ((options & (PARSE_CLASS_TURKISH_UTF|PARSE_CLASS_RESTRICTED_UTF)) ==
         PARSE_CLASS_TURKISH_UTF &&
       UCD_ANY_I(c))
     {
     co = PRIV(ucd_turkish_dotted_i_caseset) + (UCD_DOTTED_I(c)? 0 : 3);
     }
   else if ((co = UCD_CASESET(c)) != 0 &&
            (options & PARSE_CLASS_RESTRICTED_UTF) != 0 &&
            PRIV(ucd_caseless_sets)[co] < 128)
     {
     co = 0;  /* Ignore the caseless set if it's restricted. */
     }

   if (co != 0)
     list = PRIV(ucd_caseless_sets) + co;
   else
     {
     /* No caseless set: build a temporary, NOTACHAR terminated list from
     the character and its other case (when they are different). */
     co = UCD_OTHERCASE(c);
     list = tmp;
     tmp[0] = c;
     tmp[1] = NOTACHAR;

     if (co != c)
       {
       tmp[1] = co;
       tmp[2] = NOTACHAR;
       }
     }
   c++;

   /* Add characters. Each 'continue' below jumps to the condition of the
   do-while loop, so the next item of the list is processed. */
   do
     {
 #if PCRE2_CODE_UNIT_WIDTH == 16
     if (!(options & PARSE_CLASS_UTF) && *list > 0xffff) continue;
 #endif

     if (*list < new_start)
       {
       /* Directly before the range: extend the range downwards. */
       if (*list + 1 == new_start)
         {
         new_start--;
         continue;
         }
       }
     else if (*list > new_end)
       {
       /* Directly after the range: extend the range upwards. */
       if (*list - 1 == new_end)
         {
         new_end++;
         continue;
         }
       }
     else continue;  /* Already covered by the range. */

     /* Not adjacent to the range: a single character range is added. */
     result += 2;
     if (buffer != NULL)
       {
       buffer[0] = *list;
       buffer[1] = *list;
       buffer += 2;
       }
     }
   while (*(++list) != NOTACHAR);
   }

   /* Finally append the (possibly extended) input range. */
   if (buffer != NULL)
     {
     buffer[0] = new_start;
     buffer[1] = new_end;
     buffer += 2;
     (void)buffer;
     }
   return result;
 }

 #endif

 /* Add a character list to a buffer.

 The list is terminated by NOTACHAR. Each run of consecutive character
 values in the list is stored as one (first, last) pair.

 Arguments:
   p             points to the NOTACHAR terminated character list
   buffer        destination of the pairs, or NULL when only the required
                 size is computed

 Returns:        the number of uint32_t items appended (or needed)
 */

 static size_t
 append_char_list(const uint32_t *p, uint32_t *buffer)
 {
 size_t item_count = 0;

 for (; *p != NOTACHAR; item_count += 2)
   {
   const uint32_t *last = p;

   /* Find the last character of the run of consecutive values. */
   while (last[1] - 1 == last[0]) last++;

   PCRE2_ASSERT(*p < 0xffff);

   if (buffer != NULL)
     {
     *buffer++ = *p;
     *buffer++ = *last;
     }

   p = last + 1;
   }

 return item_count;
 }

 /* Returns the highest character value used as the end of "everything
 above" ranges. In the 8 bit library it is always MAX_UTF_CODE_POINT. In
 the other libraries it depends on the PARSE_CLASS_UTF bit of options when
 Unicode is supported, and it is MAX_UCHAR_VALUE otherwise. */

 static uint32_t
 get_highest_char(uint32_t options)
 {
 (void)options; /* Avoid compiler warning. */

 #if PCRE2_CODE_UNIT_WIDTH == 8
 return MAX_UTF_CODE_POINT;
 #elif defined SUPPORT_UNICODE
 return GET_MAX_CHAR_VALUE((options & PARSE_CLASS_UTF) != 0);
 #else
 return MAX_UCHAR_VALUE;
 #endif
 }

 /* Add a negated character list to a buffer.

 The list is terminated by NOTACHAR, and its first character must not be
 zero. A (start, end) pair is stored for every gap before, between, and
 after the runs of consecutive characters of the list. The last gap ends
 at the value returned by get_highest_char().

 Arguments:
   p             points to the NOTACHAR terminated character list
   options       PARSE_CLASS_* option bits
   buffer        destination of the pairs, or NULL when only the required
                 size is computed

 Returns:        the number of uint32_t items appended (or needed)
 */

 static size_t
 append_negated_char_list(const uint32_t *p, uint32_t options, uint32_t *buffer)
 {
 uint32_t gap_start = 0;
 size_t item_count = 0;

 PCRE2_ASSERT(*p > 0);

 for (; *p != NOTACHAR; item_count += 2)
   {
   const uint32_t *last = p;

   /* Find the last character of the run of consecutive values. */
   while (last[1] - 1 == last[0]) last++;

   PCRE2_ASSERT(*p < 0xffff);

   /* The gap before the current run. */
   if (buffer != NULL)
     {
     *buffer++ = gap_start;
     *buffer++ = *p - 1;
     }

   gap_start = *last + 1;
   p = last + 1;
   }

 /* The gap after the last run. */
 if (buffer != NULL)
   {
   buffer[0] = gap_start;
   buffer[1] = get_highest_char(options);
   }

 return item_count + 2;
 }

 /* Appends the range which starts at 0x100 and ends at the value returned
 by get_highest_char() to the buffer. Returns the position after the
 appended pair, or NULL when buffer is NULL (nothing is stored then). */

 static uint32_t *
 append_non_ascii_range(uint32_t options, uint32_t *buffer)
 {
 if (buffer != NULL)
   {
   *buffer++ = 0x100;
   *buffer++ = get_highest_char(options);
   }

 return buffer;
 }

 /* Converts the items of a parsed character class to (start, end) range
 pairs. Items are processed until a value which is greater than or equal
 to META_END, and is not handled by one of the explicit cases below, is
 found.

 Arguments:
   ptr           points to the first item of the class in the parsed pattern
   options       PARSE_CLASS_* option bits
   buffer        destination of the pairs, or NULL when only the required
                 size is computed

 Returns:        the number of uint32_t items appended (or needed)
 */

 static size_t
 parse_class(uint32_t *ptr, uint32_t options, uint32_t *buffer)
 {
 size_t total_size = 0;
 size_t size;
 uint32_t meta_arg;
 uint32_t start_char;

 while (TRUE)
   {
   switch (META_CODE(*ptr))
     {
     case META_ESCAPE:
       meta_arg = META_DATA(*ptr);
       switch (meta_arg)
         {
         /* Only the 0x100 .. highest character range is added for these
         escapes. */
         case ESC_D:
         case ESC_W:
         case ESC_S:
         buffer = append_non_ascii_range(options, buffer);
         total_size += 2;
         break;

         case ESC_h:
         size = append_char_list(PRIV(hspace_list), buffer);
         total_size += size;
         if (buffer != NULL) buffer += size;
         break;

         case ESC_H:
         size = append_negated_char_list(PRIV(hspace_list), options, buffer);
         total_size += size;
         if (buffer != NULL) buffer += size;
         break;

         case ESC_v:
         size = append_char_list(PRIV(vspace_list), buffer);
         total_size += size;
         if (buffer != NULL) buffer += size;
         break;

         case ESC_V:
         size = append_negated_char_list(PRIV(vspace_list), options, buffer);
         total_size += size;
         if (buffer != NULL) buffer += size;
         break;

         /* Properties occupy an extra item. Only a non-negated PT_ANY
         is converted to a range, which covers every character. */
         case ESC_p:
         case ESC_P:
         ptr++;
         if (meta_arg == ESC_p && (*ptr >> 16) == PT_ANY)
           {
           if (buffer != NULL)
             {
             buffer[0] = 0;
             buffer[1] = get_highest_char(options);
             buffer += 2;
             }
           total_size += 2;
           }
         break;
         }
       ptr++;
       continue;
     /* POSIX classes occupy two items. Only the 0x100 .. highest character
     range is added for the negated ones, nothing for the others. */
     case META_POSIX_NEG:
       buffer = append_non_ascii_range(options, buffer);
       total_size += 2;
       ptr += 2;
       continue;
     case META_POSIX:
       ptr += 2;
       continue;
     case META_BIGVALUE:
       /* Character literal */
       ptr++;
       break;
     CLASS_END_CASES(*ptr)
       if (*ptr >= META_END) return total_size;
       break;
     }

     /* Here *ptr is a character literal, which may be the start of a
     range. */
     start_char = *ptr;

     if (ptr[1] == META_RANGE_LITERAL || ptr[1] == META_RANGE_ESCAPED)
       {
       /* Skip the range marker, *ptr becomes the end of the range. */
       ptr += 2;
       PCRE2_ASSERT(*ptr < META_END || *ptr == META_BIGVALUE);

       if (*ptr == META_BIGVALUE) ptr++;

 #ifdef EBCDIC
 #error "Missing EBCDIC support"
 #endif
       }

 #ifdef SUPPORT_UNICODE
     /* Caseless matching: the other case characters are added as well. */
     if (options & PARSE_CLASS_CASELESS_UTF)
       {
       size = utf_caseless_extend(start_char, *ptr++, options, buffer);
       if (buffer != NULL) buffer += size;
       total_size += size;
       continue;
       }
 #endif

     if (buffer != NULL)
       {
       buffer[0] = start_char;
       buffer[1] = *ptr;
       buffer += 2;
       }

     ptr++;
     total_size += 2;
   }

   /* Not reached: the loop above is only left by its return statement. */
   return total_size;
 }

 /* Extra uint32_t values for storing the lengths of range lists in
 the worst case. Two uint32_t lengths and a range end for a range
 starting before 255. compile_optimize_class() adds this value to the
 size of the allocated buffer whenever the class has at least one range. */
 #define CHAR_LIST_EXTRA_SIZE 3

 /* Starting character values for each character list.

 compile_optimize_class() reads this table from its first item to its last
 item, and each list ends right before the start of the previous list. The
 number of items minus one, multiplied by XCL_TYPE_BIT_LEN, is the bit
 position of the type bits of the first list. */

 static const uint32_t char_list_starts[] = {
 #if PCRE2_CODE_UNIT_WIDTH == 32
   XCL_CHAR_LIST_HIGH_32_START,
 #endif
 #if PCRE2_CODE_UNIT_WIDTH == 32 || defined SUPPORT_UNICODE
   XCL_CHAR_LIST_LOW_32_START,
 #endif
   XCL_CHAR_LIST_HIGH_16_START,
   /* Must be terminated by XCL_CHAR_LIST_LOW_16_START,
   which also represents the end of the bitset. */
   XCL_CHAR_LIST_LOW_16_START,
 };

 /* Converts a parsed character class to a sorted list of character ranges
 which do not overlap, and optionally to character lists.

 The class is parsed twice by parse_class(): first to compute the number of
 items needed, then to fill the allocated buffer. The ranges are sorted by
 heapsort, and ranges which overlap or directly follow each other are
 merged. When enough ranges remain, the ranges starting at or above
 XCL_CHAR_LIST_LOW_16_START are converted to character lists, which are
 stored backwards from the end of the buffer.

 Arguments:
   start_ptr     points to the first item of the class in the parsed pattern
   options       the options bits
   xoptions      the extra options bits
   cb            compile data, its memory allocator is used

 Returns:        a newly allocated class_ranges structure, which is followed
                 by the range data, or NULL when the allocation fails
 */

 static class_ranges *
 compile_optimize_class(uint32_t *start_ptr, uint32_t options,
   uint32_t xoptions, compile_block *cb)
 {
 class_ranges* cranges;
 uint32_t *ptr;
 uint32_t *buffer;
 uint32_t *dst;
 uint32_t class_options = 0;
 size_t range_list_size = 0, total_size, i;
 uint32_t tmp1, tmp2;
 const uint32_t *char_list_next;
 uint16_t *next_char;
 uint32_t char_list_start, char_list_end;
 uint32_t range_start, range_end;

 /* Translate the compile options to PARSE_CLASS_* bits. */

 #ifdef SUPPORT_UNICODE
 if (options & PCRE2_UTF)
   class_options |= PARSE_CLASS_UTF;

 if ((options & PCRE2_CASELESS) && (options & (PCRE2_UTF|PCRE2_UCP)))
   class_options |= PARSE_CLASS_CASELESS_UTF;

 if (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT)
   class_options |= PARSE_CLASS_RESTRICTED_UTF;

 if (xoptions & PCRE2_EXTRA_TURKISH_CASING)
   class_options |= PARSE_CLASS_TURKISH_UTF;
 #else
 (void)options;   /* Avoid compiler warning. */
 (void)xoptions;  /* Avoid compiler warning. */
 #endif

 /* Compute required space for the range. */

 range_list_size = parse_class(start_ptr, class_options, NULL);
 PCRE2_ASSERT((range_list_size & 0x1) == 0);

 /* Allocate buffer. The total_size also represents the end of the buffer. */

 total_size = range_list_size +
    ((range_list_size >= 2) ? CHAR_LIST_EXTRA_SIZE : 0);

 cranges = cb->cx->memctl.malloc(
   sizeof(class_ranges) + total_size * sizeof(uint32_t),
   cb->cx->memctl.memory_data);

 if (cranges == NULL) return NULL;

 cranges->header.next = NULL;
 #ifdef PCRE2_DEBUG
 cranges->header.type = CDATA_CRANGE;
 #endif
 cranges->range_list_size = (uint16_t)range_list_size;
 cranges->char_lists_types = 0;
 cranges->char_lists_size = 0;
 cranges->char_lists_start = 0;

 /* No ranges: no data follows the structure. */
 if (range_list_size == 0) return cranges;

 /* Second pass: the ranges are stored right after the structure. */
 buffer = (uint32_t*)(cranges + 1);
 parse_class(start_ptr, class_options, buffer);

 /* Using <= instead of == to help static analysis. */
 if (range_list_size <= 2) return cranges;

 /* In-place sorting of ranges. */

 /* Build the heap: i starts at the last pair which has a child. */
 i = (((range_list_size >> 2) - 1) << 1);
 while (TRUE)
   {
   do_heapify(buffer, range_list_size, i);
   if (i == 0) break;
   i -= 2;
   }

 /* Repeatedly exchange the root (largest) pair with the pair at i, the end
 of the unsorted part, then restore the heap on the remaining i items. */
 i = range_list_size - 2;
 while (TRUE)
   {
   tmp1 = buffer[i];
   tmp2 = buffer[i + 1];
   buffer[i] = buffer[0];
   buffer[i + 1] = buffer[1];
   buffer[0] = tmp1;
   buffer[1] = tmp2;

   do_heapify(buffer, i, 0);
   if (i == 0) break;
   i -= 2;
   }

 /* Merge ranges whenever possible. */
 dst = buffer;
 ptr = buffer + 2;
 range_list_size -= 2;

 /* The second condition is a very rare corner case, where the end of the last
 range is the maximum character. This range cannot be extended further. */

 while (range_list_size > 0 && dst[1] != ~(uint32_t)0)
   {
   if (dst[1] + 1 < ptr[0])
     {
     /* There is a gap between the ranges: start a new output range. */
     dst += 2;
     dst[0] = ptr[0];
     dst[1] = ptr[1];
     }
   else if (dst[1] < ptr[1]) dst[1] = ptr[1];

   ptr += 2;
   range_list_size -= 2;
   }

 /* Here dst points to the last merged range. */
 PCRE2_ASSERT(dst[1] <= get_highest_char(class_options));

 /* When the number of ranges are less than six,
 they are not converted to range lists. */

 /* Ranges which end below 0x100 are not counted. */
 ptr = buffer;
 while (ptr < dst && ptr[1] < 0x100) ptr += 2;
 if (dst - ptr < (2 * (6 - 1)))
   {
   cranges->range_list_size = (uint16_t)(dst + 2 - buffer);
   return cranges;
   }

 /* Compute character lists structures. */

 char_list_next = char_list_starts;
 char_list_start = *char_list_next++;
 #if PCRE2_CODE_UNIT_WIDTH == 32
 char_list_end = XCL_CHAR_LIST_HIGH_32_END;
 #elif defined SUPPORT_UNICODE
 char_list_end = XCL_CHAR_LIST_LOW_32_END;
 #else
 char_list_end = XCL_CHAR_LIST_HIGH_16_END;
 #endif
 /* The lists are written backwards, starting at the end of the buffer. */
 next_char = (uint16_t*)(buffer + total_size);

 /* tmp1 counts the items stored in the current character list, and tmp2 is
 the bit position of the type bits of the current list in char_lists_types.
 The ranges are processed from the last one to the first one. */
 tmp1 = 0;
 tmp2 = ((sizeof(char_list_starts) / sizeof(uint32_t)) - 1) * XCL_TYPE_BIT_LEN;
 PCRE2_ASSERT(tmp2 <= 3 * XCL_TYPE_BIT_LEN && tmp2 >= XCL_TYPE_BIT_LEN);
 range_start = dst[0];
 range_end = dst[1];

 while (TRUE)
   {
   /* The range starts inside the current list. */
   if (range_start >= char_list_start)
     {
     if (range_start == range_end || range_end < char_list_end)
       {
       tmp1++;
       next_char--;

       /* Items are 16 bit long below XCL_CHAR_LIST_LOW_32_START, and
       32 bit long (two uint16_t units) otherwise. */
       if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
         *next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END);
       else
         *(uint32_t*)(--next_char) =
           (range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END;
       }

     if (range_start < range_end)
       {
       if (range_start > char_list_start)
         {
         tmp1++;
         next_char--;

         if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
           *next_char = (uint16_t)(range_start << XCL_CHAR_SHIFT);
         else
           *(uint32_t*)(--next_char) = (range_start << XCL_CHAR_SHIFT);
         }
       else
         cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2;
       }

     /* The lists must not overwrite the ranges not processed yet. */
     PCRE2_ASSERT((uint32_t*)next_char >= dst + 2);

     if (dst > buffer)
       {
       dst -= 2;
       range_start = dst[0];
       range_end = dst[1];
       continue;
       }

     /* All ranges are processed. */
     range_start = 0;
     range_end = 0;
     }

   /* The range starts before the current list, but ends inside or after
   the list. */
   if (range_end >= char_list_start)
     {
     PCRE2_ASSERT(range_start < char_list_start);

     if (range_end < char_list_end)
       {
       tmp1++;
       next_char--;

       if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
         *next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END);
       else
         *(uint32_t*)(--next_char) =
           (range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END;

       PCRE2_ASSERT((uint32_t*)next_char >= dst + 2);
       }

     cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2;
     }

   /* Store the item count of the list. Counts which do not fit into the
   type bits are stored as an extra item, and the type bits are set to
   XCL_ITEM_COUNT_MASK. */
   if (tmp1 >= XCL_ITEM_COUNT_MASK)
     {
     cranges->char_lists_types |= XCL_ITEM_COUNT_MASK << tmp2;
     next_char--;

     if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
       *next_char = (uint16_t)tmp1;
     else
       *(uint32_t*)(--next_char) = tmp1;
     }
   else
     cranges->char_lists_types |= tmp1 << tmp2;

   if (range_start < XCL_CHAR_LIST_LOW_16_START) break;

   /* Continue with the next list, which ends right before the current one. */
   PCRE2_ASSERT(tmp2 >= XCL_TYPE_BIT_LEN);
   char_list_end = char_list_start - 1;
   char_list_start = *char_list_next++;
   tmp1 = 0;
   tmp2 -= XCL_TYPE_BIT_LEN;
   }

 /* A remaining range which starts below XCL_CHAR_LIST_LOW_16_START is kept
 in the range list. */
 if (dst[0] < XCL_CHAR_LIST_LOW_16_START) dst += 2;
 PCRE2_ASSERT((uint16_t*)dst <= next_char);

 /* The size and the start offset of the character lists are in bytes. */
 cranges->char_lists_size =
   (size_t)((uint8_t*)(buffer + total_size) - (uint8_t*)next_char);
 cranges->char_lists_start = (size_t)((uint8_t*)next_char - (uint8_t*)buffer);
 cranges->range_list_size = (uint16_t)(dst - buffer);
 return cranges;
 }

 #endif /* SUPPORT_WIDE_CHARS */

 #ifdef SUPPORT_UNICODE

 /* Sets those bits of a 256 bit (32 byte) bitmap which belong to the
 characters below 256 selected by a Unicode property. Bits are only set,
 they are never cleared.

 Arguments:
   ptype         the property type (PT_xxx value)
   pdata         the property data, it is used by some of the types only
   negated       when it is not FALSE, the characters which do not have
                 the property are selected
   classbits     the 32 byte bitmap to be updated
 */

 void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
   uint8_t *classbits)
 {
 /* Update PRIV(xclass) when this function is changed. */
 int c;

 /* Every character is selected, or none of them when negated. */
 if (ptype == PT_ANY)
   {
   if (!negated) memset(classbits, 0xff, 32);
   return;
   }

 for (c = 0; c < 256; c++)
   {
   const ucd_record *record = GET_UCD(c);
   int ctype;
   uint32_t gtype;
   BOOL has_prop = FALSE;

   (void)has_prop;

   switch (ptype)
     {
     case PT_LAMP:
     ctype = record->chartype;
     has_prop = (ctype == ucp_Lu || ctype == ucp_Ll || ctype == ucp_Lt);
     break;

     case PT_GC:
     has_prop = (PRIV(ucp_gentype)[record->chartype] == pdata);
     break;

     case PT_PC:
     has_prop = (record->chartype == pdata);
     break;

     case PT_SC:
     has_prop = (record->script == pdata);
     break;

     case PT_SCX:
     has_prop = (record->script == pdata ||
       MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(record), pdata) != 0);
     break;

     case PT_ALNUM:
     gtype = PRIV(ucp_gentype)[record->chartype];
     has_prop = (gtype == ucp_L || gtype == ucp_N);
     break;

     case PT_SPACE:    /* Perl space */
     case PT_PXSPACE:  /* POSIX space */
     switch(c)
       {
       HSPACE_BYTE_CASES:
       VSPACE_BYTE_CASES:
       has_prop = TRUE;
       break;

       default:
       has_prop = (PRIV(ucp_gentype)[record->chartype] == ucp_Z);
       break;
       }
     break;

     case PT_WORD:
     ctype = record->chartype;
     gtype = PRIV(ucp_gentype)[ctype];
     has_prop = (gtype == ucp_L || gtype == ucp_N ||
                 ctype == ucp_Mn || ctype == ucp_Pc);
     break;

     case PT_UCNC:
     has_prop = (c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
                 c == CHAR_GRAVE_ACCENT || c >= 0xa0);
     break;

     case PT_BIDICL:
     has_prop = (UCD_BIDICLASS_PROP(record) == pdata);
     break;

     case PT_BOOL:
     has_prop = MAPBIT(PRIV(ucd_boolprop_sets) +
                       UCD_BPROPS_PROP(record), pdata) != 0;
     break;

     case PT_PXGRAPH:
     ctype = record->chartype;
     gtype = PRIV(ucp_gentype)[ctype];
     has_prop = (gtype != ucp_Z && (gtype != ucp_C || ctype == ucp_Cf));
     break;

     case PT_PXPRINT:
     ctype = record->chartype;
     has_prop = (ctype != ucp_Zl && ctype != ucp_Zp &&
        (PRIV(ucp_gentype)[ctype] != ucp_C || ctype == ucp_Cf));
     break;

     case PT_PXPUNCT:
     gtype = PRIV(ucp_gentype)[record->chartype];
     has_prop = (gtype == ucp_P || (c < 128 && gtype == ucp_S));
     break;

     default:
     PCRE2_ASSERT(ptype == PT_PXXDIGIT);
     has_prop = (c >= CHAR_0 && c <= CHAR_9) ||
                (c >= CHAR_A && c <= CHAR_F) ||
                (c >= CHAR_a && c <= CHAR_f);
     break;
     }

   if (negated) has_prop = !has_prop;

   /* Byte c / 8 of the bitmap holds the bit of character c. */
   if (has_prop) classbits[c >> 3] |= (uint8_t)(1 << (c & 0x7));
   }
 }

 #endif /* SUPPORT_UNICODE */


 #ifdef SUPPORT_WIDE_CHARS

 /*************************************************
 *           XClass related properties            *
 *************************************************/

 /* XClass needs to be generated. */
 #define XCLASS_REQUIRED 0x1
 /* XClass has 8 bit characters. */
 #define XCLASS_HAS_8BIT_CHARS 0x2
 /* XClass has properties. */
 #define XCLASS_HAS_PROPS 0x4
 /* XClass has character lists. */
 #define XCLASS_HAS_CHAR_LISTS 0x8
 /* XClass matches all characters >= 256. */
 #define XCLASS_HIGH_ANY 0x10

 #endif


 /*************************************************
 *   Internal entry point for add range to class  *
 *************************************************/

 /* This function sets the overall range for characters < 256.
 It also handles non-utf case folding.

 Arguments:
   options       the options bits
   xoptions      the extra options bits
   cb            compile data
   start         start of range character
   end           end of range character

 Returns:        cb->classbits is updated
 */

 static void
 add_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
   uint32_t start, uint32_t end)
 {
 uint8_t *classbits = cb->classbits.classbits;
 /* Only the characters below 256 are stored in the bitmap. */
 uint32_t last = (end > 0xff) ? 0xff : end;
 uint32_t c, first_full_byte, full_bytes_end;

 #ifndef SUPPORT_UNICODE
 (void)xoptions; /* Avoid compiler warning. */
 #endif

 /* Caseless matching: the other case of each character of the range is
 added first. In Unicode, some characters below 256 have other cases above
 255 and vice-versa; only the other cases below 256 are recorded here. */

 if ((options & PCRE2_CASELESS) != 0)
   {
 #ifdef SUPPORT_UNICODE
   /* UTF or UCP mode: the characters are treated as Unicode characters.
   Characters below 128 use the case flipping table (except for the i
   characters when Turkish casing is active and caseless restriction is
   not), the others use the Unicode other case value. */
   if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0)
     {
     BOOL turkish_i =
       (xoptions & (PCRE2_EXTRA_TURKISH_CASING|PCRE2_EXTRA_CASELESS_RESTRICT)) ==
         PCRE2_EXTRA_TURKISH_CASING;

     for (c = start; c <= last; c++)
       {
       if (c < 128)
         {
         if (!turkish_i || !UCD_ANY_I(c))
           {
           SETBIT(classbits, cb->fcc[c]);
           }
         }
       else
         {
         uint32_t other = UCD_OTHERCASE(c);

         if (other <= 0xff)
           {
           SETBIT(classbits, other);
           }
         }
       }
     }

   else
 #endif  /* SUPPORT_UNICODE */

   /* Not UTF mode */
     {
     for (c = start; c <= last; c++)
       {
       SETBIT(classbits, cb->fcc[c]);
       }
     }
   }

 /* Set the bits of the range itself. Bytes of the bitmap which are fully
 covered by the range are filled at once. */

 first_full_byte = (start + 7) >> 3;
 full_bytes_end = (last + 1) >> 3;

 if (first_full_byte < full_bytes_end)
   {
   memset(classbits + first_full_byte, 0xff, full_bytes_end - first_full_byte);

   /* Bits before the first fully covered byte. */
   for (c = start; c < (first_full_byte << 3); c++)
     {
     SETBIT(classbits, c);
     }

   /* Bits after the last fully covered byte. */
   for (c = (full_bytes_end << 3); c <= last; c++)
     {
     SETBIT(classbits, c);
     }
   }
 else
   {
   /* No byte is fully covered. Regardless of start, c is always <= 255. */
   for (c = start; c <= last; c++)
     {
     SETBIT(classbits, c);
     }
   }
 }


 #if PCRE2_CODE_UNIT_WIDTH == 8
 /*************************************************
 *   Internal entry point for add list to class   *
 *************************************************/

 /* This function is used for adding a list of horizontal or vertical whitespace
 characters to a class. The list must be in order so that ranges of characters
 can be detected and handled appropriately. This function sets the overall range
 so that the internal functions can try to avoid duplication when handling
 case-independence.

 Arguments:
   options       the options bits
   xoptions      the extra options bits
   cb            contains pointers to tables etc.
   p             points to row of 32-bit values, terminated by NOTACHAR

 Returns:        cb->classbits is updated
 */

 static void
 add_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
   const uint32_t *p)
 {
 /* Only the characters below 256 are processed. */
 while (*p < 256)
   {
   const uint32_t *run_end = p;

   /* Find the last character of the run of consecutive values. */
   while (run_end[1] == run_end[0] + 1) run_end++;

   add_to_class(options, xoptions, cb, *p, *run_end);
   p = run_end + 1;
   }
 }


 /*************************************************
 *    Add characters not in a list to a class     *
 *************************************************/

 /* This function is used for adding the complement of a list of horizontal or
 vertical whitespace to a class. The list must be in order.

 Arguments:
   options       the options bits
   xoptions      the extra options bits
   cb            contains pointers to tables etc.
   p             points to row of 32-bit values, terminated by NOTACHAR

 Returns:        cb->classbits is updated
 */

 static void
 add_not_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
   const uint32_t *p)
 {
 /* The characters before the first item of the list. */
 if (*p != 0)
   add_to_class(options, xoptions, cb, 0, *p - 1);

 for (; *p < 256; p++)
   {
   uint32_t gap_start, gap_end;

   /* Move to the last character of a run of consecutive values. */
   while (p[1] == *p + 1) p++;

   /* The gap lasts until the next item of the list, but at most to 255. */
   gap_start = *p + 1;
   gap_end = (p[1] <= 255) ? p[1] - 1 : 255;
   add_to_class(options, xoptions, cb, gap_start, gap_end);
   }
 }
 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */


 /*************************************************
 *  Main entry-point to compile a character class *
 *************************************************/

 /* This function consumes a "leaf", which is a set of characters that will
 become a single OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY. */

 uint32_t *
 PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
   uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
   int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr)
 {
 uint32_t *pptr = start_ptr;
 PCRE2_UCHAR *code = *pcode;
 BOOL should_flip_negation;
 const uint8_t *cbits = cb->cbits;
 /* Some functions such as add_to_class() or eclass processing
 expects that the bitset is stored in cb->classbits.classbits. */
 uint8_t *const classbits = cb->classbits.classbits;

 #ifdef SUPPORT_UNICODE
 BOOL utf = (options & PCRE2_UTF) != 0;
 #else  /* No Unicode support */
 BOOL utf = FALSE;
 #endif

 /* Helper variables for OP_XCLASS opcode (for characters > 255). */

 #ifdef SUPPORT_WIDE_CHARS
 uint32_t xclass_props;
 PCRE2_UCHAR *class_uchardata;
 class_ranges* cranges;
 #else
 (void)has_bitmap;    /* Avoid compiler warning. */
 (void)errorcodeptr;  /* Avoid compiler warning. */
 (void)lengthptr;     /* Avoid compiler warning. */
 #endif

 /* If an XClass contains a negative special such as \S, we need to flip the
 negation flag at the end, so that support for characters > 255 works correctly
 (they are all included in the class). An XClass may need to insert specific
 matching or non-matching code for wide characters.
 */

 should_flip_negation = FALSE;

 /* XClass will be used when characters > 255 might match. */

 #ifdef SUPPORT_WIDE_CHARS
 xclass_props = 0;

 #if PCRE2_CODE_UNIT_WIDTH == 8
 cranges = NULL;

 if (utf)
 #endif
   {
   if (lengthptr != NULL)
     {
     cranges = compile_optimize_class(pptr, options, xoptions, cb);

     if (cranges == NULL)
       {
       *errorcodeptr = ERR21;
       return NULL;
       }

     /* Caching the pre-processed character ranges. */
     if (cb->last_data != NULL)
       cb->last_data->next = &cranges->header;
     else
       cb->first_data = &cranges->header;

     cb->last_data = &cranges->header;
     }
   else
     {
     /* Reuse the pre-processed character ranges. */
     cranges = (class_ranges*)cb->first_data;
     PCRE2_ASSERT(cranges != NULL && cranges->header.type == CDATA_CRANGE);
     cb->first_data = cranges->header.next;
     }

   if (cranges->range_list_size > 0)
     {
     const uint32_t *ranges = (const uint32_t*)(cranges + 1);

     if (ranges[0] <= 255)
       xclass_props |= XCLASS_HAS_8BIT_CHARS;

     if (ranges[cranges->range_list_size - 1] == GET_MAX_CHAR_VALUE(utf) &&
         ranges[cranges->range_list_size - 2] <= 256)
       xclass_props |= XCLASS_HIGH_ANY;
     }
   }

 class_uchardata = code + LINK_SIZE + 2;   /* For XCLASS items */
 #endif /* SUPPORT_WIDE_CHARS */

 /* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map
 in a temporary bit of memory, in case the class contains fewer than two
 8-bit characters because in that case the compiled code doesn't use the bit
 map. */

 memset(classbits, 0, 32);

 /* Process items until end_ptr is reached. */

 while (TRUE)
   {
   uint32_t meta = *(pptr++);
   BOOL local_negate;
   int posix_class;
   int taboffset, tabopt;
   class_bits_storage pbits;
   uint32_t escape, c;

   /* Handle POSIX classes such as [:alpha:] etc. */
   switch (META_CODE(meta))
     {
     case META_POSIX:
     case META_POSIX_NEG:

     local_negate = (meta == META_POSIX_NEG);
     posix_class = *(pptr++);

     if (local_negate) should_flip_negation = TRUE;  /* Note negative special */

     /* If matching is caseless, upper and lower are converted to alpha.
     This relies on the fact that the class table starts with alpha,
     lower, upper as the first 3 entries. */

     if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2)
       posix_class = 0;

     /* When PCRE2_UCP is set, some of the POSIX classes are converted to
     different escape sequences that use Unicode properties \p or \P.
     Others that are not available via \p or \P have to generate
     XCL_PROP/XCL_NOTPROP directly, which is done here. */

 #ifdef SUPPORT_UNICODE
     /* TODO This entire block of code here appears to be unreachable!? I simply
     can't see how it can be hit, given that the frontend parser doesn't emit
     META_POSIX for GRAPH/PRINT/PUNCT when UCP is set. */
     if ((options & PCRE2_UCP) != 0 &&
         (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0)
       {
       uint32_t ptype;

       switch(posix_class)
         {
         case PC_GRAPH:
         case PC_PRINT:
         case PC_PUNCT:
         ptype = (posix_class == PC_GRAPH)? PT_PXGRAPH :
                 (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT;

         PRIV(update_classbits)(ptype, 0, local_negate, classbits);

         if ((xclass_props & XCLASS_HIGH_ANY) == 0)
           {
           if (lengthptr != NULL)
             *lengthptr += 3;
           else
             {
             *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
             *class_uchardata++ = (PCRE2_UCHAR)ptype;
             *class_uchardata++ = 0;
             }
           xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
           }
         continue;

         /* For the other POSIX classes (ex: ascii) we are going to
         fall through to the non-UCP case and build a bit map for
         characters with code points less than 256. However, if we are in
         a negated POSIX class, characters with code points greater than
         255 must either all match or all not match, depending on whether
         the whole class is not or is negated. For example, for
         [[:^ascii:]... they must all match, whereas for [^[:^ascii:]...
         they must not.

         In the special case where there are no xclass items, this is
         automatically handled by the use of OP_CLASS or OP_NCLASS, but an
         explicit range is needed for OP_XCLASS. Setting a flag here
         causes the range to be generated later when it is known that
         OP_XCLASS is required. In the 8-bit library this is relevant only in
         utf mode, since no wide characters can exist otherwise. */

         default:
         break;
         }
       }
 #endif  /* SUPPORT_UNICODE */

     /* In the non-UCP case, or when UCP makes no difference, we build the
     bit map for the POSIX class in a chunk of local store because we may
     be adding and subtracting from it, and we don't want to subtract bits
     that may be in the main map already. At the end we or the result into
     the bit map that is being built. */

     posix_class *= 3;

     /* Copy in the first table (always present) */

     memcpy(pbits.classbits, cbits + PRIV(posix_class_maps)[posix_class], 32);

     /* If there is a second table, add or remove it as required. */

     taboffset = PRIV(posix_class_maps)[posix_class + 1];
     tabopt = PRIV(posix_class_maps)[posix_class + 2];

     if (taboffset >= 0)
       {
       if (tabopt >= 0)
         for (int i = 0; i < 32; i++)
           pbits.classbits[i] |= cbits[i + taboffset];
       else
         for (int i = 0; i < 32; i++)
           pbits.classbits[i] &= (uint8_t)(~cbits[i + taboffset]);
       }

     /* Now see if we need to remove any special characters. An option
     value of 1 removes vertical space and 2 removes underscore. */

     if (tabopt < 0) tabopt = -tabopt;
 #ifdef EBCDIC
       {
       uint8_t posix_vertical[4] = { CHAR_LF, CHAR_VT, CHAR_FF, CHAR_CR };
       uint8_t posix_underscore = CHAR_UNDERSCORE;
       uint8_t *chars = NULL;
       int n = 0;

       if (tabopt == 1) { chars = posix_vertical; n = 4; }
       else if (tabopt == 2) { chars = &posix_underscore; n = 1; }

       for (; n > 0; ++chars, --n)
         pbits.classbits[*chars/8] &= ~(1u << (*chars&7));
       }
 #else
     if (tabopt == 1) pbits.classbits[1] &= ~0x3c;
     else if (tabopt == 2) pbits.classbits[11] &= 0x7f;
 #endif

     /* Add the POSIX table or its complement into the main table that is
     being built and we are done. */

       {
       uint32_t *classwords = cb->classbits.classwords;

       if (local_negate)
         for (int i = 0; i < 8; i++)
           classwords[i] |= (uint32_t)(~pbits.classwords[i]);
       else
         for (int i = 0; i < 8; i++)
           classwords[i] |= pbits.classwords[i];
       }

 #ifdef SUPPORT_WIDE_CHARS
     /* Every class contains at least one < 256 character. */
     xclass_props |= XCLASS_HAS_8BIT_CHARS;
 #endif
     continue;               /* End of POSIX handling */

     /* Other than POSIX classes, the only items we should encounter are
     \d-type escapes and literal characters (possibly as ranges). */
     case META_BIGVALUE:
     meta = *(pptr++);
     break;

     case META_ESCAPE:
     escape = META_DATA(meta);

     switch(escape)
       {
       case ESC_d:
       for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
       break;

       case ESC_D:
       should_flip_negation = TRUE;
       for (int i = 0; i < 32; i++)
         classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]);
       break;

       case ESC_w:
       for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
       break;

       case ESC_W:
       should_flip_negation = TRUE;
       for (int i = 0; i < 32; i++)
         classbits[i] |= (uint8_t)(~cbits[i+cbit_word]);
       break;

       /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
       5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
       previously set by something earlier in the character class.
       Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so
       we could just adjust the appropriate bit. From PCRE 8.34 we no
       longer treat \s and \S specially. */

       case ESC_s:
       for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
       break;

       case ESC_S:
       should_flip_negation = TRUE;
       for (int i = 0; i < 32; i++)
         classbits[i] |= (uint8_t)(~cbits[i+cbit_space]);
       break;

       /* When adding the horizontal or vertical space lists to a class, or
       their complements, disable PCRE2_CASELESS, because it just wastes
       time, and in the "not-x" UTF cases can create unwanted duplicates in
       the XCLASS list (provoked by characters that have more than one other
       case and by both cases being in the same "not-x" sublist). */

       case ESC_h:
 #if PCRE2_CODE_UNIT_WIDTH == 8
 #ifdef SUPPORT_UNICODE
       if (cranges != NULL) break;
 #endif
       add_list_to_class(options & ~PCRE2_CASELESS, xoptions,
         cb, PRIV(hspace_list));
 #else
       PCRE2_ASSERT(cranges != NULL);
 #endif
       break;

       case ESC_H:
 #if PCRE2_CODE_UNIT_WIDTH == 8
 #ifdef SUPPORT_UNICODE
       if (cranges != NULL) break;
 #endif
       add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions,
         cb, PRIV(hspace_list));
 #else
       PCRE2_ASSERT(cranges != NULL);
 #endif
       break;

       case ESC_v:
 #if PCRE2_CODE_UNIT_WIDTH == 8
 #ifdef SUPPORT_UNICODE
       if (cranges != NULL) break;
 #endif
       add_list_to_class(options & ~PCRE2_CASELESS, xoptions,
         cb, PRIV(vspace_list));
 #else
       PCRE2_ASSERT(cranges != NULL);
 #endif
       break;

       case ESC_V:
 #if PCRE2_CODE_UNIT_WIDTH == 8
 #ifdef SUPPORT_UNICODE
       if (cranges != NULL) break;
 #endif
       add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions,
         cb, PRIV(vspace_list));
 #else
       PCRE2_ASSERT(cranges != NULL);
 #endif
       break;

       /* If Unicode is not supported, \P and \p are not allowed and are
       faulted at parse time, so will never appear here. */

 #ifdef SUPPORT_UNICODE
       case ESC_p:
       case ESC_P:
         {
         uint32_t ptype = *pptr >> 16;
         uint32_t pdata = *(pptr++) & 0xffff;

         /* The "Any" is processed by PRIV(update_classbits)(). */
         if (ptype == PT_ANY)
           {
 #if PCRE2_CODE_UNIT_WIDTH == 8
           if (!utf && escape == ESC_p) memset(classbits, 0xff, 32);
 #endif
           continue;
           }

         PRIV(update_classbits)(ptype, pdata, (escape == ESC_P), classbits);

         if ((xclass_props & XCLASS_HIGH_ANY) == 0)
           {
           if (lengthptr != NULL)
             *lengthptr += 3;
           else
             {
             *class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP;
             *class_uchardata++ = ptype;
             *class_uchardata++ = pdata;
             }
           xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
           }
         }
       continue;
 #endif
       }

 #ifdef SUPPORT_WIDE_CHARS
     /* Every non-property class contains at least one < 256 character. */
     xclass_props |= XCLASS_HAS_8BIT_CHARS;
 #endif
     /* End handling \d-type escapes */
     continue;

     CLASS_END_CASES(meta)
     /* Literals. */
     if (meta < META_END) break;
     /* Non-literals: end of class contents. */
     goto END_PROCESSING;
     }

   /* A literal character may be followed by a range meta. At parse time
   there are checks for out-of-order characters, for ranges where the two
   characters are equal, and for hyphens that cannot indicate a range. At
   this point, therefore, no checking is needed. */

   c = meta;

   /* Remember if \r or \n were explicitly used */

   if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;

   /* Process a character range */

   if (*pptr == META_RANGE_LITERAL || *pptr == META_RANGE_ESCAPED)
     {
     uint32_t d;

 #ifdef EBCDIC
     BOOL range_is_literal = (*pptr == META_RANGE_LITERAL);
 #endif
     ++pptr;
     d = *(pptr++);
     if (d == META_BIGVALUE) d = *(pptr++);

     /* Remember an explicit \r or \n, and add the range to the class. */

     if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;

 #if PCRE2_CODE_UNIT_WIDTH == 8
 #ifdef SUPPORT_UNICODE
     if (cranges != NULL) continue;
     xclass_props |= XCLASS_HAS_8BIT_CHARS;
 #endif

     /* In an EBCDIC environment, Perl treats alphabetic ranges specially
     because there are holes in the encoding, and simply using the range
     A-Z (for example) would include the characters in the holes. This
     applies only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */

 #ifdef EBCDIC
     if (range_is_literal &&
          (cb->ctypes[c] & ctype_letter) != 0 &&
          (cb->ctypes[d] & ctype_letter) != 0 &&
          (c <= CHAR_z) == (d <= CHAR_z))
       {
       uint32_t uc = (d <= CHAR_z)? 0 : 64;
       uint32_t C = c - uc;
       uint32_t D = d - uc;

       if (C <= CHAR_i)
         {
         add_to_class(options, xoptions, cb, C + uc,
           ((D < CHAR_i)? D : CHAR_i) + uc);
         C = CHAR_j;
         }

       if (C <= D && C <= CHAR_r)
         {
         add_to_class(options, xoptions, cb, C + uc,
           ((D < CHAR_r)? D : CHAR_r) + uc);
         C = CHAR_s;
         }

       if (C <= D)
         add_to_class(options, xoptions, cb, C + uc, D + uc);
       }
     else
 #endif
     /* Not an EBCDIC special range */

     add_to_class(options, xoptions, cb, c, d);
 #else
     PCRE2_ASSERT(cranges != NULL);
 #endif
     continue;
     }  /* End of range handling */

   /* Character ranges are ignored when class_ranges is present. */
 #if PCRE2_CODE_UNIT_WIDTH == 8
 #ifdef SUPPORT_UNICODE
   if (cranges != NULL) continue;
   xclass_props |= XCLASS_HAS_8BIT_CHARS;
 #endif
   /* Handle a single character. */

   add_to_class(options, xoptions, cb, meta, meta);
 #else
   PCRE2_ASSERT(cranges != NULL);
 #endif
   }   /* End of main class-processing loop */

 END_PROCESSING:

 #ifdef SUPPORT_WIDE_CHARS
 PCRE2_ASSERT((xclass_props & XCLASS_HAS_PROPS) == 0 ||
              (xclass_props & XCLASS_HIGH_ANY) == 0);

 if (cranges != NULL)
   {
   uint32_t *range = (uint32_t*)(cranges + 1);
   uint32_t *end = range + cranges->range_list_size;

   while (range < end && range[0] < 256)
     {
     PCRE2_ASSERT((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0);
     /* Add range to bitset. If we are in UTF or UCP mode, then clear the
     caseless bit, because the cranges handle caselessness (only) in this
     condition; see the condition for PARSE_CLASS_CASELESS_UTF in
     compile_optimize_class(). */
     add_to_class(((options & (PCRE2_UTF|PCRE2_UCP)) != 0)?
         (options & ~PCRE2_CASELESS) : options, xoptions, cb, range[0], range[1]);

     if (range[1] > 255) break;
     range += 2;
     }

   if (cranges->char_lists_size > 0)
     {
     /* The cranges structure is still used and freed later. */
     PCRE2_ASSERT((xclass_props & XCLASS_HIGH_ANY) == 0);
     xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_CHAR_LISTS;
     }
   else
     {
     if ((xclass_props & XCLASS_HIGH_ANY) != 0)
       {
       PCRE2_ASSERT(range + 2 == end && range[0] <= 256 &&
         range[1] >= GET_MAX_CHAR_VALUE(utf));
       should_flip_negation = TRUE;
       range = end;
       }

     while (range < end)
       {
       uint32_t range_start = range[0];
       uint32_t range_end = range[1];

       range += 2;
       xclass_props |= XCLASS_REQUIRED;

       if (range_start < 256) range_start = 256;

       if (lengthptr != NULL)
         {
 #ifdef SUPPORT_UNICODE
         if (utf)
           {
           *lengthptr += 1;

           if (range_start < range_end)
             *lengthptr += PRIV(ord2utf)(range_start, class_uchardata);

           *lengthptr += PRIV(ord2utf)(range_end, class_uchardata);
           continue;
           }
 #endif  /* SUPPORT_UNICODE */

         *lengthptr += range_start < range_end ? 3 : 2;
         continue;
         }

 #ifdef SUPPORT_UNICODE
       if (utf)
         {
         if (range_start < range_end)
           {
           *class_uchardata++ = XCL_RANGE;
           class_uchardata += PRIV(ord2utf)(range_start, class_uchardata);
           }
         else
           *class_uchardata++ = XCL_SINGLE;

         class_uchardata += PRIV(ord2utf)(range_end, class_uchardata);
         continue;
         }
 #endif  /* SUPPORT_UNICODE */

       /* Without UTF support, character values are constrained
       by the bit length, and can only be > 256 for 16-bit and
       32-bit libraries. */
 #if PCRE2_CODE_UNIT_WIDTH != 8
       if (range_start < range_end)
         {
         *class_uchardata++ = XCL_RANGE;
         *class_uchardata++ = range_start;
         }
       else
         *class_uchardata++ = XCL_SINGLE;

       *class_uchardata++ = range_end;
 #endif  /* PCRE2_CODE_UNIT_WIDTH != 8 */
       }

     if (lengthptr == NULL)
       cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
     }
   }
 #endif /* SUPPORT_WIDE_CHARS */

 /* If there are characters with values > 255, or Unicode property settings
 (\p or \P), we have to compile an extended class, with its own opcode,
 unless there were no property settings and there was a negated special such
 as \S in the class, and PCRE2_UCP is not set, because in that case all
 characters > 255 are in or not in the class, so any that were explicitly
 given as well can be ignored.

 In the UCP case, if certain negated POSIX classes (ex: [:^ascii:]) were
 present in a class, we either have to match or not match all wide
 characters (depending on whether the whole class is or is not negated).
 This requirement is indicated by match_all_or_no_wide_chars being true.
 We do this by including an explicit range, which works in both cases.
 This applies only in UTF and 16-bit and 32-bit non-UTF modes, since there
 cannot be any wide characters in 8-bit non-UTF mode.

 When there *are* properties in a positive UTF-8 or any 16-bit or 32-bit
 class where \S etc is present without PCRE2_UCP, causing an extended class
 to be compiled, we make sure that all characters > 255 are included by
 forcing match_all_or_no_wide_chars to be true.

 If, when generating an xclass, there are no characters < 256, we can omit
 the bitmap in the actual compiled code. */

 #ifdef SUPPORT_WIDE_CHARS  /* Defined for 16/32 bits, or 8-bit with Unicode */
 if ((xclass_props & XCLASS_REQUIRED) != 0)
   {
   PCRE2_UCHAR *previous = code;

   if ((xclass_props & XCLASS_HAS_CHAR_LISTS) == 0)
     *class_uchardata++ = XCL_END;    /* Marks the end of extra data */
   *code++ = OP_XCLASS;
   code += LINK_SIZE;
   *code = negate_class? XCL_NOT:0;
   if ((xclass_props & XCLASS_HAS_PROPS) != 0) *code |= XCL_HASPROP;

   /* If the map is required, move up the extra data to make room for it;
   otherwise just move the code pointer to the end of the extra data. */

   if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0 || has_bitmap != NULL)
     {
     if (negate_class)
       {
       uint32_t *classwords = cb->classbits.classwords;
       for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i];
       }

     if (has_bitmap == NULL)
       {
       *code++ |= XCL_MAP;
       (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
         CU2BYTES(class_uchardata - code));
       memcpy(code, classbits, 32);
       code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
       }
     else
       {
       code = class_uchardata;
       if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0)
         *has_bitmap = TRUE;
       }
     }
   else code = class_uchardata;

   if ((xclass_props & XCLASS_HAS_CHAR_LISTS) != 0)
     {
     /* Char lists size is an even number, because all items are 16 or 32
     bit values. The character list data is always aligned to 32 bits. */
     size_t char_lists_size = cranges->char_lists_size;
     PCRE2_ASSERT((char_lists_size & 0x1) == 0 &&
                  (cb->char_lists_size & 0x3) == 0);

     if (lengthptr != NULL)
       {
       char_lists_size = CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));

 #if PCRE2_CODE_UNIT_WIDTH == 8
       *lengthptr += 2 + LINK_SIZE;
 #else
       *lengthptr += 1 + LINK_SIZE;
 #endif

       cb->char_lists_size += char_lists_size;

       char_lists_size /= sizeof(PCRE2_UCHAR);

       /* Storage space for character lists is included
       in the maximum pattern size. */
       if (*lengthptr > MAX_PATTERN_SIZE ||
           MAX_PATTERN_SIZE - *lengthptr < char_lists_size)
         {
         *errorcodeptr = ERR20;   /* Pattern is too large */
         return NULL;
         }
       }
     else
       {
       uint8_t *data;

       PCRE2_ASSERT(cranges->char_lists_types <= XCL_TYPE_MASK);
 #if PCRE2_CODE_UNIT_WIDTH == 8
       /* Encode as high / low bytes. */
       code[0] = (uint8_t)(XCL_LIST |
         (cranges->char_lists_types >> 8));
       code[1] = (uint8_t)cranges->char_lists_types;
       code += 2;
 #else
       *code++ = (PCRE2_UCHAR)(XCL_LIST | cranges->char_lists_types);
 #endif

       /* Character lists are stored in backwards direction from
       byte code start. The non-dfa/dfa matchers can access these
       lists using the byte code start stored in match blocks.
       Each list is aligned to 32 bit with an optional unused
       16 bit value at the beginning of the character list. */

       cb->char_lists_size += char_lists_size;
       data = (uint8_t*)cb->start_code - cb->char_lists_size;

       memcpy(data, (uint8_t*)(cranges + 1) + cranges->char_lists_start,
         char_lists_size);

       /* Since character lists total size is less than MAX_PATTERN_SIZE,
       their starting offset fits into a value which size is LINK_SIZE. */

       char_lists_size = cb->char_lists_size;
       PUT(code, 0, (uint32_t)(char_lists_size >> 1));
       code += LINK_SIZE;

 #if defined PCRE2_DEBUG || defined SUPPORT_VALGRIND
       if ((char_lists_size & 0x2) != 0)
         {
         /* In debug the unused 16 bit value is set
         to a fixed value and marked unused. */
         ((uint16_t*)data)[-1] = 0x5555;
 #ifdef SUPPORT_VALGRIND
         VALGRIND_MAKE_MEM_NOACCESS(data - 2, 2);
 #endif
         }
 #endif

       cb->char_lists_size =
         CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));

       cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
       }
     }

   /* Now fill in the complete length of the item */

   PUT(previous, 1, (int)(code - previous));
   goto DONE;   /* End of class handling */
   }
 #endif  /* SUPPORT_WIDE_CHARS */

 /* If there are no characters > 255, or they are all to be included or
 excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
 whole class was negated and whether there were negative specials such as \S
 (non-UCP) in the class. Then copy the 32-byte map into the code vector,
 negating it if necessary. */

 if (negate_class)
   {
   uint32_t *classwords = cb->classbits.classwords;

   for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i];
   }

 if ((SELECT_VALUE8(!utf, 0) || negate_class != should_flip_negation) &&
     cb->classbits.classwords[0] == ~(uint32_t)0)
   {
   const uint32_t *classwords = cb->classbits.classwords;
   int i;

   for (i = 0; i < 8; i++)
     if (classwords[i] != ~(uint32_t)0) break;

   if (i == 8)
     {
     *code++ = OP_ALLANY;
     goto DONE;   /* End of class handling */
     }
   }

 *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
 memcpy(code, classbits, 32);
 code += 32 / sizeof(PCRE2_UCHAR);

 DONE:
 *pcode = code;
 return pptr - 1;
 }


 /* ===================================================================*/
 /* Here follows a block of ECLASS-compiling functions. You may well want to
 read them from top to bottom; they are ordered from leafmost (at the top) to
 outermost parser (at the bottom of the file). */

 /* This function applies the negation operator to one operand, and records the
 result back into *pop_info.

 Arguments:
   pop_info            describes the operand (single-op type, code start,
                         length, and class bitmap); updated in place
   lengthptr           NULL when code is really being written; otherwise no
                         code is touched and any extra code unit that would be
                         needed is added to *lengthptr
   preserve_classbits  when TRUE the operand's bitmap is left alone; when
                         FALSE all of its bits are inverted

 Returns:              nothing
 */

 static void
 fold_negation(eclass_op_info *pop_info, PCRE2_SIZE *lengthptr,
   BOOL preserve_classbits)
 {
 switch (pop_info->op_single_type)
   {
   /* A zero type means the operand is already built from several ops. We do
   not walk into it to push the negation down the tree (that would make the
   compile O(n^2), which a hostile pattern could exploit); instead one ECL_NOT
   is placed directly after the operand's existing code. */

   case 0:
   if (lengthptr == NULL)
     pop_info->code_start[pop_info->length] = ECL_NOT;
   else
     *lengthptr += 1;
   pop_info->length += 1;
   break;

   /* The two constant operands simply swap over, so no ECL_NOT is needed. */

   case ECL_ANY:
   case ECL_NONE:
   pop_info->op_single_type = (pop_info->op_single_type == ECL_ANY)?
       ECL_NONE : ECL_ANY;
   if (lengthptr == NULL)
     pop_info->code_start[0] = pop_info->op_single_type;
   break;

   /* The only other single-op operand is an ECL_XCLASS. Its negation is
   expressed by toggling XCL_NOT in the code unit that follows the opcode and
   the link, so again no ECL_NOT is needed. */

   default:
   PCRE2_ASSERT(pop_info->op_single_type == ECL_XCLASS &&
                pop_info->length >= 1 + LINK_SIZE + 1);
   if (lengthptr == NULL)
     pop_info->code_start[1 + LINK_SIZE] ^= XCL_NOT;
   break;
   }

 /* Callers that combine the bitmaps themselves ask for them to be kept. */

 if (preserve_classbits) return;

 for (int word = 0; word < 8; word++)
   pop_info->bits.classwords[word] = ~pop_info->bits.classwords[word];
 }


 /* Helper for fold_binary(): throw away the LHS operand and let the RHS
 operand stand in its place. When code is really being written (lengthptr is
 NULL) the RHS code units are moved down to the LHS start; the two regions are
 handed to memmove() rather than memcpy(). The LHS length and single-op type
 are taken from the RHS. The class bitmaps are not touched. */

 static void
 fold_binary_adopt_rhs(eclass_op_info *lhs_op_info,
   eclass_op_info *rhs_op_info, PCRE2_SIZE *lengthptr)
 {
 if (lengthptr == NULL)
   memmove(lhs_op_info->code_start, rhs_op_info->code_start,
           CU2BYTES(rhs_op_info->length));
 lhs_op_info->length = rhs_op_info->length;
 lhs_op_info->op_single_type = rhs_op_info->op_single_type;
 }


 /* Helper for fold_binary(): replace the LHS operand by the one-unit constant
 operand "single_type" (the callers pass ECL_ANY or ECL_NONE). The code unit is
 written only when lengthptr is NULL. The class bitmaps are not touched. */

 static void
 fold_binary_set_constant(eclass_op_info *lhs_op_info, uint8_t single_type,
   PCRE2_SIZE *lengthptr)
 {
 if (lengthptr == NULL)
   lhs_op_info->code_start[0] = single_type;
 lhs_op_info->length = 1;
 lhs_op_info->op_single_type = single_type;
 }


 /* Helper for fold_binary(): neither operand can be simplified away, so the
 operator "ecl_op" is placed directly after the RHS code (which must itself
 directly follow the LHS code), and the LHS becomes a compound operand that
 spans both operands plus the operator. In the length-computing pass only the
 extra code unit is counted. The class bitmaps are not touched. */

 static void
 fold_binary_append_op(eclass_op_info *lhs_op_info,
   eclass_op_info *rhs_op_info, uint8_t ecl_op, PCRE2_SIZE *lengthptr)
 {
 if (lengthptr == NULL)
   {
   PCRE2_ASSERT(rhs_op_info->code_start ==
       lhs_op_info->code_start + lhs_op_info->length);
   rhs_op_info->code_start[rhs_op_info->length] = ecl_op;
   }
 else
   *lengthptr += 1;

 lhs_op_info->length += rhs_op_info->length + 1;
 lhs_op_info->op_single_type = 0;
 }


 /* This function combines two operands with a binary operator, simplifying at
 compile time whenever one side is the constant ECL_ANY or ECL_NONE. The
 combined operand (code description and bitmap) is left in *lhs_op_info.

 Arguments:
   op           ECL_AND, ECL_OR or ECL_XOR
   lhs_op_info  left operand on entry; the combined operand on exit
   rhs_op_info  right operand
   lengthptr    NULL when code is really being written; otherwise no code is
                  touched and only the needed length is accumulated

 Returns:       nothing

 Simplification tables (first matching row wins, "*" is anything):

   ECL_AND               ECL_OR                ECL_XOR
   LHS  RHS  RESULT      LHS  RHS  RESULT      LHS  RHS  RESULT
   ----------------      ----------------      ----------------
   *    ANY  LHS         *    NONE LHS         *    NONE LHS
   ANY  *    RHS         NONE *    RHS         NONE *    RHS
   *    NONE NONE        *    ANY  ANY         *    ANY  !LHS
   NONE *    NONE        ANY  *    ANY         ANY  *    !RHS
   X    Y    X & Y       X    Y    X | Y       X    Y    X ^ Y

 Whatever happens to the code, the two bitmaps are always combined word by
 word with the operator itself. */

 static void
 fold_binary(int op, eclass_op_info *lhs_op_info, eclass_op_info *rhs_op_info,
   PCRE2_SIZE *lengthptr)
 {
 switch (op)
   {
   case ECL_AND:

   /* An ECL_ANY on the right changes nothing, so the RHS is just dropped. */

   if (rhs_op_info->op_single_type != ECL_ANY)
     {
     if (lhs_op_info->op_single_type == ECL_ANY)
       fold_binary_adopt_rhs(lhs_op_info, rhs_op_info, lengthptr);
     else if (rhs_op_info->op_single_type == ECL_NONE)
       fold_binary_set_constant(lhs_op_info, ECL_NONE, lengthptr);

     /* If the LHS is already ECL_NONE it stays as it is and the RHS is
     dropped; otherwise both sides are ECL_XCLASS or compound operands. */

     else if (lhs_op_info->op_single_type != ECL_NONE)
       fold_binary_append_op(lhs_op_info, rhs_op_info, ECL_AND, lengthptr);
     }

   for (int word = 0; word < 8; word++)
     lhs_op_info->bits.classwords[word] &= rhs_op_info->bits.classwords[word];
   break;

   case ECL_OR:

   /* An ECL_NONE on the right changes nothing, so the RHS is just dropped. */

   if (rhs_op_info->op_single_type != ECL_NONE)
     {
     if (lhs_op_info->op_single_type == ECL_NONE)
       fold_binary_adopt_rhs(lhs_op_info, rhs_op_info, lengthptr);
     else if (rhs_op_info->op_single_type == ECL_ANY)
       fold_binary_set_constant(lhs_op_info, ECL_ANY, lengthptr);

     /* If the LHS is already ECL_ANY it stays as it is and the RHS is
     dropped; otherwise both sides are ECL_XCLASS or compound operands. */

     else if (lhs_op_info->op_single_type != ECL_ANY)
       fold_binary_append_op(lhs_op_info, rhs_op_info, ECL_OR, lengthptr);
     }

   for (int word = 0; word < 8; word++)
     lhs_op_info->bits.classwords[word] |= rhs_op_info->bits.classwords[word];
   break;

   case ECL_XOR:

   /* An ECL_NONE on the right changes nothing, so the RHS is just dropped. */

   if (rhs_op_info->op_single_type != ECL_NONE)
     {
     if (lhs_op_info->op_single_type == ECL_NONE)
       fold_binary_adopt_rhs(lhs_op_info, rhs_op_info, lengthptr);

     /* XOR with ECL_ANY is a negation of the other side. In both of the next
     two branches fold_negation() is told to leave the bitmap alone, because
     the XOR loop below is what combines the bitmaps. */

     else if (rhs_op_info->op_single_type == ECL_ANY)
       fold_negation(lhs_op_info, lengthptr, TRUE);
     else if (lhs_op_info->op_single_type == ECL_ANY)
       {
       fold_binary_adopt_rhs(lhs_op_info, rhs_op_info, lengthptr);
       fold_negation(lhs_op_info, lengthptr, TRUE);
       }

     /* Both sides are ECL_XCLASS or compound operands. */

     else
       fold_binary_append_op(lhs_op_info, rhs_op_info, ECL_XOR, lengthptr);
     }

   for (int word = 0; word < 8; word++)
     lhs_op_info->bits.classwords[word] ^= rhs_op_info->bits.classwords[word];
   break;

   /* LCOV_EXCL_START */
   default:
   PCRE2_DEBUG_UNREACHABLE();
   break;
   /* LCOV_EXCL_STOP */
   }
 }


 /* Forward declaration. compile_class_operand() below calls this function when
 it meets a bracketed class that is flagged with CLASS_IS_ECLASS, so the
 prototype has to be visible before that call. */

 static BOOL
 compile_eclass_nested(eclass_context *context, BOOL negated,
   uint32_t **pptr, PCRE2_UCHAR **pcode,
   eclass_op_info *pop_info, PCRE2_SIZE *lengthptr);

 /* This function compiles one operand of an extended class expression. The
 operand is one of:

   . an empty class (META_CLASS_EMPTY or META_CLASS_EMPTY_NOT);
   . a bracketed class flagged CLASS_IS_ECLASS, which is handed over to
     compile_eclass_nested();
   . a "leaf": characters, ranges, and properties, with or without their own
     surrounding brackets, which PRIV(compile_class_not_nested)() compiles.
     The opcode it produces is then rewritten as an ECLASS item: OP_ALLANY
     becomes ECL_ANY, OP_CLASS/OP_NCLASS become ECL_NONE/ECL_ANY with the
     bitmap hoisted out into pop_info->bits, and OP_XCLASS becomes ECL_XCLASS.

 Arguments:
   context    the extended-class compile context
   negated    TRUE if a pending negation is to be folded into this operand
   pptr       points to the parsed-pattern pointer; advanced on success
   pcode      points to the code pointer; advanced on success when code is
                really being written
   pop_info   receives the description of the compiled operand
   lengthptr  NULL when code is really being written; otherwise the needed
                length is added to *lengthptr and *pcode is left where it was

 Returns:     TRUE on success, FALSE on failure
 */

 static BOOL
 compile_class_operand(eclass_context *context, BOOL negated,
   uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
   PCRE2_SIZE *lengthptr)
 {
 uint32_t *ptr = *pptr;
 PCRE2_UCHAR *code_start = *pcode;
 PCRE2_UCHAR *code = code_start;
 PCRE2_SIZE prev_length = (lengthptr != NULL)? *lengthptr : 0;
 uint32_t meta = META_CODE(*ptr);
 BOOL bracketed = (meta == META_CLASS || meta == META_CLASS_NOT);

 /* A nested extended class is compiled by compile_eclass_nested(); all that
 remains to be done here is to step over the closing ']' and to pass the
 updated pointers back. None of the finishing-off at the bottom of this
 function is applied on this path. */

 if (bracketed && (*ptr & CLASS_IS_ECLASS) != 0)
   {
   if (!compile_eclass_nested(context, negated, &ptr, &code,
                              pop_info, lengthptr))
     return FALSE;

   PCRE2_ASSERT(*ptr == META_CLASS_END);
   ptr++;

   PCRE2_ASSERT(lengthptr == NULL || (code == code_start));
   *pptr = ptr;
   *pcode = code;
   return TRUE;
   }

 /* An empty class is a constant: it matches nothing, or, when exactly one of
 "it is the ^ form" and "negated" holds, it matches everything. */

 if (meta == META_CLASS_EMPTY || meta == META_CLASS_EMPTY_NOT)
   {
   BOOL match_all = ((meta == META_CLASS_EMPTY) == negated);

   ++ptr;
   pop_info->length = 1;
   pop_info->op_single_type = match_all? ECL_ANY : ECL_NONE;
   *code++ = pop_info->op_single_type;
   memset(pop_info->bits.classbits, match_all? 0xff : 0, 32);
   }

 /* Anything else is a leaf. For example, inside [a-z_ -- m] there are no
 brackets around "a-z_", but that fragment still has to be gathered up into a
 single class item. */

 else
   {
   uint32_t *prev_ptr;
   PCRE2_SIZE extra_length;

   /* Step over the opening bracket if the leaf has one of its own. */

   if (bracketed) ptr++;

   prev_ptr = ptr;
   ptr = PRIV(compile_class_not_nested)(
     context->options, context->xoptions, ptr, &code,
     (meta != META_CLASS_NOT) == negated, &context->needs_bitmap,
     context->errorcodeptr, context->cb, lengthptr);
   if (ptr == NULL) return FALSE;

   /* The loops in our callers terminate only if ptr always moves forward, so
   this is enforced here even in Release builds. */
   /* LCOV_EXCL_START */
   if (ptr <= prev_ptr)
     {
     PCRE2_DEBUG_UNREACHABLE();
     return FALSE;
     }
   /* LCOV_EXCL_STOP */

   /* Step over the closing ']' that pairs with the bracket skipped above. */

   if (bracketed)
     {
     PCRE2_ASSERT(*ptr == META_CLASS_END);
     ptr++;
     }

   /* Even in the length-computing pass some code has been written at
   code_start, and we rely on that: the opcode is inspected so that it can be
   turned into its ECLASS counterpart. */

   PCRE2_ASSERT(code > code_start);
   extra_length = (lengthptr != NULL)? *lengthptr - prev_length : 0;

   switch (*code_start)
     {
     /* The simplest conversion: OP_ALLANY is ECL_ANY with a full bitmap. */

     case OP_ALLANY:
     PCRE2_ASSERT(code - code_start == 1 && extra_length == 0);
     pop_info->length = 1;
     *code_start = pop_info->op_single_type = ECL_ANY;
     memset(pop_info->bits.classbits, 0xff, 32);
     break;

     /* OP_CLASS and OP_NCLASS turn into ECL_NONE and ECL_ANY respectively,
     and the bitmap that follows the opcode is copied out to pop_info. */

     case OP_CLASS:
     case OP_NCLASS:
     PCRE2_ASSERT(code - code_start == 1 + 32 / sizeof(PCRE2_UCHAR) &&
                  extra_length == 0);
     pop_info->length = 1;
     pop_info->op_single_type =
         (*code_start == OP_CLASS)? ECL_NONE : ECL_ANY;
     *code_start = pop_info->op_single_type;
     memcpy(pop_info->bits.classbits, code_start + 1, 32);

     /* The code pointer is wound back to just after the opcode, but the
     space that the bitmap occupied for a while must still be counted in
     *lengthptr. */

     if (lengthptr != NULL)
       *lengthptr += code - (code_start + 1);
     code = code_start + 1;

     /* A bitmap is needed for an ECL_ANY item, and for an ECL_NONE item that
     has at least one bit set. */

     if (context->needs_bitmap || *code_start != ECL_NONE)
       context->needs_bitmap = TRUE;
     else
       {
       const uint32_t *classwords = pop_info->bits.classwords;
       uint32_t any_bits = 0;

       for (int i = 0; i < 8; i++) any_bits |= classwords[i];
       if (any_bits != 0) context->needs_bitmap = TRUE;
       }
     break;

     /* What is left must be OP_XCLASS. It becomes ECL_XCLASS and keeps its
     code; the bitmap is taken from the compile block's classbits. */

     default:
     PCRE2_ASSERT(*code_start == OP_XCLASS);
     *code_start = pop_info->op_single_type = ECL_XCLASS;

     PCRE2_ASSERT(code - code_start >= 1 + LINK_SIZE + 1);

     memcpy(pop_info->bits.classbits, context->cb->classbits.classbits, 32);
     pop_info->length = (code - code_start) + extra_length;
     break;
     }
   }

 /* When code is really being written, remember where this operand starts. In
 the length-computing pass there is no lasting code: what was written is
 counted in *lengthptr and the code pointer is put back to where it began. */

 if (lengthptr == NULL)
   pop_info->code_start = code_start;
 else
   {
   pop_info->code_start = NULL;
   *lengthptr += code - code_start;
   code = code_start;
   }

 PCRE2_ASSERT(lengthptr == NULL || (code == code_start));

 *pptr = ptr;
 *pcode = code;
 return TRUE;
 }


 /* Compile a run of class elements that are unioned implicitly, simply by
 being written next to one another. Each element may be a character, a range,
 a property, or a nested class.

 Arguments:
   context     state shared across the extended class being compiled
   negated     TRUE if the run as a whole must be compiled in negated form
   pptr        in/out: current position in the parsed META code vector
   pcode       in/out: current position in the output code
   pop_info    out: describes the folded result of the whole run
   lengthptr   NULL, or non-NULL when lengths are being accumulated; in
                 that case the code pointer is expected to end where it began

 Returns:      TRUE on success; FALSE if any operand failed to compile, in
                 which case *pptr and *pcode are not written back
 */

 static BOOL
 compile_class_juxtaposition(eclass_context *context, BOOL negated,
   uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
   PCRE2_SIZE *lengthptr)
 {
 uint32_t *cursor = *pptr;
 PCRE2_UCHAR *out = *pcode;
 #ifdef PCRE2_DEBUG
 PCRE2_UCHAR *out_at_entry = *pcode;
 #endif

 /* The "negated" flag is folded at compile time (the reasoning is described
 in compile_class_binary_loose()). By De Morgan:
       A juxtapose B     ->   A || B
     !(A juxtapose B)    ->  !A && !B
 so both the joining operator and the polarity of every subsequent operand
 depend on "negated" alone, and can be chosen once, up front. */

 const uint32_t join_op = negated? ECL_AND : ECL_OR;
 const BOOL rhs_negated = negated? TRUE : FALSE;

 /* A class is never empty, so the first operand is mandatory. */
 if (!compile_class_operand(context, negated, &cursor, &out, pop_info,
                            lengthptr))
   return FALSE;

 /* Keep absorbing operands until the class closes, or until one of the
 explicit operators (AND .. NOT) is seen. */
 for (;;)
   {
   eclass_op_info next_info;

   if (*cursor == META_CLASS_END) break;
   if (*cursor >= META_ECLASS_AND && *cursor <= META_ECLASS_NOT) break;

   if (!compile_class_operand(context, rhs_negated, &cursor, &out,
                              &next_info, lengthptr))
     return FALSE;

   /* Merge the new operand into the running result; this is what turns the
   infix input into postfix (RPN) output. */
   fold_binary(join_op, pop_info, &next_info, lengthptr);
   if (lengthptr == NULL)
     out = pop_info->code_start + pop_info->length;
   }

 PCRE2_ASSERT(lengthptr == NULL || out == out_at_entry);

 *pptr = cursor;
 *pcode = out;
 return TRUE;
 }


 /* Consume any leading unary NOT operators, then compile the juxtaposed
 operands that follow. Every NOT seen flips the "negated" flag that is passed
 down, so the negation is folded into the operand rather than emitted.

 Arguments and return value are as for compile_class_juxtaposition(). Note
 that *pptr is advanced past the NOT operators before the operand is compiled,
 and stays advanced even if that compilation then fails. */

 static BOOL
 compile_class_unary(eclass_context *context, BOOL negated,
   uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
   PCRE2_SIZE *lengthptr)
 {
 uint32_t *cursor;
 #ifdef PCRE2_DEBUG
 PCRE2_UCHAR *out_at_entry = *pcode;
 #endif

 /* Each prefix NOT toggles the polarity handed to the operand. */
 for (cursor = *pptr; *cursor == META_ECLASS_NOT; cursor++)
   negated = !negated;

 *pptr = cursor;

 /* A class is never empty, so an operand has to follow. */
 if (!compile_class_juxtaposition(context, negated, pptr, pcode,
                                  pop_info, lengthptr))
   return FALSE;

 PCRE2_ASSERT(lengthptr == NULL || *pcode == out_at_entry);
 return TRUE;
 }


 /* Compile a chain of operands joined by the tightly-binding binary operator
 (META_ECLASS_AND). The chain is folded left to right into *pop_info.

 Arguments and return value are as for compile_class_juxtaposition(). */

 static BOOL
 compile_class_binary_tight(eclass_context *context, BOOL negated,
   uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
   PCRE2_SIZE *lengthptr)
 {
 uint32_t *cursor = *pptr;
 PCRE2_UCHAR *out = *pcode;
 #ifdef PCRE2_DEBUG
 PCRE2_UCHAR *out_at_entry = *pcode;
 #endif

 /* The "negated" flag is folded at compile time (the reasoning is described
 in compile_class_binary_loose()). By De Morgan:
       A && B     ->   A && B
     !(A && B)    ->  !A || !B
 so the operator to emit and the polarity of each right-hand operand are
 determined by "negated" alone. */

 const uint32_t join_op = negated? ECL_OR : ECL_AND;
 const BOOL rhs_negated = negated? TRUE : FALSE;

 /* A class is never empty, so the first operand is mandatory. */
 if (!compile_class_unary(context, negated, &cursor, &out, pop_info,
                          lengthptr))
   return FALSE;

 for (;;)
   {
   eclass_op_info next_info;

   if (*cursor != META_ECLASS_AND) break;
   cursor++;  /* Step over the operator */

   /* The operator must be followed by an operand. */
   if (!compile_class_unary(context, rhs_negated, &cursor, &out,
                            &next_info, lengthptr))
     return FALSE;

   /* Merge the new operand into the running result; this is what turns the
   infix input into postfix (RPN) output. */
   fold_binary(join_op, pop_info, &next_info, lengthptr);
   if (lengthptr == NULL)
     out = pop_info->code_start + pop_info->length;
   }

 PCRE2_ASSERT(lengthptr == NULL || out == out_at_entry);

 *pptr = cursor;
 *pcode = out;
 return TRUE;
 }


 /* Compile a chain of operands joined by the loosely-binding binary operators
 (the META codes from META_ECLASS_OR to META_ECLASS_XOR: union, subtraction
 and symmetric difference). The chain is folded left to right into *pop_info.

 Arguments and return value are as for compile_class_juxtaposition(). */

 static BOOL
 compile_class_binary_loose(eclass_context *context, BOOL negated,
   uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
   PCRE2_SIZE *lengthptr)
 {
 uint32_t *cursor = *pptr;
 PCRE2_UCHAR *out = *pcode;
 #ifdef PCRE2_DEBUG
 PCRE2_UCHAR *out_at_entry = *pcode;
 #endif

 /* Negation is folded into the operands wherever possible, so that simple
 cases collapse completely. This matters most in 8-bit non-UTF mode, where the
 expression has to be fully folded so that no OP_ECLASS is ever emitted (just
 as OP_XCLASS is never emitted in that mode).

 The difficulty is that when "!(A ..." has been read, it is not yet known
 whether a binary operator follows: the input might be "!(A && B)" or just
 "!(A)". That turns out not to matter. The "!" can be pushed into A straight
 away, because whichever operator appears next, there is an equivalent
 expression whose left-hand side is !A. As a result, (nearly) every use of the
 NOT operator can be avoided in the output. */

 /* A class is never empty, so the first operand is mandatory. */
 if (!compile_class_binary_tight(context, negated, &cursor, &out,
                                 pop_info, lengthptr))
   return FALSE;

 while (*cursor >= META_ECLASS_OR && *cursor <= META_ECLASS_XOR)
   {
   const uint32_t meta = *cursor++;  /* Read the operator and step over it */
   const BOOL is_sub = (meta == META_ECLASS_SUB);
   eclass_op_info next_info;
   uint32_t fold_op;
   BOOL rhs_negated;
   BOOL negate_result;

   /* The left-hand side has already been compiled with polarity "negated".
   The rewrites applied are:

       not negated                 negated
       -----------                 -------
       A || B   ->  A || B         !(A || B)   ->  !A && !B
       A -- B   ->  A && !B        !(A -- B)   ->  !A || B
       A XOR B  ->  A XOR B        !(A XOR B)  ->  !(!A XOR !B)

   Anything that is neither OR nor SUB is handled as XOR. */

   if (meta == META_ECLASS_OR)
     fold_op = negated? ECL_AND : ECL_OR;
   else if (is_sub)
     fold_op = negated? ECL_OR : ECL_AND;
   else
     fold_op = ECL_XOR;

   /* Subtraction negates its right-hand side, and an outer negation flips
   that choice once more. */
   rhs_negated = negated? !is_sub : is_sub;

   /* Only a negated XOR needs an explicit negation of the folded result. */
   negate_result = negated && meta == META_ECLASS_XOR;

   /* The operator must be followed by an operand. */
   if (!compile_class_binary_tight(context, rhs_negated, &cursor, &out,
                                   &next_info, lengthptr))
     return FALSE;

   /* Merge the new operand into the running result; this is what turns the
   infix input into postfix (RPN) output. */
   fold_binary(fold_op, pop_info, &next_info, lengthptr);
   if (negate_result) fold_negation(pop_info, lengthptr, FALSE);
   if (lengthptr == NULL)
     out = pop_info->code_start + pop_info->length;
   }

 PCRE2_ASSERT(lengthptr == NULL || out == out_at_entry);

 *pptr = cursor;
 *pcode = out;
 return TRUE;
 }


 /* Compile one bracketed extended class: the META codes at *pptr are turned
 into opcodes written at *pcode.

 On entry, *pptr must point at a META_CLASS or META_CLASS_NOT code that has
 the CLASS_IS_ECLASS bit set. A META_CLASS_NOT opener inverts the "negated"
 flag passed in. The contents are compiled as a left-associative sequence of
 operator applications.

 On success, *pptr is left pointing at the matching META_CLASS_END.

 Arguments and return value are otherwise as for
 compile_class_binary_loose(), which does the real work. */

 static BOOL
 compile_eclass_nested(eclass_context *context, BOOL negated,
   uint32_t **pptr, PCRE2_UCHAR **pcode,
   eclass_op_info *pop_info, PCRE2_SIZE *lengthptr)
 {
 const uint32_t opener = **pptr;
 #ifdef PCRE2_DEBUG
 PCRE2_UCHAR *out_at_entry = *pcode;
 #endif

 /* This is a nested class, so CLASS_IS_ECLASS has to be set on the opener. */
 PCRE2_ASSERT(opener == (META_CLASS | CLASS_IS_ECLASS) ||
              opener == (META_CLASS_NOT | CLASS_IS_ECLASS));

 /* Step over the opener, folding its negation (if any) into the flag. */
 *pptr += 1;
 if (opener == (META_CLASS_NOT | CLASS_IS_ECLASS))
   negated = !negated;

 /* A class is never empty, so there must be an operand at the start. */
 if (!compile_class_binary_loose(context, negated, pptr, pcode,
                                 pop_info, lengthptr))
   return FALSE;

 PCRE2_ASSERT(**pptr == META_CLASS_END);
 PCRE2_ASSERT(lengthptr == NULL || *pcode == out_at_entry);
 return TRUE;
 }

 /* Entry point for compiling an extended character class.

 A provisional OP_ECLASS header is written at *pcode and the class body is
 compiled after it by compile_eclass_nested(). The folded result is then
 inspected, and the output takes one of these final forms:

   OP_ALLANY             the result is ECL_ANY and every bitmap bit is set
   OP_NCLASS / OP_CLASS  the result is ECL_ANY / ECL_NONE; followed by the
                           32-byte bitmap
   OP_XCLASS             the result is a single ECL_XCLASS; the bitmap that
                           was hoisted out is re-inserted if it is needed
   OP_ECLASS             otherwise; the bitmap is inserted after the header
                           if it is needed, and the length field is filled in

 The first three replace the OP_ECLASS wrapper entirely. When SUPPORT_WIDE_CHARS
 is not defined, only those unwrapped forms are compiled in, and the result is
 asserted to be a single operand.

 Arguments:
   options       stored in the context used by the operand compilers
   xoptions      stored in the context used by the operand compilers
   pptr          in/out: position in the parsed META code vector; must point
                   at the opening META code of the class (see the assertion
                   in compile_eclass_nested()), and on success is left at the
                   matching META_CLASS_END
   pcode         in/out: position in the output code; updated on success,
                   left unchanged on failure
   errorcodeptr  stored in the context; presumably where a failing operand
                   records its error code (not visible in this function)
   cb            the compile block, stored in the context
   lengthptr     NULL when real code is being generated. When non-NULL,
                   *lengthptr is adjusted to account for the space needed, and
                   only a short opcode (or truncated header) is written out

 Returns:        TRUE on success, FALSE if the class body failed to compile
 */

 BOOL
 PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
   uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
   compile_block *cb, PCRE2_SIZE *lengthptr)
 {
 eclass_context context;
 eclass_op_info op_info;
 /* Snapshot of *lengthptr on entry, so that the amount requested by this
 call alone can be computed as (*lengthptr - previous_length). */
 PCRE2_SIZE previous_length = (lengthptr != NULL)? *lengthptr : 0;
 PCRE2_UCHAR *code = *pcode;
 PCRE2_UCHAR *previous;
 BOOL allbitsone = TRUE;

 context.needs_bitmap = FALSE;
 context.options = options;
 context.xoptions = xoptions;
 context.errorcodeptr = errorcodeptr;
 context.cb = cb;

 /* Write a provisional OP_ECLASS header: the opcode, LINK_SIZE units reserved
 for the length, and one unit of flags. "previous" remembers where it starts
 so that it can be rewound over, or patched, later. */
 previous = code;
 *code++ = OP_ECLASS;
 code += LINK_SIZE;
 *code++ = 0;  /* Flags, currently zero. */
 if (!compile_eclass_nested(&context, FALSE, pptr, &code, &op_info, lengthptr))
   return FALSE;

 if (lengthptr != NULL)
   {
   *lengthptr += code - previous;
   code = previous;
   /* (*lengthptr - previous_length) now holds the amount of buffer that
   we require to make the call to compile_class_nested() with
   lengthptr = NULL, and including the (1+LINK_SIZE+1) that we write out
   before that call. */
   }

 /* Do some useful counting of what's in the bitmap. */
 for (int i = 0; i < 8; i++)
   if (op_info.bits.classwords[i] != 0xffffffff)
     {
     allbitsone = FALSE;
     break;
     }

 /* After constant-folding the extended class syntax, it may turn out to be
 a simple class after all. In that case, we can unwrap it from the
 OP_ECLASS container - and in fact, we must do so, because in 8-bit
 no-Unicode mode the matcher is compiled without support for OP_ECLASS. */

 /* Without SUPPORT_WIDE_CHARS the braced block below is unconditional (and the
 matching "else" further down is compiled out); with it, the block is the body
 of the "if". */
 #ifndef SUPPORT_WIDE_CHARS
 PCRE2_ASSERT(op_info.op_single_type != 0);
 #else
 if (op_info.op_single_type != 0)
 #endif
   {
   /* Rewind back over the OP_ECLASS. */
   code = previous;

   /* If the bits are all ones, and the "high characters" are all matched
   too, we use a special-cased encoding of OP_ALLANY. */

   if (op_info.op_single_type == ECL_ANY && allbitsone)
     {
     /* Advancing code means rewinding lengthptr, at this point. */
     if (lengthptr != NULL) *lengthptr -= 1;
     *code++ = OP_ALLANY;
     }

   /* If the high bits are all matched / all not-matched, then we emit an
   OP_NCLASS/OP_CLASS respectively. */

   else if (op_info.op_single_type == ECL_ANY ||
            op_info.op_single_type == ECL_NONE)
     {
     /* One unit for the opcode plus the 32-byte bitmap, in code units. */
     PCRE2_SIZE required_len = 1 + (32 / sizeof(PCRE2_UCHAR));

     if (lengthptr != NULL)
       {
       /* Raise the request only if what has been asked for so far is less
       than this form needs. */
       if (required_len > (*lengthptr - previous_length))
       *lengthptr = previous_length + required_len;
       }

     /* Advancing code means rewinding lengthptr, at this point. */
     if (lengthptr != NULL) *lengthptr -= required_len;
     *code++ = (op_info.op_single_type == ECL_ANY)? OP_NCLASS : OP_CLASS;
     memcpy(code, op_info.bits.classbits, 32);
     code += 32 / sizeof(PCRE2_UCHAR);
     }

   /* Otherwise, we have an ECL_XCLASS, so we have the OP_XCLASS data
   there, but, we pulled out its bitmap into op_info, so now we have to
   put that back into the OP_XCLASS. */

   else
     {
 #ifndef SUPPORT_WIDE_CHARS
     PCRE2_DEBUG_UNREACHABLE();
 #else
     BOOL need_map = context.needs_bitmap;
     PCRE2_SIZE required_len;

     PCRE2_ASSERT(op_info.op_single_type == ECL_XCLASS);
     /* The existing XCLASS item, plus the bitmap if one has to be inserted. */
     required_len = op_info.length + (need_map? 32/sizeof(PCRE2_UCHAR) : 0);

     if (lengthptr != NULL)
       {
       /* Don't unconditionally request all the space we need - we may
       already have asked for more during processing of the ECLASS. */
       if (required_len > (*lengthptr - previous_length))
         *lengthptr = previous_length + required_len;

       /* The code we write out here won't be ignored, even during the
       (lengthptr != NULL) phase, because if there's a following quantifier
       it will peek backwards. So we do have to write out a (truncated)
       OP_XCLASS, even on this branch. */
       *lengthptr -= 1 + LINK_SIZE + 1;
       *code++ = OP_XCLASS;
       PUT(code, 0, 1 + LINK_SIZE + 1);
       code += LINK_SIZE;
       *code++ = 0;
       }
     else
       {
       PCRE2_UCHAR *rest;
       PCRE2_SIZE rest_len;
       PCRE2_UCHAR flags;

       /* 1 unit: OP_XCLASS | LINK_SIZE units | 1 unit: flags | ...rest */
       PCRE2_ASSERT(op_info.length >= 1 + LINK_SIZE + 1);
       rest = op_info.code_start + 1 + LINK_SIZE + 1;
       rest_len = (op_info.code_start + op_info.length) - rest;

       /* First read any data we use, before memmove splats it. */
       flags = op_info.code_start[1 + LINK_SIZE];
       PCRE2_ASSERT((flags & XCL_MAP) == 0);

       /* Next do the memmove before any writes. */
       /* The source and destination may overlap, hence memmove() rather
       than memcpy(). */
       memmove(code + 1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0),
               rest, CU2BYTES(rest_len));

       /* Finally write the header data. */
       *code++ = OP_XCLASS;
       PUT(code, 0, (int)required_len);
       code += LINK_SIZE;
       *code++ = flags | (need_map? XCL_MAP : 0);
       if (need_map)
         {
         memcpy(code, op_info.bits.classbits, 32);
         code += 32 / sizeof(PCRE2_UCHAR);
         }
       /* Step over the data that the memmove() above already put in place. */
       code += rest_len;
       }
 #endif /* SUPPORT_WIDE_CHARS */
     }
   }

 /* Otherwise, we're going to keep the OP_ECLASS. However, again we need
 to do some adjustment to insert the bitmap if we have one. */

 #ifdef SUPPORT_WIDE_CHARS
 else
   {
   BOOL need_map = context.needs_bitmap;
   /* Header, optional bitmap, then the folded expression itself. */
   PCRE2_SIZE required_len =
     1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0) + op_info.length;

   if (lengthptr != NULL)
     {
     if (required_len > (*lengthptr - previous_length))
       *lengthptr = previous_length + required_len;

     /* As for the XCLASS branch above, we do have to write out a dummy
     OP_ECLASS, because of the backwards peek by the quantifier code. Write
     out a (truncated) OP_ECLASS, even on this branch. */
     *lengthptr -= 1 + LINK_SIZE + 1;
     *code++ = OP_ECLASS;
     PUT(code, 0, 1 + LINK_SIZE + 1);
     code += LINK_SIZE;
     *code++ = 0;
     }
   else
     {
     if (need_map)
       {
       /* Open a gap for the bitmap directly after the header by shifting
       everything emitted so far upwards, then copy the bitmap into it and
       flag its presence. */
       PCRE2_UCHAR *map_start = previous + 1 + LINK_SIZE + 1;
       previous[1 + LINK_SIZE] |= ECL_MAP;
       memmove(map_start + 32/sizeof(PCRE2_UCHAR), map_start,
               CU2BYTES(code - map_start));
       memcpy(map_start, op_info.bits.classbits, 32);
       code += 32 / sizeof(PCRE2_UCHAR);
       }
     /* Fill in the length field that was reserved after the opcode. */
     PUT(previous, 1, (int)(code - previous));
     }
   }
 #endif /* SUPPORT_WIDE_CHARS */

 *pcode = code;
 return TRUE;
 }

 /* End of pcre2_compile_class.c */