/* src/pcre2_match_next.c - third_party/github.com/PCRE2Project/pcre2 - Git at Google */

 /*************************************************
 *      Perl-Compatible Regular Expressions       *
 *************************************************/

 /* PCRE is a library of functions to support regular expressions whose syntax
 and semantics are as close as possible to those of the Perl 5 language.

                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
           New API code Copyright (c) 2016-2024 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

     * Redistributions of source code must retain the above copyright notice,
       this list of conditions and the following disclaimer.

     * Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.

     * Neither the name of the University of Cambridge nor the names of its
       contributors may be used to endorse or promote products derived from
       this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 -----------------------------------------------------------------------------
 */


 #include "pcre2_internal.h"


 /* Return the offset that lies one step beyond "offset" in the subject. The
 size of the step is chosen as follows:

   - two code units, when "offset" is at a CR that is immediately followed by
     LF and the compiled pattern's newline convention is CRLF, ANY or ANYCRLF,
     so that the pair is never split;
   - one whole character, when the pattern was compiled with PCRE2_UTF (only
     in builds with SUPPORT_UNICODE);
   - one code unit otherwise.

 The caller must ensure that "offset" is not at the end of the subject,
 because subject[offset] is read unconditionally. */

 static PCRE2_SIZE do_bumpalong(pcre2_match_data *match_data,
   PCRE2_SIZE offset)
 {
 PCRE2_SPTR subject = match_data->subject;
 PCRE2_SIZE subject_length = match_data->subject_length;

 /* CR LF is stepped over as a unit when the newline convention recognises it
 as a newline sequence. The length test guards the read of the second unit. */

 if (subject[offset] == CHAR_CR)
   {
   if (offset + 1 < subject_length && subject[offset + 1] == CHAR_LF)
     {
     switch(match_data->code->newline_convention)
       {
       case PCRE2_NEWLINE_CRLF:
       case PCRE2_NEWLINE_ANY:
       case PCRE2_NEWLINE_ANYCRLF:
       return offset + 2;

       default:
       break;
       }
     }
   }

 /* In UTF mode, step over one code unit and then let FORWARDCHARTEST move on
 to the end of the character, never going beyond the end of the subject. */

 #ifdef SUPPORT_UNICODE
 if ((match_data->code->overall_options & PCRE2_UTF) != 0)
   {
   PCRE2_SPTR limit = subject + subject_length;
   PCRE2_SPTR cursor = subject + offset + 1;

   (void)limit; /* Suppress warning; 32-bit FORWARDCHARTEST ignores this */
   FORWARDCHARTEST(cursor, limit);
   return (PCRE2_SIZE)(cursor - subject);
   }
 #endif

 /* Plain case: a single code unit. */

 return offset + 1;
 }


 /*************************************************
 *                Advance the match               *
 *************************************************/

 /* Decide whether another match attempt should follow the one recorded in
 match_data and, if so, where it should start and with which options.

 Arguments:
   match_data      holds the outcome of the previous match attempt; its rc,
                     start_offset, subject_length and ovector fields are read
   pstart_offset   where to store the start offset for the next attempt
   poptions        where to store the options for the next attempt

 Returns:          TRUE if another attempt should be made, in which case
                     *pstart_offset and *poptions have been set;
                   FALSE if iteration is over, in which case neither output
                     has been written */

 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_next_match(pcre2_match_data *match_data, PCRE2_SIZE *pstart_offset,
   uint32_t *poptions)
 {
 PCRE2_SIZE start_offset = match_data->start_offset;
 PCRE2_SIZE subject_length = match_data->subject_length;
 PCRE2_SIZE *ovector = match_data->ovector;
 PCRE2_SIZE next_offset;
 uint32_t next_options;

 /* A negative rc is a match error or a plain "no match"; either way there is
 nothing to continue from. */

 if (match_data->rc < 0) return FALSE;

 /* \K may move ovector[0] around, but the end of the match can never lie
 before the offset at which the search started. */

 PCRE2_ASSERT(ovector[1] >= start_offset);

 if (ovector[1] == start_offset && ovector[0] != start_offset)
   {
   /* Rare case: \K inside a lookaround has produced a match whose start is not
   at start_offset but whose end is, so restarting at ovector[1] would make no
   progress. Stop at the end of the subject; otherwise bump along and do a
   normal search from there. */

   if (start_offset >= subject_length) return FALSE;
   next_offset = do_bumpalong(match_data, ovector[1]);
   next_options = 0;
   }
 else if (ovector[0] == ovector[1])
   {
   /* Empty match: finished if it is at the end of the subject. Otherwise try
   again at the very same point, with the option that prevents the same empty
   match from being returned again. */

   if (ovector[0] >= subject_length) return FALSE;
   next_offset = ovector[1];
   next_options = PCRE2_NOTEMPTY_ATSTART;
   }
 else
   {
   /* Ordinary case: the match ended beyond start_offset, so continuing from
   the end of the match makes progress. */

   next_offset = ovector[1];
   next_options = 0;
   }

 *pstart_offset = next_offset;
 *poptions = next_options;
 return TRUE;
 }

 /* End of pcre2_match_next.c */
	/*************************************************
	* Perl-Compatible Regular Expressions *
	*************************************************/

	/* PCRE is a library of functions to support regular expressions whose syntax
	and semantics are as close as possible to those of the Perl 5 language.

	Written by Philip Hazel
	Original API code Copyright (c) 1997-2012 University of Cambridge
	New API code Copyright (c) 2016-2024 University of Cambridge

	-----------------------------------------------------------------------------
	Redistribution and use in source and binary forms, with or without
	modification, are permitted provided that the following conditions are met:

	* Redistributions of source code must retain the above copyright notice,
	this list of conditions and the following disclaimer.

	* Redistributions in binary form must reproduce the above copyright
	notice, this list of conditions and the following disclaimer in the
	documentation and/or other materials provided with the distribution.

	* Neither the name of the University of Cambridge nor the names of its
	contributors may be used to endorse or promote products derived from
	this software without specific prior written permission.

	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	POSSIBILITY OF SUCH DAMAGE.
	-----------------------------------------------------------------------------
	*/


	#include "pcre2_internal.h"



	/* Move "offset" forward by one step and return the new offset. A CR LF pair
	counts as a single step when the compiled pattern's newline convention is
	CRLF, ANY or ANYCRLF; in UTF mode (SUPPORT_UNICODE builds only) a step is one
	whole character; in every other case a step is one code unit.

	This must only be called when "offset" is not at the end of the subject,
	since the code unit at "offset" is always inspected. */

	static PCRE2_SIZE do_bumpalong(pcre2_match_data *match_data,
	  PCRE2_SIZE offset)
	{
	PCRE2_SPTR subject = match_data->subject;
	PCRE2_SIZE subject_length = match_data->subject_length;
	PCRE2_SIZE bumped = offset + 1;
	#ifdef SUPPORT_UNICODE
	BOOL utf = (match_data->code->overall_options & PCRE2_UTF) != 0;
	#endif

	/* Treat CRLF as indivisible if the newline convention includes it. The
	middle test ensures that the LF position is inside the subject. */

	if (subject[offset] == CHAR_CR && bumped < subject_length &&
	    subject[bumped] == CHAR_LF)
	  {
	  switch(match_data->code->newline_convention)
	    {
	    case PCRE2_NEWLINE_CRLF:
	    case PCRE2_NEWLINE_ANY:
	    case PCRE2_NEWLINE_ANYCRLF:
	    return bumped + 1;

	    default:
	    break;
	    }
	  }

	/* In UTF mode, carry on from the next code unit to the end of the current
	character, without running past the end of the subject. */

	#ifdef SUPPORT_UNICODE
	if (utf)
	  {
	  PCRE2_SPTR stop = subject + subject_length;
	  PCRE2_SPTR ptr = subject + bumped;

	  (void)stop; /* Suppress warning; 32-bit FORWARDCHARTEST ignores this */
	  FORWARDCHARTEST(ptr, stop);
	  return (PCRE2_SIZE)(ptr - subject);
	  }
	#endif

	return bumped;
	}



	/*************************************************
	* Advance the match *
	*************************************************/

	/* Work out whether a further match attempt should be made after the one
	whose outcome is recorded in match_data, and how it should be started.

	Arguments:
	  match_data      points to the match data block of the previous attempt; its
	                    rc, start_offset, subject_length and ovector fields are
	                    read here
	  pstart_offset   points to where the next start offset is to be stored
	  poptions        points to where the options for the next attempt are to be
	                    stored

	Returns:          TRUE if another attempt should be made (both outputs set);
	                  FALSE if there is nothing more to do (outputs untouched)

	All three arguments are pointers: the function reads through match_data and
	writes through pstart_offset and poptions. */

	PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
	pcre2_next_match(pcre2_match_data *match_data, PCRE2_SIZE *pstart_offset,
	  uint32_t *poptions)
	{
	int rc = match_data->rc;
	PCRE2_SIZE start_offset = match_data->start_offset;
	PCRE2_SIZE *ovector = match_data->ovector;

	/* Match error, or no match: no further iteration possible. In previous versions
	of PCRE2, we recommended that clients use a strategy which involved retrying in
	certain cases after PCRE2_ERROR_NOMATCH, but this is no longer required. */

	if (rc < 0)
	  return FALSE;

	/* Match succeeded: get the start offset for the next match */

	/* Although \K can affect the position of ovector[0], there are no ways to do
	anything surprising with ovector[1], which must always be >= start_offset. */

	PCRE2_ASSERT(ovector[1] >= start_offset);

	/* Special handling for patterns which contain \K in a lookaround, which enables
	the match start to be pushed back to before the starting search offset
	(ovector[0] < start_offset) or after the match ends (ovector[0] > ovector[1]).
	This is not a problem if ovector[1] > start_offset, because in this case, we can
	just attempt the next match at ovector[1]: we are making progress, which is all
	that we require.

	However, if we have ovector[1] == start_offset, then we have a very rare case
	which must be handled specially, because it's a non-empty match which
	nonetheless fails to make progress through the subject. */

	if (ovector[0] != start_offset && ovector[1] == start_offset)
	  {
	  /* If the match end is at the end of the subject, we are done. */

	  if (start_offset >= match_data->subject_length)
	    return FALSE;

	  /* Otherwise, bump along (by one code unit, one UTF character, or a CRLF
	  pair, as decided by do_bumpalong()), and do a normal search. */

	  *pstart_offset = do_bumpalong(match_data, ovector[1]);
	  *poptions = 0;
	  return TRUE;
	  }

	/* If the previous match was for an empty string, we are finished if we are at
	the end of the subject. Otherwise, arrange to run another match at the same
	point to see if a non-empty match can be found. */

	if (ovector[0] == ovector[1])
	  {
	  /* If the match is at the end of the subject, we are done. */

	  if (ovector[0] >= match_data->subject_length)
	    return FALSE;

	  /* Otherwise, continue at this exact same point, but we must set the flag
	  which ensures that we don't return the exact same empty match again. */

	  *pstart_offset = ovector[1];
	  *poptions = PCRE2_NOTEMPTY_ATSTART;
	  return TRUE;
	  }

	/* Finally, we must be in the happy state of a non-empty match, where the end of
	the match is further on in the subject than start_offset, so we are easily able
	to continue and make progress. */

	*pstart_offset = ovector[1];
	*poptions = 0;
	return TRUE;
	}

	/* End of pcre2_match_next.c */