|  | .TH PCRE2DEMO 3 "24 March 2025" "PCRE2 10.47-DEV" | 
|  | .\"AUTOMATICALLY GENERATED BY UpdateAlways - do not EDIT! | 
|  | .SH NAME | 
|  | PCRE2DEMO - A demonstration C program for PCRE2 | 
|  | .SH "SOURCE CODE" | 
|  | .rs | 
|  | .sp | 
|  | .\" Start example: save the current font family (string mF) and font | 
|  | .\" (register mE), then switch to no-fill, unhyphenated text set in font | 
|  | .\" family C with font CW. | 
|  | .de EX | 
|  | .	do ds mF \\n[.fam] | 
|  | .  nr mE \\n(.f | 
|  | .  nf | 
|  | .  nh | 
|  | .	do fam C | 
|  | .  ft CW | 
|  | .. | 
|  | . | 
|  | . | 
|  | .\" End example: restore the font family from string mF and the font from | 
|  | .\" register mE, re-enable fill mode, and reset hyphenation from register HY. | 
|  | .de EE | 
|  | .	do fam \\*(mF | 
|  | .  ft \\n(mE | 
|  | .  fi | 
|  | .  hy \\n(HY | 
|  | .. | 
|  | . | 
|  | .RS -7 | 
|  | .EX | 
|  | /************************************************* | 
|  | *           PCRE2 DEMONSTRATION PROGRAM          * | 
|  | *************************************************/ | 
|  |  | 
|  | /* This is a demonstration program to illustrate a straightforward way of | 
|  | using the PCRE2 regular expression library from a C program. See the | 
|  | pcre2sample documentation for a short discussion ("man pcre2sample" if you have | 
|  | the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is | 
|  | incompatible with the original PCRE API. | 
|  |  | 
|  | There are actually three libraries, each supporting a different code unit | 
|  | width. This demonstration program uses the 8-bit library. The default is to | 
|  | process each code unit as a separate character, but if the pattern begins with | 
|  | "(*UTF)", both it and the subject are treated as UTF-8 strings, where | 
|  | characters may occupy multiple code units. | 
|  |  | 
|  | In Unix-like environments, if PCRE2 is installed in your standard system | 
|  | libraries, you should be able to compile this program using this command: | 
|  |  | 
|  | cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo | 
|  |  | 
|  | If PCRE2 is not installed in a standard place, it is likely to be installed | 
|  | with support for the pkg-config mechanism. If you have pkg-config, you can | 
|  | compile this program using this command: | 
|  |  | 
|  | cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo | 
|  |  | 
|  | If you do not have pkg-config, you may have to use something like this: | 
|  |  | 
|  | cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \e | 
|  | -R/usr/local/lib -lpcre2-8 -o pcre2demo | 
|  |  | 
|  | Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and | 
|  | library files for PCRE2 are installed on your system. Only some operating | 
|  | systems (Solaris is one) use the -R option. | 
|  |  | 
|  | Building under Windows: | 
|  |  | 
|  | If you want to statically link this program against a non-dll .a file, you must | 
|  | define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment | 
|  | the following line. */ | 
|  |  | 
|  | /* #define PCRE2_STATIC */ | 
|  |  | 
|  | /* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h. | 
|  | For a program that uses only one code unit width, setting it to 8, 16, or 32 | 
|  | makes it possible to use generic function names such as pcre2_compile(). Note | 
|  | that just changing 8 to 16 (for example) is not sufficient to convert this | 
|  | program to process 16-bit characters. Even in a fully 16-bit environment, where | 
|  | string-handling functions such as strcmp() and printf() work with 16-bit | 
|  | characters, the code for handling the table of named substrings will still need | 
|  | to be modified. */ | 
|  |  | 
|  | #define PCRE2_CODE_UNIT_WIDTH 8 | 
|  |  | 
|  | #include <stdio.h> | 
|  | #include <string.h> | 
|  | #include <pcre2.h> | 
|  |  | 
|  |  | 
|  | /************************************************************************** | 
|  | * Here is the program. The API includes the concept of "contexts" for     * | 
|  | * setting up unusual interface requirements for compiling and matching,   * | 
|  | * such as custom memory managers and non-standard newline definitions.    * | 
|  | * This program does not do any of this, so it makes no use of contexts,   * | 
|  | * always passing NULL where a context could be given.                     * | 
|  | **************************************************************************/ | 
|  |  | 
|  | /* Entry point. Usage: pcre2demo [-g] [-i] <pattern> <subject>. Compiles the | 
|  | pattern, matches it against the subject, and prints the numbered and named | 
|  | substrings of the first match and, with -g, of every subsequent match. Returns | 
|  | 0 if at least one match was found and displayed, or 1 on a usage error, a | 
|  | compile error, no match, or a matching error. */ | 
|  |  | 
|  | int main(int argc, char **argv) | 
|  | { | 
|  | pcre2_code *re; | 
|  | PCRE2_SPTR pattern;     /* PCRE2_SPTR is a pointer to unsigned code units of */ | 
|  | PCRE2_SPTR subject;     /* the appropriate width (in this case, 8 bits). */ | 
|  | PCRE2_SPTR name_table; | 
|  |  | 
|  | int errornumber; | 
|  | int find_all, caseless_match; | 
|  | int i; | 
|  | int rc; | 
|  |  | 
|  | uint32_t namecount; | 
|  | uint32_t name_entry_size; | 
|  |  | 
|  | PCRE2_SIZE erroroffset; | 
|  | PCRE2_SIZE *ovector; | 
|  | PCRE2_SIZE ovector_last[2]; | 
|  | PCRE2_SIZE subject_length; | 
|  |  | 
|  | pcre2_match_data *match_data; | 
|  |  | 
|  |  | 
|  | /************************************************************************** | 
|  | * First, sort out the command line. Options:                              * | 
|  | * - "-g" to request repeated matching to find all occurrences,            * | 
|  | *   like Perl's /g option. We set the variable find_all to a non-zero     * | 
|  | *   value if the -g option is present.                                    * | 
|  | * - "-i" to request caseless matching, like Perl's /i option.  We set the * | 
|  | *   variable caseless_match to PCRE2_CASELESS if the -i option is         * | 
|  | *   present.                                                              * | 
|  | **************************************************************************/ | 
|  |  | 
|  | find_all = 0; | 
|  | caseless_match = 0; | 
|  | for (i = 1; i < argc; i++) | 
|  | { | 
|  | if (strcmp(argv[i], "-g") == 0) find_all = 1; | 
|  | else if (strcmp(argv[i], "-i") == 0) caseless_match = PCRE2_CASELESS; | 
|  | else if (argv[i][0] == '-') | 
|  | { | 
|  | printf("Unrecognised option %s\en", argv[i]); | 
|  | return 1; | 
|  | } | 
|  | else break; | 
|  | } | 
|  |  | 
|  | /* After the options, we require exactly two arguments, which are the pattern, | 
|  | and the subject string. */ | 
|  |  | 
|  | if (argc - i != 2) | 
|  | { | 
|  | printf("Exactly two arguments required: a regex and a subject string\en"); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /* Pattern and subject are char arguments, so they can be straightforwardly | 
|  | cast to PCRE2_SPTR because we are working in 8-bit code units. The subject | 
|  | length is cast to PCRE2_SIZE for completeness, though PCRE2_SIZE is in fact | 
|  | defined to be size_t. */ | 
|  |  | 
|  | pattern = (PCRE2_SPTR)argv[i]; | 
|  | subject = (PCRE2_SPTR)argv[i+1]; | 
|  | subject_length = (PCRE2_SIZE)strlen((char *)subject); | 
|  |  | 
|  |  | 
|  | /************************************************************************* | 
|  | * Now we are going to compile the regular expression pattern, and handle * | 
|  | * any errors that are detected.                                          * | 
|  | *************************************************************************/ | 
|  |  | 
|  | re = pcre2_compile( | 
|  | pattern,               /* the pattern */ | 
|  | PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ | 
|  | caseless_match,        /* possibly enable caseless */ | 
|  | &errornumber,          /* for error number */ | 
|  | &erroroffset,          /* for error offset */ | 
|  | NULL);                 /* use default compile context */ | 
|  |  | 
|  | /* Compilation failed: print the error message and exit. */ | 
|  |  | 
|  | if (re == NULL) | 
|  | { | 
|  | PCRE2_UCHAR buffer[256]; | 
|  | pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); | 
|  | printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset, | 
|  | buffer); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  |  | 
|  | /************************************************************************* | 
|  | * If the compilation succeeded, we call PCRE2 again, in order to do a    * | 
|  | * pattern match against the subject string. This does just ONE match. If * | 
|  | * further matching is needed, it will be done below. Before running the  * | 
|  | * match we must set up a match_data block for holding the result. Using  * | 
|  | * pcre2_match_data_create_from_pattern() ensures that the block is       * | 
|  | * exactly the right size for the number of capturing parentheses in the  * | 
|  | * pattern. If you need to know the actual size of a match_data block as  * | 
|  | * a number of bytes, you can find it like this:                          * | 
|  | *                                                                        * | 
|  | * PCRE2_SIZE match_data_size = pcre2_get_match_data_size(match_data);    * | 
|  | *************************************************************************/ | 
|  |  | 
|  | match_data = pcre2_match_data_create_from_pattern(re, NULL); | 
|  |  | 
|  | /* Now run the match. */ | 
|  |  | 
|  | rc = pcre2_match( | 
|  | re,                   /* the compiled pattern */ | 
|  | subject,              /* the subject string */ | 
|  | subject_length,       /* the length of the subject */ | 
|  | 0,                    /* start at offset 0 in the subject */ | 
|  | 0,                    /* default options */ | 
|  | match_data,           /* block for storing the result */ | 
|  | NULL);                /* use default match context */ | 
|  |  | 
|  | /* Matching failed: handle error cases */ | 
|  |  | 
|  | if (rc < 0) | 
|  | { | 
|  | switch(rc) | 
|  | { | 
|  | case PCRE2_ERROR_NOMATCH: printf("No match\en"); break; | 
|  | /* | 
|  | Handle other special cases if you like | 
|  | */ | 
|  | default: printf("Matching error %d\en", rc); break; | 
|  | } | 
|  | pcre2_match_data_free(match_data);   /* Release memory used for the match */ | 
|  | pcre2_code_free(re);                 /*   data and the compiled pattern. */ | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /* Match succeeded. Get a pointer to the output vector, where string offsets | 
|  | are stored. */ | 
|  |  | 
|  | ovector = pcre2_get_ovector_pointer(match_data); | 
|  | printf("Match succeeded at offset %d\en", (int)ovector[0]); | 
|  |  | 
|  |  | 
|  | /************************************************************************* | 
|  | * We have found the first match within the subject string. If the output * | 
|  | * vector wasn't big enough, say so. Then output any substrings that were * | 
|  | * captured.                                                              * | 
|  | *************************************************************************/ | 
|  |  | 
|  | /* The output vector wasn't big enough. This should not happen, because we used | 
|  | pcre2_match_data_create_from_pattern() above. */ | 
|  |  | 
|  | if (rc == 0) | 
|  | printf("ovector was not big enough for all the captured substrings\en"); | 
|  |  | 
|  | /* Since release 10.38 PCRE2 has locked out the use of \eK in lookaround | 
|  | assertions. This is the recommended behaviour. However, the option | 
|  | PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK allows applications to re-enable the old | 
|  | behaviour. If that is set, it is possible to run patterns such as /(?=.\eK)/ that | 
|  | use \eK in an assertion to set the start of a match later than its end. In this | 
|  | demonstration program, we show how to detect this case, although it cannot arise | 
|  | because the option is never set. */ | 
|  |  | 
|  | if (ovector[0] > ovector[1]) | 
|  | { | 
|  | printf("\e\eK was used in an assertion to set the match start after its end.\en" | 
|  | "From end to start the match was: %.*s\en", (int)(ovector[0] - ovector[1]), | 
|  | (char *)(subject + ovector[1])); | 
|  | printf("Run abandoned\en"); | 
|  | pcre2_match_data_free(match_data); | 
|  | pcre2_code_free(re); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /* Show substrings stored in the output vector by number. Obviously, in a real | 
|  | application you might want to do things other than print them. */ | 
|  |  | 
|  | for (i = 0; i < rc; i++) | 
|  | { | 
|  | PCRE2_SPTR substring_start = subject + ovector[2*i]; | 
|  | PCRE2_SIZE substring_length = ovector[2*i+1] - ovector[2*i]; | 
|  | printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start); | 
|  | } | 
|  |  | 
|  |  | 
|  | /************************************************************************** | 
|  | * That concludes the basic part of this demonstration program. We have    * | 
|  | * compiled a pattern, and performed a single match. The code that follows * | 
|  | * shows first how to access named substrings, and then how to code for    * | 
|  | * repeated matches on the same subject.                                   * | 
|  | **************************************************************************/ | 
|  |  | 
|  | /* See if there are any named substrings, and if so, show them by name. First | 
|  | we have to extract the count of named parentheses from the pattern. */ | 
|  |  | 
|  | (void)pcre2_pattern_info( | 
|  | re,                   /* the compiled pattern */ | 
|  | PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */ | 
|  | &namecount);          /* where to put the answer */ | 
|  |  | 
|  | if (namecount == 0) | 
|  | printf("No named substrings\en"); | 
|  | else | 
|  | { | 
|  | PCRE2_SPTR tabptr; | 
|  | printf("Named substrings\en"); | 
|  |  | 
|  | /* Before we can access the substrings, we must extract the table for | 
|  | translating names to numbers, and the size of each entry in the table. */ | 
|  |  | 
|  | (void)pcre2_pattern_info( | 
|  | re,                       /* the compiled pattern */ | 
|  | PCRE2_INFO_NAMETABLE,     /* address of the table */ | 
|  | &name_table);             /* where to put the answer */ | 
|  |  | 
|  | (void)pcre2_pattern_info( | 
|  | re,                       /* the compiled pattern */ | 
|  | PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */ | 
|  | &name_entry_size);        /* where to put the answer */ | 
|  |  | 
|  | /* Now we can scan the table and, for each entry, print the number, the name, | 
|  | and the substring itself. In the 8-bit library the number is held in two | 
|  | bytes, most significant first. The name is printed right-justified in a field | 
|  | of name_entry_size - 3 characters. The field width is cast to int because that | 
|  | is the type printf() requires for a "*" width, and the code unit pointers are | 
|  | cast to char * to match the %s conversions, as is done for the numbered | 
|  | substrings above. The unsigned namecount is also cast so that the comparison | 
|  | with the int loop counter is not a mixed-sign one. */ | 
|  |  | 
|  | tabptr = name_table; | 
|  | for (i = 0; i < (int)namecount; i++) | 
|  | { | 
|  | int n = (tabptr[0] << 8) | tabptr[1]; | 
|  | printf("(%d) %*s: %.*s\en", n, (int)(name_entry_size - 3), | 
|  | (char *)(tabptr + 2), (int)(ovector[2*n+1] - ovector[2*n]), | 
|  | (char *)(subject + ovector[2*n])); | 
|  | tabptr += name_entry_size; | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | /************************************************************************* | 
|  | * If the "-g" option was given on the command line, we want to continue  * | 
|  | * to search for additional matches in the subject string, in a similar   * | 
|  | * way to the /g option in Perl. This turns out to be trickier than you   * | 
|  | * might think because of the possibility of matching an empty string.    * | 
|  | *                                                                        * | 
|  | * To help with this task, PCRE2 provides the pcre2_next_match() helper.  * | 
|  | *************************************************************************/ | 
|  |  | 
|  | if (!find_all)     /* Check for -g */ | 
|  | { | 
|  | pcre2_match_data_free(match_data);  /* Release the memory that was used */ | 
|  | pcre2_code_free(re);                /* for the match data and the pattern. */ | 
|  | return 0;                           /* Exit the program. */ | 
|  | } | 
|  |  | 
|  | /* Loop for second and subsequent matches */ | 
|  |  | 
|  | ovector_last[0] = ovector[0]; | 
|  | ovector_last[1] = ovector[1]; | 
|  |  | 
|  | for (;;) | 
|  | { | 
|  | PCRE2_SIZE start_offset; | 
|  | uint32_t options; | 
|  |  | 
|  | /* After each successful match, we use pcre2_next_match() to obtain the match | 
|  | parameters for subsequent match attempts. */ | 
|  |  | 
|  | if (!pcre2_next_match(match_data, &start_offset, &options)) | 
|  | break; | 
|  |  | 
|  | /* Run the next matching operation */ | 
|  |  | 
|  | rc = pcre2_match( | 
|  | re,                   /* the compiled pattern */ | 
|  | subject,              /* the subject string */ | 
|  | subject_length,       /* the length of the subject */ | 
|  | start_offset,         /* starting offset in the subject */ | 
|  | options,              /* options */ | 
|  | match_data,           /* block for storing the result */ | 
|  | NULL);                /* use default match context */ | 
|  |  | 
|  | /* If this match attempt fails, exit the loop for subsequent matches. */ | 
|  |  | 
|  | if (rc == PCRE2_ERROR_NOMATCH) | 
|  | break; | 
|  |  | 
|  | /* Other matching errors are not recoverable. */ | 
|  |  | 
|  | if (rc < 0) | 
|  | { | 
|  | printf("Matching error %d\en", rc); | 
|  | pcre2_match_data_free(match_data); | 
|  | pcre2_code_free(re); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /* This demonstration program depends on pcre2_next_match() to ensure that the | 
|  | loop for second and subsequent matches does not run forever. However, it would | 
|  | be robust practice for a production application to verify this. The following | 
|  | block of code shows how to do this. This error case is not reachable unless | 
|  | there is a bug in PCRE2. | 
|  |  | 
|  | Because this program does not set the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option, | 
|  | the logic is simple. We verify that either ovector[1] has advanced, or that we | 
|  | have an empty match touching the end of a previous non-empty match. See the | 
|  | API documentation for guidance if your application uses | 
|  | PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK and searches for multiple matches. */ | 
|  |  | 
|  | if (!(ovector[1] > ovector_last[1] || | 
|  | (ovector[1] == ovector[0] && ovector_last[1] > ovector_last[0] && | 
|  | ovector[1] == ovector_last[1]))) | 
|  | { | 
|  | printf("\e\eK was used in an assertion to yield non-advancing matches.\en"); | 
|  | printf("Run abandoned\en"); | 
|  | pcre2_match_data_free(match_data); | 
|  | pcre2_code_free(re); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | ovector_last[0] = ovector[0]; | 
|  | ovector_last[1] = ovector[1]; | 
|  |  | 
|  | /* Match succeeded. */ | 
|  |  | 
|  | printf("\enMatch succeeded again at offset %d\en", (int)ovector[0]); | 
|  |  | 
|  | /* The match succeeded, but the output vector wasn't big enough. This | 
|  | should not happen. */ | 
|  |  | 
|  | if (rc == 0) | 
|  | printf("ovector was not big enough for all the captured substrings\en"); | 
|  |  | 
|  | /* We guard against patterns such as /(?=.\eK)/ that use \eK in an assertion to | 
|  | set the start of a match later than its end. As explained above, this case | 
|  | should not occur because this demonstration program does not set the | 
|  | PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option. However, we do include code showing | 
|  | how to detect it. */ | 
|  |  | 
|  | if (ovector[0] > ovector[1]) | 
|  | { | 
|  | printf("\e\eK was used in an assertion to set the match start after its end.\en" | 
|  | "From end to start the match was: %.*s\en", (int)(ovector[0] - ovector[1]), | 
|  | (char *)(subject + ovector[1])); | 
|  | printf("Run abandoned\en"); | 
|  | pcre2_match_data_free(match_data); | 
|  | pcre2_code_free(re); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /* As before, show substrings stored in the output vector by number, and then | 
|  | also any named substrings. */ | 
|  |  | 
|  | for (i = 0; i < rc; i++) | 
|  | { | 
|  | PCRE2_SPTR substring_start = subject + ovector[2*i]; | 
|  | PCRE2_SIZE substring_length = ovector[2*i+1] - ovector[2*i]; | 
|  | printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start); | 
|  | } | 
|  |  | 
|  | if (namecount == 0) | 
|  | printf("No named substrings\en"); | 
|  | else | 
|  | { | 
|  | PCRE2_SPTR tabptr = name_table; | 
|  | printf("Named substrings\en"); | 
|  | for (i = 0; i < (int)namecount; i++) | 
|  | { | 
|  | int n = (tabptr[0] << 8) | tabptr[1]; | 
|  | printf("(%d) %*s: %.*s\en", n, (int)(name_entry_size - 3), | 
|  | (char *)(tabptr + 2), (int)(ovector[2*n+1] - ovector[2*n]), | 
|  | (char *)(subject + ovector[2*n])); | 
|  | tabptr += name_entry_size; | 
|  | } | 
|  | } | 
|  | }      /* End of loop to find second and subsequent matches */ | 
|  |  | 
|  | printf("\en"); | 
|  |  | 
|  | pcre2_match_data_free(match_data); | 
|  | pcre2_code_free(re); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* End of pcre2demo.c */ | 
|  | .EE |