| /************************************************* |
| * Perl-Compatible Regular Expressions * |
| *************************************************/ |
| |
| /* PCRE is a library of functions to support regular expressions whose syntax |
| and semantics are as close as possible to those of the Perl 5 language. |
| |
| Written by Philip Hazel |
| Copyright (c) 1997-2006 University of Cambridge |
| |
| ----------------------------------------------------------------------------- |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions are met: |
| |
| * Redistributions of source code must retain the above copyright notice, |
| this list of conditions and the following disclaimer. |
| |
| * Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| |
| * Neither the name of the University of Cambridge nor the names of its |
| contributors may be used to endorse or promote products derived from |
| this software without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| POSSIBILITY OF SUCH DAMAGE. |
| ----------------------------------------------------------------------------- |
| */ |
| |
| /* This file has been modified to use glib instead of the internal table |
| * in ucptable.c -- Marco Barisione */ |
| |
| /* This module contains code for searching the table of Unicode character |
| properties. */ |
| |
| #ifdef HAVE_CONFIG_H |
| #include "config.h" |
| #endif |
| |
| #include "pcre_internal.h" |
| |
| #include "ucp.h" /* Category definitions */ |
| #include "ucpinternal.h" /* Internal table details */ |
| |
| |
| /* Table to translate from particular type value to the general value. */ |
| |
| static int ucp_gentype[] = { |
| ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */ |
| ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */ |
| ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */ |
| ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */ |
| ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */ |
| ucp_P, ucp_P, /* Ps, Po */ |
| ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */ |
| ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */ |
| }; |
| |
| |
| |
| /************************************************* |
| * Search table and return type * |
| *************************************************/ |
| |
| /* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed |
| character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc. |
| |
| Arguments: |
| c the character value |
| type_ptr the detailed character type is returned here |
| script_ptr the script is returned here |
| |
| Returns: the character type category |
| */ |
| |
| int |
| _pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr) |
| { |
| /* Note that the Unicode types have the same values in glib and in |
| * PCRE, so ucp_Ll == G_UNICODE_LOWERCASE_LETTER, |
| * ucp_Zs == G_UNICODE_SPACE_SEPARATOR, and so on. */ |
| *type_ptr = g_unichar_type(c); |
| *script_ptr = g_unichar_get_script(c); |
| return ucp_gentype[*type_ptr]; |
| } |
| |
| |
| |
| |
| /************************************************* |
| * Search table and return other case * |
| *************************************************/ |
| |
| /* If the given character is a letter, and there is another case for the |
| letter, return the other case. Otherwise, return -1. |
| |
| Arguments: |
| c the character value |
| |
| Returns: the other case or NOTACHAR if none |
| */ |
| |
| unsigned int |
| _pcre_ucp_othercase(const unsigned int c) |
| { |
| int other_case = NOTACHAR; |
| |
| if (g_unichar_islower(c)) |
| other_case = g_unichar_toupper(c); |
| else if (g_unichar_isupper(c)) |
| other_case = g_unichar_tolower(c); |
| |
| if (other_case == c) |
| other_case = NOTACHAR; |
| |
| return other_case; |
| } |
| |
| |
| /* End of pcre_ucp_searchfuncs.c */ |