| From 23d48c5fc7aa889dc7798f9c64acd43d9cb34683 Mon Sep 17 00:00:00 2001 |
| From: Christian Persch <chpe@gnome.org> |
| Date: Sun, 12 Feb 2012 21:20:33 +0100 |
| Subject: [PATCH] regex: Use glib for unicode data |
| |
| Use g_unichar_type() and g_unichar_get_script() instead of pcre tables, and |
| compute UCD_OTHERCASE() with g_unichar_toupper()/g_unichar_tolower(). |
| --- |
| glib/pcre/pcre_compile.c | 26 +++--- |
| glib/pcre/pcre_dfa_exec.c | 96 ++++++++-------- |
| glib/pcre/pcre_exec.c | 26 +++--- |
| glib/pcre/pcre_internal.h | 11 +-- |
| glib/pcre/pcre_tables.c | 16 +++ |
| glib/pcre/pcre_xclass.c | 24 ++-- |
| glib/pcre/ucp.h | 265 +++++++++++++++++++++++---------------------- |
| 7 files changed, 239 insertions(+), 225 deletions(-) |
| |
| diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c |
| index 21bef80..a6c84e1 100644 |
| --- a/glib/pcre/pcre_compile.c |
| +++ b/glib/pcre/pcre_compile.c |
| @@ -2920,43 +2920,43 @@ Returns: TRUE if auto-possessifying is OK |
| static BOOL |
| check_char_prop(int c, int ptype, int pdata, BOOL negated) |
| { |
| -const ucd_record *prop = GET_UCD(c); |
| +const pcre_uint8 chartype = UCD_CHARTYPE(c); |
| switch(ptype) |
| { |
| case PT_LAMP: |
| - return (prop->chartype == ucp_Lu || |
| - prop->chartype == ucp_Ll || |
| - prop->chartype == ucp_Lt) == negated; |
| + return (chartype == ucp_Lu || |
| + chartype == ucp_Ll || |
| + chartype == ucp_Lt) == negated; |
| |
| case PT_GC: |
| - return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated; |
| + return (pdata == PRIV(ucp_gentype)[chartype]) == negated; |
| |
| case PT_PC: |
| - return (pdata == prop->chartype) == negated; |
| + return (pdata == chartype) == negated; |
| |
| case PT_SC: |
| - return (pdata == prop->script) == negated; |
| + return (pdata == UCD_SCRIPT(c)) == negated; |
| |
| /* These are specials */ |
| |
| case PT_ALNUM: |
| - return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| - PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated; |
| + return (PRIV(ucp_gentype)[chartype] == ucp_L || |
| + PRIV(ucp_gentype)[chartype] == ucp_N) == negated; |
| |
| case PT_SPACE: /* Perl space */ |
| - return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
| + return (PRIV(ucp_gentype)[chartype] == ucp_Z || |
| c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) |
| == negated; |
| |
| case PT_PXSPACE: /* POSIX space */ |
| - return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
| + return (PRIV(ucp_gentype)[chartype] == ucp_Z || |
| c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
| c == CHAR_FF || c == CHAR_CR) |
| == negated; |
| |
| case PT_WORD: |
| - return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| - PRIV(ucp_gentype)[prop->chartype] == ucp_N || |
| + return (PRIV(ucp_gentype)[chartype] == ucp_L || |
| + PRIV(ucp_gentype)[chartype] == ucp_N || |
| c == CHAR_UNDERSCORE) == negated; |
| } |
| return FALSE; |
| diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c |
| index 9565d46..3f913ce 100644 |
| --- a/glib/pcre/pcre_dfa_exec.c |
| +++ b/glib/pcre/pcre_dfa_exec.c |
| @@ -1060,7 +1060,7 @@ for (;;) |
| if (clen > 0) |
| { |
| BOOL OK; |
| - const ucd_record * prop = GET_UCD(c); |
| + const pcre_uint8 chartype = UCD_CHARTYPE(c); |
| switch(code[1]) |
| { |
| case PT_ANY: |
| @@ -1068,43 +1068,43 @@ for (;;) |
| break; |
| |
| case PT_LAMP: |
| - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || |
| - prop->chartype == ucp_Lt; |
| + OK = chartype == ucp_Lu || chartype == ucp_Ll || |
| + chartype == ucp_Lt; |
| break; |
| |
| case PT_GC: |
| - OK = PRIV(ucp_gentype)[prop->chartype] == code[2]; |
| + OK = PRIV(ucp_gentype)[chartype] == code[2]; |
| break; |
| |
| case PT_PC: |
| - OK = prop->chartype == code[2]; |
| + OK = chartype == code[2]; |
| break; |
| |
| case PT_SC: |
| - OK = prop->script == code[2]; |
| + OK = UCD_SCRIPT(c) == code[2]; |
| break; |
| |
| /* These are specials for combination cases. */ |
| |
| case PT_ALNUM: |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| - PRIV(ucp_gentype)[prop->chartype] == ucp_N; |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_L || |
| + PRIV(ucp_gentype)[chartype] == ucp_N; |
| break; |
| |
| case PT_SPACE: /* Perl space */ |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || |
| c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; |
| break; |
| |
| case PT_PXSPACE: /* POSIX space */ |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || |
| c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
| c == CHAR_FF || c == CHAR_CR; |
| break; |
| |
| case PT_WORD: |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| - PRIV(ucp_gentype)[prop->chartype] == ucp_N || |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_L || |
| + PRIV(ucp_gentype)[chartype] == ucp_N || |
| c == CHAR_UNDERSCORE; |
| break; |
| |
| @@ -1294,7 +1294,7 @@ for (;;) |
| if (clen > 0) |
| { |
| BOOL OK; |
| - const ucd_record * prop = GET_UCD(c); |
| + const pcre_uint8 chartype = UCD_CHARTYPE(c); |
| switch(code[2]) |
| { |
| case PT_ANY: |
| @@ -1302,43 +1302,43 @@ for (;;) |
| break; |
| |
| case PT_LAMP: |
| - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || |
| - prop->chartype == ucp_Lt; |
| + OK = chartype == ucp_Lu || chartype == ucp_Ll || |
| + chartype == ucp_Lt; |
| break; |
| |
| case PT_GC: |
| - OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; |
| + OK = PRIV(ucp_gentype)[chartype] == code[3]; |
| break; |
| |
| case PT_PC: |
| - OK = prop->chartype == code[3]; |
| + OK = chartype == code[3]; |
| break; |
| |
| case PT_SC: |
| - OK = prop->script == code[3]; |
| + OK = UCD_SCRIPT(c) == code[3]; |
| break; |
| |
| /* These are specials for combination cases. */ |
| |
| case PT_ALNUM: |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| - PRIV(ucp_gentype)[prop->chartype] == ucp_N; |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_L || |
| + PRIV(ucp_gentype)[chartype] == ucp_N; |
| break; |
| |
| case PT_SPACE: /* Perl space */ |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || |
| c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; |
| break; |
| |
| case PT_PXSPACE: /* POSIX space */ |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || |
| c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
| c == CHAR_FF || c == CHAR_CR; |
| break; |
| |
| case PT_WORD: |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| - PRIV(ucp_gentype)[prop->chartype] == ucp_N || |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_L || |
| + PRIV(ucp_gentype)[chartype] == ucp_N || |
| c == CHAR_UNDERSCORE; |
| break; |
| |
| @@ -1541,7 +1541,7 @@ for (;;) |
| if (clen > 0) |
| { |
| BOOL OK; |
| - const ucd_record * prop = GET_UCD(c); |
| + const pcre_uint8 chartype = UCD_CHARTYPE(c); |
| switch(code[2]) |
| { |
| case PT_ANY: |
| @@ -1549,43 +1549,43 @@ for (;;) |
| break; |
| |
| case PT_LAMP: |
| - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || |
| - prop->chartype == ucp_Lt; |
| + OK = chartype == ucp_Lu || chartype == ucp_Ll || |
| + chartype == ucp_Lt; |
| break; |
| |
| case PT_GC: |
| - OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; |
| + OK = PRIV(ucp_gentype)[chartype] == code[3]; |
| break; |
| |
| case PT_PC: |
| - OK = prop->chartype == code[3]; |
| + OK = chartype == code[3]; |
| break; |
| |
| case PT_SC: |
| - OK = prop->script == code[3]; |
| + OK = UCD_SCRIPT(c) == code[3]; |
| break; |
| |
| /* These are specials for combination cases. */ |
| |
| case PT_ALNUM: |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| - PRIV(ucp_gentype)[prop->chartype] == ucp_N; |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_L || |
| + PRIV(ucp_gentype)[chartype] == ucp_N; |
| break; |
| |
| case PT_SPACE: /* Perl space */ |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || |
| c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; |
| break; |
| |
| case PT_PXSPACE: /* POSIX space */ |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || |
| c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
| c == CHAR_FF || c == CHAR_CR; |
| break; |
| |
| case PT_WORD: |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| - PRIV(ucp_gentype)[prop->chartype] == ucp_N || |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_L || |
| + PRIV(ucp_gentype)[chartype] == ucp_N || |
| c == CHAR_UNDERSCORE; |
| break; |
| |
| @@ -1813,7 +1813,7 @@ for (;;) |
| if (clen > 0) |
| { |
| BOOL OK; |
| - const ucd_record * prop = GET_UCD(c); |
| + const pcre_uint8 chartype = UCD_CHARTYPE(c); |
| switch(code[1 + IMM2_SIZE + 1]) |
| { |
| case PT_ANY: |
| @@ -1821,43 +1821,43 @@ for (;;) |
| break; |
| |
| case PT_LAMP: |
| - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || |
| - prop->chartype == ucp_Lt; |
| + OK = chartype == ucp_Lu || chartype == ucp_Ll || |
| + chartype == ucp_Lt; |
| break; |
| |
| case PT_GC: |
| - OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2]; |
| + OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2]; |
| break; |
| |
| case PT_PC: |
| - OK = prop->chartype == code[1 + IMM2_SIZE + 2]; |
| + OK = chartype == code[1 + IMM2_SIZE + 2]; |
| break; |
| |
| case PT_SC: |
| - OK = prop->script == code[1 + IMM2_SIZE + 2]; |
| + OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2]; |
| break; |
| |
| /* These are specials for combination cases. */ |
| |
| case PT_ALNUM: |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| - PRIV(ucp_gentype)[prop->chartype] == ucp_N; |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_L || |
| + PRIV(ucp_gentype)[chartype] == ucp_N; |
| break; |
| |
| case PT_SPACE: /* Perl space */ |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || |
| c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; |
| break; |
| |
| case PT_PXSPACE: /* POSIX space */ |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || |
| c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
| c == CHAR_FF || c == CHAR_CR; |
| break; |
| |
| case PT_WORD: |
| - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| - PRIV(ucp_gentype)[prop->chartype] == ucp_N || |
| + OK = PRIV(ucp_gentype)[chartype] == ucp_L || |
| + PRIV(ucp_gentype)[chartype] == ucp_N || |
| c == CHAR_UNDERSCORE; |
| break; |
| |
| diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c |
| index 830b8b5..c89a3f9 100644 |
| --- a/glib/pcre/pcre_exec.c |
| +++ b/glib/pcre/pcre_exec.c |
| @@ -2565,7 +2565,7 @@ for (;;) |
| } |
| GETCHARINCTEST(c, eptr); |
| { |
| - const ucd_record *prop = GET_UCD(c); |
| + const pcre_uint8 chartype = UCD_CHARTYPE(c); |
| |
| switch(ecode[1]) |
| { |
| @@ -2574,44 +2574,44 @@ for (;;) |
| break; |
| |
| case PT_LAMP: |
| - if ((prop->chartype == ucp_Lu || |
| - prop->chartype == ucp_Ll || |
| - prop->chartype == ucp_Lt) == (op == OP_NOTPROP)) |
| + if ((chartype == ucp_Lu || |
| + chartype == ucp_Ll || |
| + chartype == ucp_Lt) == (op == OP_NOTPROP)) |
| RRETURN(MATCH_NOMATCH); |
| break; |
| |
| case PT_GC: |
| - if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP)) |
| + if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP)) |
| RRETURN(MATCH_NOMATCH); |
| break; |
| |
| case PT_PC: |
| - if ((ecode[2] != prop->chartype) == (op == OP_PROP)) |
| + if ((ecode[2] != chartype) == (op == OP_PROP)) |
| RRETURN(MATCH_NOMATCH); |
| break; |
| |
| case PT_SC: |
| - if ((ecode[2] != prop->script) == (op == OP_PROP)) |
| + if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP)) |
| RRETURN(MATCH_NOMATCH); |
| break; |
| |
| /* These are specials */ |
| |
| case PT_ALNUM: |
| - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| - PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP)) |
| + if ((PRIV(ucp_gentype)[chartype] == ucp_L || |
| + PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP)) |
| RRETURN(MATCH_NOMATCH); |
| break; |
| |
| case PT_SPACE: /* Perl space */ |
| - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
| + if ((PRIV(ucp_gentype)[chartype] == ucp_Z || |
| c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) |
| == (op == OP_NOTPROP)) |
| RRETURN(MATCH_NOMATCH); |
| break; |
| |
| case PT_PXSPACE: /* POSIX space */ |
| - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
| + if ((PRIV(ucp_gentype)[chartype] == ucp_Z || |
| c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
| c == CHAR_FF || c == CHAR_CR) |
| == (op == OP_NOTPROP)) |
| @@ -2619,8 +2619,8 @@ for (;;) |
| break; |
| |
| case PT_WORD: |
| - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| - PRIV(ucp_gentype)[prop->chartype] == ucp_N || |
| + if ((PRIV(ucp_gentype)[chartype] == ucp_L || |
| + PRIV(ucp_gentype)[chartype] == ucp_N || |
| c == CHAR_UNDERSCORE) == (op == OP_NOTPROP)) |
| RRETURN(MATCH_NOMATCH); |
| break; |
| diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h |
| index 181c312..234af1b 100644 |
| --- a/glib/pcre/pcre_internal.h |
| +++ b/glib/pcre/pcre_internal.h |
| @@ -2329,15 +2329,12 @@ extern const int PRIV(ucp_typerange)[]; |
| #ifdef SUPPORT_UCP |
| /* UCD access macros */ |
| |
| -#define UCD_BLOCK_SIZE 128 |
| -#define GET_UCD(ch) (PRIV(ucd_records) + \ |
| - PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \ |
| - UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE]) |
| +unsigned int _pcre_ucp_othercase(const unsigned int c); |
| |
| -#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype |
| -#define UCD_SCRIPT(ch) GET_UCD(ch)->script |
| +#define UCD_CHARTYPE(ch) ((pcre_uint8)g_unichar_type((gunichar)(ch))) |
| +#define UCD_SCRIPT(ch) ((pcre_uint8)g_unichar_get_script((gunichar)(ch))) |
| #define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)] |
| -#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case) |
| +#define UCD_OTHERCASE(ch) (_pcre_ucp_othercase(ch)) |
| |
| #endif /* SUPPORT_UCP */ |
| |
| diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c |
| index 7ac2d89..e401974 100644 |
| --- a/glib/pcre/pcre_tables.c |
| +++ b/glib/pcre/pcre_tables.c |
| @@ -584,6 +584,22 @@ const ucp_type_table PRIV(utt)[] = { |
| |
| const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); |
| |
| +/* Other case of c per glib, or c itself if there is none: the macro this |
| +   replaces was (ch + GET_UCD(ch)->other_case), so a caseless character |
| +   presumably came back unchanged (offset 0), not as NOTACHAR. */ |
| +unsigned int |
| +_pcre_ucp_othercase(const unsigned int c) |
| +{ |
| +  unsigned int other_case = c; |
| + |
| +  if (g_unichar_islower(c)) |
| +    other_case = g_unichar_toupper(c); |
| +  else if (g_unichar_isupper(c)) |
| +    other_case = g_unichar_tolower(c); |
| + |
| +  return other_case; |
| +} |
| + |
| #endif /* SUPPORT_UTF */ |
| |
| /* End of pcre_tables.c */ |
| diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c |
| index dca7a39..e5a55d7 100644 |
| --- a/glib/pcre/pcre_xclass.c |
| +++ b/glib/pcre/pcre_xclass.c |
| @@ -127,7 +127,7 @@ while ((t = *data++) != XCL_END) |
| #ifdef SUPPORT_UCP |
| else /* XCL_PROP & XCL_NOTPROP */ |
| { |
| - const ucd_record *prop = GET_UCD(c); |
| + const pcre_uint8 chartype = UCD_CHARTYPE(c); |
| |
| switch(*data) |
| { |
| @@ -136,46 +136,46 @@ while ((t = *data++) != XCL_END) |
| break; |
| |
| case PT_LAMP: |
| - if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || |
| - prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated; |
| + if ((chartype == ucp_Lu || chartype == ucp_Ll || |
| + chartype == ucp_Lt) == (t == XCL_PROP)) return !negated; |
| break; |
| |
| case PT_GC: |
| - if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP)) |
| + if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP)) |
| return !negated; |
| break; |
| |
| case PT_PC: |
| - if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated; |
| + if ((data[1] == chartype) == (t == XCL_PROP)) return !negated; |
| break; |
| |
| case PT_SC: |
| - if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated; |
| + if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated; |
| break; |
| |
| case PT_ALNUM: |
| - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| - PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP)) |
| + if ((PRIV(ucp_gentype)[chartype] == ucp_L || |
| + PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP)) |
| return !negated; |
| break; |
| |
| case PT_SPACE: /* Perl space */ |
| - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
| + if ((PRIV(ucp_gentype)[chartype] == ucp_Z || |
| c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) |
| == (t == XCL_PROP)) |
| return !negated; |
| break; |
| |
| case PT_PXSPACE: /* POSIX space */ |
| - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
| + if ((PRIV(ucp_gentype)[chartype] == ucp_Z || |
| c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
| c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP)) |
| return !negated; |
| break; |
| |
| case PT_WORD: |
| - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| - PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE) |
| + if ((PRIV(ucp_gentype)[chartype] == ucp_L || |
| + PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE) |
| == (t == XCL_PROP)) |
| return !negated; |
| break; |
| diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h |
| index 59c3bec..53a48c9 100644 |
| --- a/glib/pcre/ucp.h |
| +++ b/glib/pcre/ucp.h |
| @@ -10,6 +10,7 @@ the UCD access macros. New values that are added for new releases of Unicode |
| should always be at the end of each enum, for backwards compatibility. */ |
| |
| /* These are the general character categories. */ |
| +#include "gunicode.h" |
| |
| enum { |
| ucp_C, /* Other */ |
| @@ -24,148 +25,148 @@ enum { |
| /* These are the particular character types. */ |
| |
| enum { |
| - ucp_Cc, /* Control */ |
| - ucp_Cf, /* Format */ |
| - ucp_Cn, /* Unassigned */ |
| - ucp_Co, /* Private use */ |
| - ucp_Cs, /* Surrogate */ |
| - ucp_Ll, /* Lower case letter */ |
| - ucp_Lm, /* Modifier letter */ |
| - ucp_Lo, /* Other letter */ |
| - ucp_Lt, /* Title case letter */ |
| - ucp_Lu, /* Upper case letter */ |
| - ucp_Mc, /* Spacing mark */ |
| - ucp_Me, /* Enclosing mark */ |
| - ucp_Mn, /* Non-spacing mark */ |
| - ucp_Nd, /* Decimal number */ |
| - ucp_Nl, /* Letter number */ |
| - ucp_No, /* Other number */ |
| - ucp_Pc, /* Connector punctuation */ |
| - ucp_Pd, /* Dash punctuation */ |
| - ucp_Pe, /* Close punctuation */ |
| - ucp_Pf, /* Final punctuation */ |
| - ucp_Pi, /* Initial punctuation */ |
| - ucp_Po, /* Other punctuation */ |
| - ucp_Ps, /* Open punctuation */ |
| - ucp_Sc, /* Currency symbol */ |
| - ucp_Sk, /* Modifier symbol */ |
| - ucp_Sm, /* Mathematical symbol */ |
| - ucp_So, /* Other symbol */ |
| - ucp_Zl, /* Line separator */ |
| - ucp_Zp, /* Paragraph separator */ |
| - ucp_Zs /* Space separator */ |
| + ucp_Cc = G_UNICODE_CONTROL, /* Control */ |
| + ucp_Cf = G_UNICODE_FORMAT, /* Format */ |
| + ucp_Cn = G_UNICODE_UNASSIGNED, /* Unassigned */ |
| + ucp_Co = G_UNICODE_PRIVATE_USE, /* Private use */ |
| + ucp_Cs = G_UNICODE_SURROGATE, /* Surrogate */ |
| + ucp_Ll = G_UNICODE_LOWERCASE_LETTER, /* Lower case letter */ |
| + ucp_Lm = G_UNICODE_MODIFIER_LETTER, /* Modifier letter */ |
| + ucp_Lo = G_UNICODE_OTHER_LETTER, /* Other letter */ |
| + ucp_Lt = G_UNICODE_TITLECASE_LETTER, /* Title case letter */ |
| + ucp_Lu = G_UNICODE_UPPERCASE_LETTER, /* Upper case letter */ |
| + ucp_Mc = G_UNICODE_SPACING_MARK, /* Spacing mark */ |
| + ucp_Me = G_UNICODE_ENCLOSING_MARK, /* Enclosing mark */ |
| + ucp_Mn = G_UNICODE_NON_SPACING_MARK, /* Non-spacing mark */ |
| + ucp_Nd = G_UNICODE_DECIMAL_NUMBER, /* Decimal number */ |
| + ucp_Nl = G_UNICODE_LETTER_NUMBER, /* Letter number */ |
| + ucp_No = G_UNICODE_OTHER_NUMBER, /* Other number */ |
| + ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION, /* Connector punctuation */ |
| + ucp_Pd = G_UNICODE_DASH_PUNCTUATION, /* Dash punctuation */ |
| + ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION, /* Close punctuation */ |
| + ucp_Pf = G_UNICODE_FINAL_PUNCTUATION, /* Final punctuation */ |
| + ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION, /* Initial punctuation */ |
| + ucp_Po = G_UNICODE_OTHER_PUNCTUATION, /* Other punctuation */ |
| + ucp_Ps = G_UNICODE_OPEN_PUNCTUATION, /* Open punctuation */ |
| + ucp_Sc = G_UNICODE_CURRENCY_SYMBOL, /* Currency symbol */ |
| + ucp_Sk = G_UNICODE_MODIFIER_SYMBOL, /* Modifier symbol */ |
| + ucp_Sm = G_UNICODE_MATH_SYMBOL, /* Mathematical symbol */ |
| + ucp_So = G_UNICODE_OTHER_SYMBOL, /* Other symbol */ |
| + ucp_Zl = G_UNICODE_LINE_SEPARATOR, /* Line separator */ |
| + ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR, /* Paragraph separator */ |
| + ucp_Zs = G_UNICODE_SPACE_SEPARATOR /* Space separator */ |
| }; |
| |
| /* These are the script identifications. */ |
| |
| enum { |
| - ucp_Arabic, |
| - ucp_Armenian, |
| - ucp_Bengali, |
| - ucp_Bopomofo, |
| - ucp_Braille, |
| - ucp_Buginese, |
| - ucp_Buhid, |
| - ucp_Canadian_Aboriginal, |
| - ucp_Cherokee, |
| - ucp_Common, |
| - ucp_Coptic, |
| - ucp_Cypriot, |
| - ucp_Cyrillic, |
| - ucp_Deseret, |
| - ucp_Devanagari, |
| - ucp_Ethiopic, |
| - ucp_Georgian, |
| - ucp_Glagolitic, |
| - ucp_Gothic, |
| - ucp_Greek, |
| - ucp_Gujarati, |
| - ucp_Gurmukhi, |
| - ucp_Han, |
| - ucp_Hangul, |
| - ucp_Hanunoo, |
| - ucp_Hebrew, |
| - ucp_Hiragana, |
| - ucp_Inherited, |
| - ucp_Kannada, |
| - ucp_Katakana, |
| - ucp_Kharoshthi, |
| - ucp_Khmer, |
| - ucp_Lao, |
| - ucp_Latin, |
| - ucp_Limbu, |
| - ucp_Linear_B, |
| - ucp_Malayalam, |
| - ucp_Mongolian, |
| - ucp_Myanmar, |
| - ucp_New_Tai_Lue, |
| - ucp_Ogham, |
| - ucp_Old_Italic, |
| - ucp_Old_Persian, |
| - ucp_Oriya, |
| - ucp_Osmanya, |
| - ucp_Runic, |
| - ucp_Shavian, |
| - ucp_Sinhala, |
| - ucp_Syloti_Nagri, |
| - ucp_Syriac, |
| - ucp_Tagalog, |
| - ucp_Tagbanwa, |
| - ucp_Tai_Le, |
| - ucp_Tamil, |
| - ucp_Telugu, |
| - ucp_Thaana, |
| - ucp_Thai, |
| - ucp_Tibetan, |
| - ucp_Tifinagh, |
| - ucp_Ugaritic, |
| - ucp_Yi, |
| + ucp_Arabic = G_UNICODE_SCRIPT_ARABIC, |
| + ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN, |
| + ucp_Bengali = G_UNICODE_SCRIPT_BENGALI, |
| + ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO, |
| + ucp_Braille = G_UNICODE_SCRIPT_BRAILLE, |
| + ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE, |
| + ucp_Buhid = G_UNICODE_SCRIPT_BUHID, |
| + ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, |
| + ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE, |
| + ucp_Common = G_UNICODE_SCRIPT_COMMON, |
| + ucp_Coptic = G_UNICODE_SCRIPT_COPTIC, |
| + ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT, |
| + ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC, |
| + ucp_Deseret = G_UNICODE_SCRIPT_DESERET, |
| + ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI, |
| + ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC, |
| + ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN, |
| + ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC, |
| + ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC, |
| + ucp_Greek = G_UNICODE_SCRIPT_GREEK, |
| + ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI, |
| + ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI, |
| + ucp_Han = G_UNICODE_SCRIPT_HAN, |
| + ucp_Hangul = G_UNICODE_SCRIPT_HANGUL, |
| + ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO, |
| + ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW, |
| + ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA, |
| + ucp_Inherited = G_UNICODE_SCRIPT_INHERITED, |
| + ucp_Kannada = G_UNICODE_SCRIPT_KANNADA, |
| + ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA, |
| + ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI, |
| + ucp_Khmer = G_UNICODE_SCRIPT_KHMER, |
| + ucp_Lao = G_UNICODE_SCRIPT_LAO, |
| + ucp_Latin = G_UNICODE_SCRIPT_LATIN, |
| + ucp_Limbu = G_UNICODE_SCRIPT_LIMBU, |
| + ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B, |
| + ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM, |
| + ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN, |
| + ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR, |
| + ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE, |
| + ucp_Ogham = G_UNICODE_SCRIPT_OGHAM, |
| + ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC, |
| + ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN, |
| + ucp_Oriya = G_UNICODE_SCRIPT_ORIYA, |
| + ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA, |
| + ucp_Runic = G_UNICODE_SCRIPT_RUNIC, |
| + ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN, |
| + ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA, |
| + ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI, |
| + ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC, |
| + ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG, |
| + ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA, |
| + ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE, |
| + ucp_Tamil = G_UNICODE_SCRIPT_TAMIL, |
| + ucp_Telugu = G_UNICODE_SCRIPT_TELUGU, |
| + ucp_Thaana = G_UNICODE_SCRIPT_THAANA, |
| + ucp_Thai = G_UNICODE_SCRIPT_THAI, |
| + ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN, |
| + ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH, |
| + ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC, |
| + ucp_Yi = G_UNICODE_SCRIPT_YI, |
| /* New for Unicode 5.0: */ |
| - ucp_Balinese, |
| - ucp_Cuneiform, |
| - ucp_Nko, |
| - ucp_Phags_Pa, |
| - ucp_Phoenician, |
| + ucp_Balinese = G_UNICODE_SCRIPT_BALINESE, |
| + ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM, |
| + ucp_Nko = G_UNICODE_SCRIPT_NKO, |
| + ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA, |
| + ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN, |
| /* New for Unicode 5.1: */ |
| - ucp_Carian, |
| - ucp_Cham, |
| - ucp_Kayah_Li, |
| - ucp_Lepcha, |
| - ucp_Lycian, |
| - ucp_Lydian, |
| - ucp_Ol_Chiki, |
| - ucp_Rejang, |
| - ucp_Saurashtra, |
| - ucp_Sundanese, |
| - ucp_Vai, |
| + ucp_Carian = G_UNICODE_SCRIPT_CARIAN, |
| + ucp_Cham = G_UNICODE_SCRIPT_CHAM, |
| + ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI, |
| + ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA, |
| + ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN, |
| + ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN, |
| + ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI, |
| + ucp_Rejang = G_UNICODE_SCRIPT_REJANG, |
| + ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA, |
| + ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE, |
| + ucp_Vai = G_UNICODE_SCRIPT_VAI, |
| /* New for Unicode 5.2: */ |
| - ucp_Avestan, |
| - ucp_Bamum, |
| - ucp_Egyptian_Hieroglyphs, |
| - ucp_Imperial_Aramaic, |
| - ucp_Inscriptional_Pahlavi, |
| - ucp_Inscriptional_Parthian, |
| - ucp_Javanese, |
| - ucp_Kaithi, |
| - ucp_Lisu, |
| - ucp_Meetei_Mayek, |
| - ucp_Old_South_Arabian, |
| - ucp_Old_Turkic, |
| - ucp_Samaritan, |
| - ucp_Tai_Tham, |
| - ucp_Tai_Viet, |
| + ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN, |
| + ucp_Bamum = G_UNICODE_SCRIPT_BAMUM, |
| + ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS, |
| + ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC, |
| + ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI, |
| + ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, |
| + ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE, |
| + ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI, |
| + ucp_Lisu = G_UNICODE_SCRIPT_LISU, |
| + ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK, |
| + ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN, |
| + ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC, |
| + ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN, |
| + ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM, |
| + ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET, |
| /* New for Unicode 6.0.0: */ |
| - ucp_Batak, |
| - ucp_Brahmi, |
| - ucp_Mandaic, |
| + ucp_Batak = G_UNICODE_SCRIPT_BATAK, |
| + ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI, |
| + ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC, |
| /* New for Unicode 6.1.0: */ |
| - ucp_Chakma, |
| - ucp_Meroitic_Cursive, |
| - ucp_Meroitic_Hieroglyphs, |
| - ucp_Miao, |
| - ucp_Sharada, |
| - ucp_Sora_Sompeng, |
| - ucp_Takri |
| + ucp_Chakma = G_UNICODE_SCRIPT_CHAKMA, |
| + ucp_Meroitic_Cursive = G_UNICODE_SCRIPT_MEROITIC_CURSIVE, |
| + ucp_Meroitic_Hieroglyphs = G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS, |
| + ucp_Miao = G_UNICODE_SCRIPT_MIAO, |
| + ucp_Sharada = G_UNICODE_SCRIPT_SHARADA, |
| + ucp_Sora_Sompeng = G_UNICODE_SCRIPT_SORA_SOMPENG, |
| + ucp_Takri = G_UNICODE_SCRIPT_TAKRI |
| }; |
| |
| #endif |
| -- |
| 1.7.5.1.217.g4e3aa.dirty |
| |