| /* decomp.c - Character decomposition. |
| * |
| * Copyright (C) 1999, 2000 Tom Tromey |
| * Copyright 2000 Red Hat, Inc. |
| * |
| * The Gnome Library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public License as |
| * published by the Free Software Foundation; either version 2 of the |
| * License, or (at your option) any later version. |
| * |
| * The Gnome Library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with the Gnome Library; see the file COPYING.LIB. If not, |
| * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| * Boston, MA 02111-1307, USA. |
| */ |
| |
| #include "glib.h" |
| #include "gunidecomp.h" |
| |
| #include <config.h> |
| |
| #include <stdlib.h> |
| |
| /* We cheat a bit and cast type values to (char *). We detect these |
| using the &0xff trick. */ |
| #define CC(Page, Char) \ |
| ((((GPOINTER_TO_INT(combining_class_table[Page])) & 0xff) \ |
| == GPOINTER_TO_INT(combining_class_table[Page])) \ |
| ? GPOINTER_TO_INT(combining_class_table[Page]) \ |
| : (combining_class_table[Page][Char])) |
| |
| #define COMBINING_CLASS(Char) \ |
| (((Char) > (G_UNICODE_LAST_CHAR)) ? 0 : CC((Char) >> 8, (Char) & 0xff)) |
| |
| /* Compute the canonical ordering of a string in-place. */ |
| void |
| g_unicode_canonical_ordering (gunichar *string, |
| size_t len) |
| { |
| size_t i; |
| int swap = 1; |
| |
| while (swap) |
| { |
| int last; |
| swap = 0; |
| last = COMBINING_CLASS (string[0]); |
| for (i = 0; i < len - 1; ++i) |
| { |
| int next = COMBINING_CLASS (string[i + 1]); |
| if (next != 0 && last > next) |
| { |
| size_t j; |
| /* Percolate item leftward through string. */ |
| for (j = i; j > 0; --j) |
| { |
| gunichar t; |
| if (COMBINING_CLASS (string[j]) <= next) |
| break; |
| t = string[j + 1]; |
| string[j + 1] = string[j]; |
| string[j] = t; |
| swap = 1; |
| } |
| /* We're re-entering the loop looking at the old |
| character again. */ |
| next = last; |
| } |
| last = next; |
| } |
| } |
| } |
| |
| gunichar * |
| g_unicode_canonical_decomposition (gunichar ch, |
| size_t *result_len) |
| { |
| gunichar *r = NULL; |
| |
| if (ch <= 0xffff) |
| { |
| int start = 0; |
| int end = G_N_ELEMENTS (decomp_table); |
| while (start != end) |
| { |
| int half = (start + end) / 2; |
| if (ch == decomp_table[half].ch) |
| { |
| /* Found it. */ |
| int i, len; |
| /* We store as a double-nul terminated string. */ |
| for (len = 0; (decomp_table[half].expansion[len] |
| || decomp_table[half].expansion[len + 1]); |
| len += 2) |
| ; |
| |
| /* We've counted twice as many bytes as there are |
| characters. */ |
| *result_len = len / 2; |
| r = malloc (len / 2 * sizeof (gunichar)); |
| |
| for (i = 0; i < len; i += 2) |
| { |
| r[i / 2] = (decomp_table[half].expansion[i] << 8 |
| | decomp_table[half].expansion[i + 1]); |
| } |
| break; |
| } |
| else if (ch > decomp_table[half].ch) |
| start = half; |
| else |
| end = half; |
| } |
| } |
| |
| if (r == NULL) |
| { |
| /* Not in our table. */ |
| r = malloc (sizeof (gunichar)); |
| *r = ch; |
| *result_len = 1; |
| } |
| |
| /* Supposedly following the Unicode 2.1.9 table means that the |
| decompositions come out in canonical order. I haven't tested |
| this, but we rely on it here. */ |
| return r; |
| } |