blob: 1c2eef1d27f6b82989b4489d0fc5ceeedf69ae50 [file] [log] [blame]
/* GLib
* unicode-data.c: Test Unicode character data
*
* Copyright (C) 2019 Red Hat, Inc
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include "config.h"
#include <glib.h>
#include <string.h>
#include <locale.h>
#ifndef G_OS_WIN32
#include <unistd.h>
#endif
#include "test-common.h"
/* Translate a GUnicodeType into its two-letter general-category label
 * (for example "Lu" or "Nd").
 *
 * The lookup table is indexed directly by the numeric enum value and holds
 * 30 entries; no range check is performed on @t.
 */
static const char *
char_type (GUnicodeType t)
{
  /* One row per category family; order must follow GUnicodeType. */
  static const char * const category_labels[] = {
    "Cc", "Cf", "Cn", "Co", "Cs",
    "Ll", "Lm", "Lo", "Lt", "Lu",
    "Mc", "Me", "Mn",
    "Nd", "Nl", "No",
    "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps",
    "Sc", "Sk", "Sm", "So",
    "Zl", "Zp", "Zs"
  };
  const char *label = category_labels[t];

  return label;
}
/* Return the short line-break class label ("BK", "AL", "ZWJ", ...) for a
 * GUnicodeBreakType value.
 *
 * The table is indexed by the numeric enum value, so its order must follow
 * the enum.  A value that falls outside the table (negative, or a break
 * type this table does not know about -- e.g. one added by a GLib newer
 * than the table) yields "??" rather than reading past the array.
 *
 * The returned string is static and must not be freed.
 */
static const char *
break_type (GUnicodeBreakType t)
{
  static const char * const names[] = {
    "BK", "CR", "LF", "CM", "SG", "ZW", "IN", "GL", "CB",
    "SP", "BA", "BB", "B2", "HY", "NS", "OP", "CL", "QU",
    "EX", "ID", "NU", "IS", "SY", "AL", "PR", "PO", "SA",
    "AI", "XX", "NL", "WJ", "JL", "JV", "JT", "H2", "H3",
    "CP", "CJ", "HL", "RI", "EB", "EM", "ZWJ"
  };
  /* Going through an unsigned type folds negative values into the
   * "too large" case, so a single comparison covers both ends. */
  gsize idx = (gsize) t;

  if (idx >= G_N_ELEMENTS (names))
    return "??";

  return names[idx];
}
/* Return the four-letter ISO 15924 script code ("Latn", "Grek", ...) for a
 * GUnicodeScript value.
 *
 * The table is indexed by the numeric enum value, so its order must follow
 * GUnicodeScript.  A value outside the table -- including the -1 that
 * test_file() uses as its "no previous script" seed, or a script this table
 * does not know about -- yields "??" rather than an out-of-bounds read.
 *
 * The returned string is static and must not be freed.
 */
static const char *
script_name (GUnicodeScript s)
{
  static const char * const names[] = {
    "Zyyy", "Zinh", "Arab", "Armn", "Beng", "Bopo", "Cher",
    "Copt", "Cyrl", "Dsrt", "Deva", "Ethi", "Geor", "Goth",
    "Grek", "Gujr", "Guru", "Hani", "Hang", "Hebr", "Hira",
    "Knda", "Kana", "Khmr", "Laoo", "Latn", "Mlym", "Mong",
    "Mymr", "Ogam", "Ital", "Orya", "Runr", "Sinh", "Syrc",
    "Taml", "Telu", "Thaa", "Thai", "Tibt", "Cans", "Yiii",
    "Tglg", "Hano", "Buhd", "Tagb", "Brai", "Cprt", "Limb",
    "Osma", "Shaw", "Linb", "Tale", "Ugar", "Talu", "Bugi",
    "Glag", "Tfng", "Sylo", "Xpeo", "Khar", "Zzzz", "Bali",
    "Xsux", "Phnx", "Phag", "Nkoo", "Kali", "Lepc", "Rjng",
    "Sund", "Saur", "Cham", "Olck", "Vaii", "Cari", "Lyci",
    "Lydi", "Avst", "Bamu", "Egyp", "Armi", "Phli", "Prti",
    "Java", "Kthi", "Lisu", "Mtei", "Sarb", "Orkh", "Samr",
    "Lana", "Tavt", "Batk", "Brah", "Mand", "Cakm", "Merc",
    "Mero", "Plrd", "Shrd", "Sora", "Takr", "Bass", "Aghb",
    "Dupl", "Elba", "Gran", "Khoj", "Sind", "Lina", "Mahj",
    "Mani", "Mend", "Modi", "Mroo", "Nbat", "Narb", "Perm",
    "Hmng", "Palm", "Pauc", "Phlp", "Sidd", "Tirh", "Wara",
    "Ahom", "Hluw", "Hatr", "Mult", "Hung", "Sgnw", "Adlm",
    "Bhks", "Marc", "Newa", "Osge", "Tang", "Gonm", "Nshu",
    "Soyo", "Zanb", "Dogr", "Gong", "Rohg", "Maka", "Medf",
    /* The slot after "Nand" is Nyiakeng Puachue Hmong, whose ISO 15924
     * code is "Hmnp"; "Rohg" belongs only to Hanifi Rohingya above. */
    "Sogo", "Sogd", "Elym", "Nand", "Hmnp", "Wcho"
  };
  /* Going through an unsigned type folds negative values into the
   * "too large" case, so a single comparison covers both ends. */
  gsize idx = (gsize) s;

  if (idx >= G_N_ELEMENTS (names))
    return "??";

  return names[idx];
}
/* Append a four-line, column-aligned dump of the characters in @filename
 * to @string.
 *
 * The four lines are:
 *   Text:        each character (or a bracketed escape for it)
 *   Char type:   the label from char_type()
 *   Break type:  the label from break_type()
 *   Script:      the label from script_name(), written only in the column
 *                where the script differs from the previous character's
 *
 * Every character occupies one column in all four lines.  A column is as
 * wide as the widest thing written into it, with at least one trailing
 * space after each label.
 *
 * The file is processed up to its first NUL byte and must be valid UTF-8
 * up to that point.  A read failure or invalid UTF-8 is reported with
 * g_error(), which terminates the program.
 */
static void
test_file (const char *filename, GString *string)
{
  char *contents = NULL;
  GError *error = NULL;
  const char *bad = NULL;
  const char *p;
  GString *types, *breaks, *scripts;
  GUnicodeScript prev_script = G_UNICODE_SCRIPT_INVALID_CODE;
  int width;

  if (!g_file_get_contents (filename, &contents, NULL, &error))
    {
      /* g_error() aborts; the cleanup is kept only for tidiness. */
      g_error ("%s", error->message);
      g_error_free (error);
      return;
    }

  /* g_utf8_get_char()/g_utf8_next_char() assume well-formed input.  A
   * truncated multi-byte sequence at the end of the buffer would let the
   * loop below step over the NUL terminator, so reject bad input first.
   * max_len is -1 so validation stops at the first NUL, like the loop. */
  if (!g_utf8_validate (contents, -1, &bad))
    {
      g_error ("%s: invalid UTF-8 at byte offset %" G_GSIZE_FORMAT,
               filename, (gsize) (bad - contents));
      g_free (contents);
      return;
    }

  g_string_append (string, "Text: ");
  types = g_string_new ("Char type: ");
  breaks = g_string_new ("Break type: ");
  scripts = g_string_new ("Script: ");

  /* Pad all four row headers to the width of the longest one. */
  width = MAX (MAX (types->len, breaks->len), scripts->len);
  g_string_append_printf (types, "%*s", (int) (width - types->len), "");
  g_string_append_printf (breaks, "%*s", (int) (width - breaks->len), "");
  g_string_append_printf (scripts, "%*s", (int) (width - scripts->len), "");
  g_string_append_printf (string, "%*s", (int) (width - strlen ("Text: ")), "");

  for (p = contents; *p; p = g_utf8_next_char (p))
    {
      gunichar ch = g_utf8_get_char (p);
      const char *ctype = char_type (g_unichar_type (ch));
      const char *btype = break_type (g_unichar_break_type (ch));
      GUnicodeScript script = g_unichar_get_script (ch);
      int c = strlen (ctype);
      int b = strlen (btype);
      int s = 0;   /* width written to the script row for this column */
      int t = 0;   /* width written to the text row for this column */

      g_string_append (types, ctype);
      g_string_append (breaks, btype);

      /* Only name the script where a new run starts. */
      if (prev_script != script)
        {
          const char *str = script_name (script);

          prev_script = script;
          g_string_append (scripts, str);
          s = strlen (str);
        }

      if (ch == 0x20)
        {
          /* Make a plain space visible. */
          g_string_append (string, "[ ]");
          t = 3;
        }
      else if (g_unichar_isgraph (ch) &&
               (ch != 0x2028) &&
               (ch != 0x2029))
        {
          /* Counted as one column regardless of its UTF-8 byte length. */
          g_string_append_unichar (string, ch);
          t = 1;
        }
      else
        {
          /* Non-graphic characters, plus the line (U+2028) and paragraph
           * (U+2029) separators, are shown as a hex escape. */
          gsize before = string->len;

          g_string_append_printf (string, "[%#04x]", ch);
          t = (int) (string->len - before);
        }

      /* Column width: at least the text cell, and at least one space
       * after each of the three labels. */
      width = MAX (t, MAX (MAX (c + 1, b + 1), s + 1));

      g_string_append_printf (string, "%*s", width - t, "");
      g_string_append_printf (types, "%*s", width - c, "");
      g_string_append_printf (breaks, "%*s", width - b, "");
      g_string_append_printf (scripts, "%*s", width - s, "");
    }

  g_string_append (string, "\n");
  g_string_append_len (string, types->str, types->len);
  g_string_append (string, "\n");
  g_string_append_len (string, breaks->str, breaks->len);
  g_string_append (string, "\n");
  g_string_append_len (string, scripts->str, scripts->len);
  g_string_append (string, "\n");

  g_string_free (types, TRUE);
  g_string_free (breaks, TRUE);
  g_string_free (scripts, TRUE);
  g_free (contents);
}
static gchar *
get_expected_filename (const gchar *filename)
{
gchar *f, *p, *expected;
f = g_strdup (filename);
p = strstr (f, ".chars");
if (p)
*p = 0;
expected = g_strconcat (f, ".expected", NULL);
g_free (f);
return expected;
}
/* GTest case body: dump the input file named by @d with test_file() and
 * compare the dump against the matching ".expected" file.
 *
 * @d is the path of the input file as a NUL-terminated string.  The test is
 * marked failed, and the diff printed to stderr, when diff_with_file()
 * returns a non-empty string.  An error reported by diff_with_file() trips
 * g_assert_no_error().
 */
static void
test_break (gconstpointer d)
{
  const char *filename = d;
  char *expected_file;
  GError *error = NULL;
  GString *dump;
  char *diff = NULL;

  expected_file = get_expected_filename (filename);
  dump = g_string_sized_new (0);
  test_file (filename, dump);

  diff = diff_with_file (expected_file, dump->str, dump->len, &error);
  g_assert_no_error (error);

  if (diff && diff[0])
    {
      g_printerr ("Contents don't match expected contents:\n%s", diff);
      g_test_fail ();
    }

  /* Free the result on every path: a non-NULL but empty diff (the "files
   * match" case) would otherwise leak.  g_free (NULL) is a no-op. */
  g_free (diff);
  g_string_free (dump, TRUE);
  g_free (expected_file);
}
/* Entry point.
 *
 * With a file argument left over after g_test_init() has processed the
 * command line, the program acts as a generator: it prints the dump for
 * that file to stdout and exits, which makes it easy to create the expected
 * output for a new test case.
 *
 * Otherwise it registers one "/chars/<name>" test for every entry in the
 * distributed "chars" directory whose name contains "chars", then runs the
 * suite.
 */
int
main (int argc, char *argv[])
{
  GDir *dir;
  GError *error = NULL;
  const gchar *name;
  gchar *path;

  /* Pin the locale so results do not depend on the caller's environment. */
  g_setenv ("LC_ALL", "en_US.UTF-8", TRUE);
  setlocale (LC_ALL, "");

  g_test_init (&argc, &argv, NULL);

  /* allow to easily generate expected output for new test cases */
  if (argc > 1)
    {
      GString *string;

      string = g_string_sized_new (0);
      test_file (argv[1], string);
      g_print ("%s", string->str);
      g_string_free (string, TRUE);

      return 0;
    }

  path = g_test_build_filename (G_TEST_DIST, "chars", NULL);
  dir = g_dir_open (path, 0, &error);
  g_free (path);
  g_assert_no_error (error);

  while ((name = g_dir_read_name (dir)) != NULL)
    {
      if (!strstr (name, "chars"))
        continue;

      path = g_strdup_printf ("/chars/%s", name);
      /* The file path is handed over as test data and released with
       * g_free, the destroy function passed alongside it. */
      g_test_add_data_func_full (path, g_test_build_filename (G_TEST_DIST, "chars", name, NULL),
                                 test_break, g_free);
      g_free (path);
    }

  g_dir_close (dir);

  return g_test_run ();
}