#!/usr/bin/perl -w
#
# Script to convert http://www.unicode.org/Public/UNIDATA/Scripts.txt
# into a machine-readable table.
#
# Usage: gen-script-table.pl Scripts.txt > gscripttable.h
#
# The generated header contains two tables:
#   * g_script_easy_table - one entry per code point below
#     G_EASY_SCRIPTS_RANGE;
#   * g_script_table      - { start, chars, script } records for the
#     ranges at or above that limit, with touching ranges of the same
#     script merged into one record.
#
######################################################################

use strict;
use warnings;

# Code points below this limit get one entry each in the flat table.
my $EASY_RANGE = 0x2000;

# Read Scripts.txt data from the filehandle $in.
#
# Returns a two-element list:
#   - a reference to an array of [ first, last, SCRIPT ] triples in input
#     order (code points as numbers, script name upper-cased);
#   - the "Scripts-<version>.txt" name found in a header comment, or undef
#     when the input contains no such comment.
#
# Dies on any non-blank, non-comment line that is not of the form
# "XXXX ; Name" or "XXXX..YYYY ; Name".
sub parse_scripts {
    my ($in) = @_;

    my @ranges;
    my $file;

    while (my $line = <$in>) {
        # The version header is itself a comment, so it has to be looked
        # for before comments are stripped.
        if ($line =~ /^\#\s+(Scripts-.*\.txt)/) {
            $file = $1;
        }

        $line =~ s/#.*//;
        next if $line =~ /^\s*$/;

        if ($line !~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)\s*$/) {
            die "Cannot parse line: '$line'\n";
        }

        my $first = hex($1);
        my $last  = defined($2) ? hex($2) : $first;
        push @ranges, [ $first, $last, uc($3) ];
    }

    return (\@ranges, $file);
}

# Split sorted ranges at the code point $limit.
#
# $ranges is a reference to an array of [ first, last, SCRIPT ] triples
# sorted by first code point; it is not modified.
#
# Returns a two-element list:
#   - a reference to an array with exactly $limit script names, one per
#     code point 0 .. $limit - 1 ('UNKNOWN' where no range applies);
#   - a reference to an array of fresh triples for the range table.  A
#     range that crosses $limit is included with its start raised to
#     $limit; a range that begins above $limit keeps its own start.
sub split_at_easy_range {
    my ($ranges, $limit) = @_;

    my @easy;
    my $next = 0;                              # index of the next unread range
    my ($start, $end, $script) = (0, -1, undef);

    for my $c (0 .. $limit - 1) {
        if ($c > $end) {
            if ($next < @$ranges) {
                ($start, $end, $script) = @{ $ranges->[$next] };
                $next++;
            }
            else {
                # Input ran out below the limit: pretend the current
                # "range" starts at the limit so that every remaining
                # code point is reported as unknown and nothing is
                # carried over into the range table.
                ($start, $end) = ($limit, $limit - 1);
            }
        }

        push @easy, ($c < $start ? 'UNKNOWN' : $script);
    }

    my @rest = map { [ @$_ ] } @{$ranges}[ $next .. $#{$ranges} ];

    if ($end >= $limit) {
        # The range loaded last reaches past the flat table.  Clamp its
        # start only when it really begins inside the flat table; if it
        # begins above the limit, the code points in between are
        # unassigned and must stay out of the range.
        my $clamped = $start < $limit ? $limit : $start;
        unshift @rest, [ $clamped, $end, $script ];
    }

    return (\@easy, \@rest);
}

# Merge neighbouring ranges that touch (next first == previous last + 1)
# and carry the same script.  Takes and returns a reference to an array of
# [ first, last, SCRIPT ] triples; the input is not modified.
sub merge_adjacent_ranges {
    my ($ranges) = @_;

    my @merged;

    for my $range (@$ranges) {
        my ($start, $end, $script) = @$range;
        my $prev = $merged[-1];

        if ($prev && $start == $prev->[1] + 1 && $script eq $prev->[2]) {
            $prev->[1] = $end;
        }
        else {
            push @merged, [ $start, $end, $script ];
        }
    }

    return \@merged;
}

# Entry point: read the Scripts.txt file named by the single argument and
# print the generated header on standard output.  Dies with a usage message
# unless exactly one argument is given, and when the file cannot be opened.
sub main {
    my @args = @_;

    if (@args != 1) {
        die "Usage: gen-script-table.pl Scripts.txt > gscripttable.h\n";
    }

    my $path = $args[0];

    # Low-precedence "or" so that a failed open is actually reported.
    open my $in, '<', $path or die "Cannot open $path: $!\n";
    my ($ranges, $file) = parse_scripts($in);
    close $in;

    # Without a version header, name the input file instead of printing
    # an empty "Source:" line.
    $file = $path unless defined $file;

    my @sorted = sort { $a->[0] <=> $b->[0] } @$ranges;
    my $date = gmtime;
    my $easy_range = $EASY_RANGE;

    print <<"EOT";
/* gscripttable.h: Generated by gen-script-table.pl
*
* Date: $date
* Source: $file
*
* Do not edit.
*/

EOT

    print <<"EOT";
#define G_EASY_SCRIPTS_RANGE $easy_range

static const guchar g_script_easy_table[$easy_range] = {
EOT

    my ($easy, $rest) = split_at_easy_range(\@sorted, $easy_range);

    # Three entries per output line.
    for my $c (0 .. $#{$easy}) {
        print "\n " if $c % 3 == 0;
        printf " G_UNICODE_SCRIPT_%s,", $easy->[$c];
    }

    print <<"EOT";

};

static const struct {
gunichar start;
guint16 chars;
guint16 script;
} g_script_table[] = {
EOT

    for my $range (@{ merge_adjacent_ranges($rest) }) {
        my ($start, $end, $script) = @$range;
        printf " { %#06x, %5d, G_UNICODE_SCRIPT_%s },\n", $start, $end - $start + 1, $script;
    }

    print "};\n";

    return;
}

# Run only when executed as a script, so the helpers above can be loaded
# and exercised without generating any output.
main(@ARGV) unless caller;
| |