 // charset/source/ucmtools/rptp2ucm.cpp - third_party/github.com/unicode-org/icu-data

 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
 /*
 *******************************************************************************
 *
 *   Copyright (C) 2000-2009, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  rptp2ucm.cpp
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2001feb16
 *   created by: Markus W. Scherer
 *
 *   This tool reads two CDRA conversion table files (RPMAP & TPMAP or RXMAP and TXMAP) and
 *   generates a canonicalized ICU .ucm file from them.
 *   If the RPMAP/RXMAP file does not contain a comment line with the substitution character,
 *   then this tool also attempts to read the header of the corresponding UPMAP/UXMAP file
 *   to extract subchar and subchar1.
 *
 *   R*MAP: Unicode->codepage
 *   T*MAP: codepage->Unicode
 *
 *   Starting 2003oct25, rptp2ucm handles m:n mappings as well, but requires
 *   a more elaborate build using the ICU common (icuuc) and toolutil libraries.
 *   On Windows (on one line):
 *
 *   cl -nologo -MD
 *      -I..\..\..\..\icu\source\common
 *      -I..\..\..\..\icu\source\tools\toolutil
 *      rptp2ucm.c -link /LIBPATH:..\..\..\..\icu\lib icuuc.lib icutu.lib
 */

 #include "unicode/utypes.h"
 #include "unicode/ustring.h"
 #include "rptp_map.h"
 #include "cmemory.h"
 #include "cstring.h"
 #include "ucnv_ext.h"
 #include "ucm.h"
 #include "uparse.h"
 #include "uoptions.h"

 #include <stdio.h>
 #include <time.h>

 /*
  * Number of elements of a true array (not a pointer), as an int32_t.
  * The whole expansion is parenthesized so that the cast cannot bind to
  * anything that follows the macro invocation.
  */
 #define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

 /* Up to four T*MAP file names; unused trailing slots are NULL in the table below. */
 typedef const char *TMAParray[4];

 /*
  * One row of knownRMAPtoTMAP: pairs the R*MAP file name known for a CCSID
  * with the T*MAP file name(s) listed for it.
  */
 typedef struct RMAPtoTMAP {
     /* CCSID that this row applies to */
     const uint16_t ccsid;
     /* name of the R*MAP (Unicode->codepage) file, e.g. "RPMAP100" */
     const char *RMAP;
     /* name(s) of the T*MAP (codepage->Unicode) file(s), NULL-padded */
     const TMAParray TMAP;
 } RMAPtoTMAP;

 /*
  * Known pairings of R*MAP and T*MAP files per CCSID.
  * Rows are listed in ascending CCSID order; a CCSID has one row per R*MAP name.
  *
  * This table is here because the .package files are not consistently machine parseable.
  * Also, not all package files exist for all combinations.
  * Rows marked "Is this correct?" could not be confirmed against a package file.
  * TODO: I wish there were a less manual process to get the mapping table information.
  */
 static const RMAPtoTMAP
 knownRMAPtoTMAP[] = {
     {0x0112, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x011E, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x01A4, "RXMAP110", {"TXMAP110", NULL, NULL, NULL}},
     {0x01A4, "RXMAP120", {"TXMAP110", NULL, NULL, NULL}},
     {0x01A9, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x0360, "RXMAP110", {"TXMAP110", NULL, NULL, NULL}},
     {0x0391, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x039E, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x03A2, "RPMAP120", {"TPMAP110", "TPMAP12A", NULL, NULL}},
     {0x03A2, "RXMAP120", {"TXMAP110", NULL, NULL, NULL}},
     {0x03A3, "RPMAP120", {"TPMAP110", NULL, NULL, NULL}},
     {0x03A3, "RXMAP120", {"TXMAP110", NULL, NULL, NULL}},
     {0x03A4, "RPMAP120", {"TPMAP110", NULL, NULL, NULL}},
     {0x03A4, "RPMAP12A", {"TPMAP11A", "TPMAP12A", NULL, NULL}},
     {0x03A5, "RPMAP110", {"TPMAP100", NULL, NULL, NULL}},
     {0x03A5, "RXMAP110", {"TXMAP100", NULL, NULL, NULL}},
     {0x03A7, "RPMAP110", {"TPMAP100", NULL, NULL, NULL}},
     {0x03A7, "RXMAP110", {"TXMAP100", NULL, NULL, NULL}},
     {0x03A9, "RPMAP110", {"TPMAP100", NULL, NULL, NULL}},
     {0x03A9, "RXMAP110", {"TXMAP100", NULL, NULL, NULL}},
     {0x03AB, "RPMAP120", {"TPMAP110", "TPMAP12A", NULL, NULL}},
     {0x03AB, "RXMAP120", {"TXMAP110", NULL, NULL, NULL}},
     {0x03AD, "RPMAP130", {"TPMAP120", NULL, NULL, NULL}},
     {0x03AD, "RPMAP13A", {"TPMAP12A", NULL, NULL, NULL}},
     {0x03AE, "RPMAP120", {"TPMAP110", NULL, NULL, NULL}},
     {0x03AE, "RPMAP12A", {"TPMAP11A", "TPMAP12A", NULL, NULL}},
     {0x03AF, "RPMAP130", {"TPMAP120", NULL, NULL, NULL}},
     {0x03AF, "RPMAP14A", {"TPMAP13A", NULL, NULL, NULL}},
     {0x03AF, "RPMAP15A", {"TPMAP14A", NULL, NULL, NULL}},
     {0x03B4, "RPMAP110", {"TPMAP100", NULL, NULL, NULL}},
     {0x03B4, "RXMAP110", {"TXMAP100", NULL, NULL, NULL}},
     {0x03B5, "RPMAP110", {"TPMAP100", NULL, NULL, NULL}},
     {0x03B5, "RPMAP11A", {"TPMAP10A", NULL, NULL, NULL}},
     {0x03B5, "RXMAP110", {"TXMAP100", NULL, NULL, NULL}},
     {0x03B6, "RPMAP110", {"TPMAP100", NULL, NULL, NULL}},
     {0x03B6, "RXMAP110", {"TXMAP100", NULL, NULL, NULL}},
     {0x03B9, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x03BA, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x03C0, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x03C4, "RPMAP110", {"TPMAP100", NULL, NULL, NULL}},
     {0x03C4, "RXMAP110", {"TXMAP100", NULL, NULL, NULL}},
     {0x03CA, "RPMAP110", {"TPMAP100", "TPMAP110", NULL, NULL}},
     {0x03FC, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x03FD, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x03FF, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x044C, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x044D, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x044E, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x044F, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x0450, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x0451, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x0452, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x0453, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x0471, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x0471, "RPMAPMOD", {"TPMAP100", NULL, NULL, NULL}},
     {0x048B, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x048D, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x048E, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x048F, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x0490, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x0561, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x0565, "RPMAP110", {"TPMAP100", NULL, NULL, NULL}},
     {0x0565, "RXMAP110", {"TXMAP100", NULL, NULL, NULL}},
     {0x0567, "RPMAP110", {"TPMAP100", NULL, NULL, NULL}},
     {0x0567, "RXMAP110", {"TXMAP100", NULL, NULL, NULL}},
     {0x056A, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x056A, "RPMAP110", {"TPMAP100", NULL, NULL, NULL}},
     {0x1328, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x1345, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x1350, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x1351, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x135A, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x135B, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x135C, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x135D, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x135E, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x135F, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x1361, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x1362, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x1363, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x13A2, "RPMAP120", {"TPMAP110", NULL, NULL, NULL}},
     {0x13A2, "RXMAP120", {"TXMAP110", NULL, NULL, NULL}},
     {0x13AB, "RPMAP120", {"TPMAP110", "TPMAP12A", NULL, NULL}},// package is missing. Is this correct?
     {0x13AB, "RXMAP120", {"TXMAP110", NULL, NULL, NULL}},// package is missing. Is this correct?
     {0x13BA, "RPMAP120", {"TPMAP110", NULL, NULL, NULL}},// package text is garbled. Is this correct?
     {0x13BA, "RPMAP12A", {"TPMAP11A", NULL, NULL, NULL}},// package text is garbled. Is this correct?
     {0x155F, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x1561, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x21A4, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x21A4, "RXMAP110", {"TXMAP110", NULL, NULL, NULL}},
     {0x2352, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x2368, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x245A, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x256C, "RPMAP110", {"TPMAP100", NULL, NULL, NULL}},
     {0x3344, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x3344, "RPMAP10A", {"TPMAP100", NULL, NULL, NULL}},
     {0x3345, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x3354, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x3357, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x3359, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x3364, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x3365, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x336A, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x4345, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x4358, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x5360, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0x8122, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}},
     {0x83BA, "RPMAP120", {"TPMAP110", NULL, NULL, NULL}},// package is missing. Is this correct?
     {0x83BA, "RPMAP12A", {"TPMAP11A", "TPMAP12A", NULL, NULL}},// package is missing. Is this correct?
     {0xD1B5, "RPMAP101", {"TPMAP101", NULL, NULL, NULL}},
     {0xD3AF, "RPMAP100", {"TPMAP100", NULL, NULL, NULL}}
 };

 /* A CCSID together with the substitution values to use for it. */
 typedef struct UCMSubchar {
     const uint16_t ccsid;
     uint32_t subchar;
     uint32_t subchar1;
 } UCMSubchar;

 /*
  * Hard-coded substitution characters, one row per CCSID (decimal).
  * Needed because the U?MAP??? file goes by a significantly different name
  * from the R?MAP??? file. subchar1 is 0 in every row.
  */
 static const UCMSubchar
 knownSubchars[]={
     {  274, 0x3F, 0 },
     {  913, 0x1A, 0 },
     { 1047, 0x3F, 0 },
     { 1114, 0x1A, 0 },
     { 1137, 0x3F, 0 },
     { 1166, 0x3F, 0 },
     { 1167, 0x1A, 0 },
     { 1168, 0x1A, 0 },
     { 8612, 0x3F, 0 },
     { 9444, 0x1A, 0 },
     { 9447, 0x1A, 0 },
     { 9449, 0x1A, 0 }
 };

 /* One hard-coded <icu:state> table, selected by CCSID and Unicode CCSID. */
 typedef struct CCSIDStateTable {
     uint16_t ccsid;
     uint16_t unicode; /* 0 means any unicode version. */
     /* the <icu:state> lines as one string; every line ends with '\n' */
     const char *table;
 } CCSIDStateTable;

 /* State table text shared by several CCSIDs in knownStateTables. */
 #define Big5DBCSStates \
     "<icu:state>                   81-fe:1\n" \
     "<icu:state>                   40-7e, 80-fe\n"

 #define Big5MBCSStates \
     "<icu:state>                   0-7f, 81-fe:1\n" \
     "<icu:state>                   40-7e, 80-fe\n"

 #define japanesePCDBCSStates \
     "<icu:state>                   0-80:2, 81-fc:1, fd-ff:2\n" \
     "<icu:state>                   40-7e, 80-fc\n" \
     "<icu:state>\n"

 #define states1390 \
     "# includes mappings for surrogate pairs\n" \
     "<icu:state>                   0-ff, e:1.s, f:0.s\n" \
     "<icu:state>                   initial, 0-3f:4, e:1.s, f:0.s, 40:3, 41-fe:2, ff:4, b3-b7:5\n" \
     "<icu:state>                   0-40:1.i, 41-fe:1., ff:1.i\n" \
     "<icu:state>                   0-ff:1.i, 40:1.\n" \
     "<icu:state>                   0-ff:1.i\n" \
     "<icu:state>                   0-40:1.i, 41-fe:1.p, ff:1.i\n"

 #define states16684 \
     "# includes mappings for surrogate pairs\n" \
     "<icu:state>                   0-3f:3, 40:2, 41-fe:1, ff:3, b3-b7:4\n" \
     "<icu:state>                   41-fe\n" \
     "<icu:state>                   40\n" \
     "<icu:state>                   \n" \
     "<icu:state>                   41-fe.p\n"

 /*
  * Hand-written state tables per CCSID (decimal).
  * Where a CCSID has both a row for a specific Unicode CCSID and a row with
  * unicode==0, the specific row comes first.
  * Every table string must end with a newline so that whatever is written
  * after it starts on a fresh line.
  */
 static const CCSIDStateTable
 knownStateTables[]={

     301,0,"<icu:state>                   0-80:2, 81-9f:1, a0-df:2, e0-fc:1, fd-ff:2\n"
           "<icu:state>                   40-7e, 80-fc\n"
           "<icu:state>\n",

     367,0,"<icu:state>                   0-7f\n",

     927,0,japanesePCDBCSStates,

     926,0,japanesePCDBCSStates,

     928,0,japanesePCDBCSStates,

     932,0,"<icu:state>                   0-80, 81-9f:1, a0-df, e0-fc:1, fd-ff\n"
           "<icu:state>                   40-7e, 80-fc\n",

     941,0,japanesePCDBCSStates,

     942,0, "<icu:state>                   0-80, 81-9f:1, a0-df, e0-fc:1, fd-ff\n"
            "<icu:state>                   40-7e, 80-fc\n",

     943,0, "<icu:state>                   0-7f, 81-9f:1, a0-df, e0-fc:1\n"
            "<icu:state>                   40-7e, 80-fc\n",

     944,0, "<icu:state>                   0-80, 81-bf:1, c0-ff\n"
            "<icu:state>                   40-7e, 80-fe\n",

     946,0, "<icu:state>                   0-80, 81-fb:1,fc:2,fd-ff\n"
            "<icu:state>                   40-7e, 80-fe\n"
            "<icu:state>                   80-fe.u,fc\n",

     947,0, Big5DBCSStates,

     948,0, "<icu:state>                   0-80, 81-fb:1,fc:2,fd-fe\n"
            "<icu:state>                   40-7e, 80-fe\n"
            "<icu:state>                   80-fe.u,fc\n",

     949,0, "<icu:state>                   0-84, 8f-fe:1\n"
            "<icu:state>                   40-7e, 80-fe\n",

     950,0, Big5MBCSStates,

     954,0, "<icu:state>                   0-8d, 8e:2, 8f:3, 90-9f, a1-fe:1\n"
            "<icu:state>                   a1-fe\n"
            "<icu:state>                   a1-e4\n"
            "<icu:state>                   a1-fe:1, a1:4\n"
            "<icu:state>                   a1-fe.u\n",

     955,0, "<icu:state>                   0-20:2, 21-7e:1, 7f-ff:2\n"
            "<icu:state>                   21-7e\n"
            "<icu:state>\n",

     963,0, "<icu:state>                   0-20:2, 21-7e:1, 7f-ff:2\n"
            "<icu:state>                   21-7e\n"
            "<icu:state>\n",

     964,0, "# The fourth <icu:state> line is commented out (and does not count)\n"
            "# because the state table is hand-optimized and does not use what would be\n"
            "# the natural path for the encoding scheme.\n"
            "# The third <icu:state> used to start with \"a1-b0:3\" but overrode every one\n"
            "# of these byte values with a different state transition.\n"
            "\n"
            "# 0: Initial state, single bytes and lead bytes\n"
            "<icu:state>                   0-8d, 8e:2, 90-9f, a1-fe:1, aa-c1:4, c3:4, fe:4\n"
            "# 1: Trail byte state with mappings\n"
            "<icu:state>                   a1-fe\n"
            "# 2: Second of four bytes, follows lead byte 8e\n"
            "<icu:state>                   a1:3, a2:7, a3-ab:3, ac:6, ad:5, ae-b0:3\n"
            "# (unreachable/optimized away)\n"
            "# <icu:state>                   a1-fe:1\n"
            "# 3: Third of four bytes, 8e xx .. .. for most xx in a1-b0; all-unassigned\n"
            "<icu:state>                   a1-fe:4\n"
            "# 4: All-unassigned trail byte state\n"
            "<icu:state>                   a1-fe.u\n"
            "# 5: 8e ad .. .. with some mappings\n"
            "<icu:state>                   a1-a4:1, a5-fe:4\n"
            "# 6: 8e ac .. .. with some mappings\n"
            "<icu:state>                   a1-e2:1, e3-fe:4\n"
            "# 7: 8e a2 .. .. with some mappings\n"
            "<icu:state>                   a1-f2:1, f3-fe:4\n",

     970,0, "<icu:state>                   0-9f, a1-fe:1\n"
            "<icu:state>                   a1-fe\n",

     1363,0,"<icu:state>                   0-7f, 81-fe:1\n"
            "<icu:state>                   40-7e, 80-fe\n",

     1350,0,"<icu:state>                   0-8d, 8e:2, 8f:3, 90-9f, a1-fe:1\n"
            "<icu:state>                   a1-fe\n"
            "<icu:state>                   a1-e4\n"
            "<icu:state>                   a1-fe:1, a1:4, a3-a5:4, a8:4, ac-af:4, ee-f2:4\n"
            "<icu:state>                   a1-fe.u\n",

     1351,0,"<icu:state>                   0-ff:2, 81-9f:1, e0-fc:1\n"
            "<icu:state>                   40-7e, 80-fc\n"
            "<icu:state>\n",

     1370,0,"<icu:state>                   0-80, 81-fe:1\n"
            "<icu:state>                   40-7e, 80-fe\n",

     1373,0,Big5MBCSStates,

     1374,0,Big5DBCSStates,

     /* 1232 says UTF-32, but it's really post Unicode 4.0 */
     1375,1232,  "<icu:state>                   0-7f, 81-fe:1, 87-a0:2, c8:2, fa-fe:2\n"
                 "<icu:state>                   40-7e, a1-fe\n"
                 "<icu:state>                   40-7e.p, a1-fe.p\n",

     1375,0,Big5MBCSStates,

     /* 1232 says UTF-32, but it's really post Unicode 4.0 */
     1377,1232,  "# includes mappings for surrogate pairs\n"
                 "<icu:state>                   0-ff, e:1.s, f:0.s\n"
                 "<icu:state>                   initial, 0-3f:4, e:1.s, f:0.s, 40:3, 41-fe:2, ff:4, 4b:5, e0:5, c2-d6:5, db-df:5\n"
                 "<icu:state>                   0-40:1.i, 41-fe:1., ff:1.i\n"
                 "<icu:state>                   0-ff:1.i, 40:1.\n"
                 "<icu:state>                   0-ff:1.i\n"
                 "<icu:state>                   0-40:1.i, 41-fe:1.p, ff:1.i\n",

     1381,0,"<icu:state>                   0-84, 8c-fe:1\n"
            "<icu:state>                   a1-fe\n",

     1383,0,"<icu:state>                   0-9f, a1-fe:1\n"
            "<icu:state>                   a1-fe\n",

     1385,0,"<icu:state>                   81-fe:1\n"
            "<icu:state>                   40-7e, 80-fe\n",

     1386,0,"<icu:state>                   0-80, 81-fe:1\n" /* Was 0-7f, 81-fe:1 */
            "<icu:state>                   40-7e, 80-fe\n",

     1390,0,states1390,

     1399,0,states1390,

     5039,0,"<icu:state>                   0-80, 81-9f:1, a0-df, e0-fc:1, fd-ff\n"
            "<icu:state>                   40-7e, 80-fc\n",

     5050,0,"<icu:state>                   0-8d, 8e:2, 8f:3, 90-9f, a1-fe:1\n"
            "<icu:state>                   a1-fe\n"
            "<icu:state>                   a1-e4\n"
            "<icu:state>                   a1-fe:1, a1:4, a3-af:4, b6:4, d6:4, da-db:4, ed-f2:4\n"
            "<icu:state>                   a1-fe.u\n",

     5067,0,"<icu:state>                   0-20:2, 21-7e:1, 7f-ff:2\n"
            "<icu:state>                   21-7e\n"
            "<icu:state>\n",

     5470,0,Big5DBCSStates,

     /* 1232 says UTF-32, but it's really post Unicode 4.0 */
     5471,1232,  "<icu:state>                   0-7f, 81-fe:1, 88-a0:2, c8:2, fa-fe:2\n"
                 "<icu:state>                   40-7e, a1-fe\n"
                 "<icu:state>                   40-7e.p, a1-fe.p\n",

     5471,0,Big5MBCSStates,

     5475,0,Big5MBCSStates,

     5478,0,"<icu:state>                   0-20:2, 21-7e:1, 7f-ff:2\n"
            "<icu:state>                   21-7e\n"
            "<icu:state>\n",

     5487,0,"<icu:state>                   81-fe:1\n"
            "<icu:state>                   30-39:2\n"
            "<icu:state>                   81-fe:3\n"
            "<icu:state>                   30-39\n",

     5488,0,"<icu:state> 0-7f, 81:7, 82:8, 83:9, 84:a, 85-fe:4\n"    /* Modified form of ICU's gb18030 */
            "<icu:state> 30-39:2, 40-7e, 80-fe\n"
            "<icu:state> 81-fe:3\n"
            "<icu:state> 30-39\n"
            "<icu:state> 30-39:5, 40-7e, 80-fe\n"
            "<icu:state> 81-fe:6\n"
            "<icu:state> 30-39\n"
            "<icu:state> 30:2, 31-35:5, 36-39:2, 40-7e, 80-fe\n"
            "<icu:state> 30-35:2, 36-39:5, 40-7e, 80-fe\n"
            "<icu:state> 30-35:5, 36:2, 37-39:5, 40-7e, 80-fe\n"
            "<icu:state> 30-31:2, 32-39:5, 40-7e, 80-fe\n",

     9577,0,"<icu:state>                   81-fe:1\n"
            "<icu:state>                   40-7e, 80-fe\n",

     16684,0,states16684,

     21427,0,"<icu:state>                   0-80:2, 81-fe:1, ff:2\n"
             "<icu:state>                   40-7e, 80-fe\n"
             "<icu:state>\n",

     25546,0,"<icu:state>                   0-7f, e:1.s, f:0.s\n"
             "<icu:state>                   initial, 0-20:3, e:1.s, f:0.s, 21-7e:2, 7f-ff:3\n"
             "<icu:state>                   0-20:1.i, 21-7e:1., 7f-ff:1.i\n"
             "<icu:state>                   0-ff:1.i\n",

     33722,0,"<icu:state>                   0-8d, 8e:2, 8f:3, 90-9f, a1-fe:1\n"
             "<icu:state>                   a1-fe\n"
             "<icu:state>                   a1-e4\n"
             "<icu:state>                   a1-fe:1, a1:4, a3-af:4, b6:4, d6:4, da-db:4, ed-f2:4\n"
             "<icu:state>                   a1-fe.u\n",

     54191,0,"<icu:state>                   0-80, 81-9f:1, a0-df, e0-fc:1, fd-ff\n"
             "<icu:state>                   40-7e, 80-fc\n",

     62383,0,"<icu:state>                   0-7f, 81-9f:1, a0-df, e0-fc:1\n" // Same as CCSID 943
             "<icu:state>                   40-7e, 80-fc\n"

 };

 /* Inclusive bounds of the years that parseYear() accepts. */
 #define MAX_YEAR 2900
 #define MIN_YEAR 1940

 /* NOTE(review): the type is declared in rptp_map.h; purpose to be confirmed at the point of use. */
 static FilenameMappingHistory* filenameHistory;

 /* Mapping tables: opened by init(), released by cleanup(). */
 static UCMFile *fromUFile, *toUFile;

 /* Substitution values; parseMappings() sets them from "x'..'" header comments. Both start at 0. */
 static uint32_t subchar, subchar1;
 /* Reset to 0 by init(); presumably the codepage CCSID and the Unicode CCSID - confirm at the point of use. */
 static uint16_t ccsid, unicodeCCSID;

 /*
  * Year when the ucm files were produced using this tool:
  * parseYear() keeps the latest year found in the date comments of the input files.
  */
 static uint16_t year;

 /* Value that init() stores in charsetFamily before anything is known. */
 enum {
     U_UNKNOWN_CHARSET_FAMILY=9
 };

 /* Smallest and largest two-byte character value seen by parseMappings(). */
 static uint32_t minTwoByte, maxTwoByte;

 /* Shortest and longest codepage character (in bytes) seen by parseMappings(); 0 means none seen yet. */
 static int32_t
     minCharLength,
     maxCharLength;

 /* oredBytes is the bitwise OR of every codepage byte stored by parseMappings(). */
 static uint8_t charsetFamily, oredBytes;

 /* Analysis flags, all cleared by init(). NOTE(review): they are set outside parseMappings(); confirm their meaning there. */
 static UBool
     usesPUA,
     variantLF,
     variantASCII,
     variantControls,
     variantSUB,
     is7Bit,
     is_0xe_0xf_Stateful;

 /*
  * Open two empty mapping tables and reset every file-scope analysis
  * variable to its starting value.
  */
 static void
 init() {
     /* empty tables; fromUFile is opened before toUFile */
     fromUFile=ucm_open();
     toUFile=ucm_open();

     /* header data */
     subchar1=0;
     subchar=0;
     ccsid=unicodeCCSID=0;
     year=0;

     /* byte statistics: the two-byte range starts out empty (min above max) */
     maxTwoByte=0;
     minTwoByte=0xffff;
     maxCharLength=minCharLength=0;
     oredBytes=0;
     charsetFamily=U_UNKNOWN_CHARSET_FAMILY;

     /* analysis flags */
     is_0xe_0xf_Stateful=is7Bit=0;
     variantSUB=variantControls=0;
     variantASCII=variantLF=0;
     usesPUA=0;
 }

 /* Release the two mapping tables that init() opened. */
 static void
 cleanup() {
     ucm_close(fromUFile);
     ucm_close(toUFile);
 }

 /*
  * Convert one hexadecimal digit character to its value.
  * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', or -1 for any other character.
  */
 static int32_t
 parseDigit(char c) {
     if(c>='0' && c<='9') {
         return (int32_t)(c-'0');
     }
     if(c>='a' && c<='f') {
         return (int32_t)(c-'a')+10;
     }
     if(c>='A' && c<='F') {
         return (int32_t)(c-'A')+10;
     }
     return -1;
 }

 /*
  * Parse two hexadecimal digit characters into one byte.
  *
  * Return value:
  * 0..ff - byte value
  * 0x100 - no byte (EUC)
  * -1 - c1 not a digit
  * -2 - c2 not a digit
  *
  * Only for the first byte of a mapping, "<digit>-" with digit 0..3 is the
  * special EUC format where the code set number prepends the bytes:
  * code sets 0 and 1 contribute no byte, 2 stands for 0x8e and 3 for 0x8f.
  */
 static int32_t
 parseByte(char c1, char c2, UBool firstByte) {
     /* result for the EUC code set prefixes 0..3 */
     static const int32_t eucPrefix[4]={ 0x100, 0x100, 0x8e, 0x8f };
     int32_t hi, lo;

     hi=parseDigit(c1);
     if(hi<0) {
         return -1;
     }

     lo=parseDigit(c2);
     if(lo>=0) {
         return (hi<<4)|lo;
     }

     if(firstByte && c2=='-' && hi<=3) {
         return eucPrefix[hi];
     }
     return -2;
 }

 /*
  * Parse the four-digit decimal year that starts at yearToParse.
  *
  * The number must end exactly four characters after yearToParse and lie
  * within MIN_YEAR..MAX_YEAR; otherwise an error quoting line is printed
  * and the program exits with status 2.
  * The file-scope variable year is raised to the parsed value if that is later.
  * Returns the parsed year.
  */
 static uint16_t
 parseYear(const char *yearToParse, const char *line) {
     char *stop;
     uint16_t localYear;
     int fourChars, inRange;

     localYear=(uint16_t)uprv_strtoul(yearToParse, &stop, 10);
     fourChars= stop==yearToParse+4;
     inRange= MIN_YEAR<=localYear && localYear<=MAX_YEAR;
     if(!(fourChars && inRange)) {
         fprintf(stderr, "error parsing year from \"%s\"; year is %d\n", line, localYear);
         exit(2);
     }

     /* remember the latest year seen so far */
     if(year<localYear) {
         year=localYear;
     }
     return localYear;
 }

 /*
  * Find the year of a date in a header comment and record it with parseYear().
  *
  * Looks backwards from s[len-1] for the last decimal digit and treats the
  * four characters ending there as the year.
  * If s[0..len-1] contains no digit, or fewer than four characters precede
  * the end of the number, the comment carries no usable date and is ignored;
  * the search never reads before s.
  */
 static void
 parseTrailingYear(const char *s, int32_t len, const char *line) {
     /* isdigit() is only defined for unsigned char values (and EOF) */
     while(len>0 && !isdigit((unsigned char)s[len-1])) {
         --len;
     }
     if(len>=4) {
         parseYear(s+len-4, line);
     }
 }

 /*
  * Read one CDRA mapping table file from f and add its mappings to ucm->base.
  *
  * Each data line holds codepage bytes as pairs of hex digits, whitespace, and then
  * hexadecimal Unicode code points; '+' separates the parts of an m:n mapping.
  * Lines starting with '#' or '*' are comments. From them the function takes
  * subchar and subchar1 (comments containing "for U+..." and "x'..'") and
  * the year of the "Modified", "update", "Update" and "Creation date:" dates.
  * A line starting with "END CHARMAP" ends the table.
  * A section that starts at "* only. They do not constitute part of the official UCS"
  * and runs up to "* The official table starts here:" is skipped.
  *
  * While parsing, the function updates the file-scope statistics
  * minCharLength, maxCharLength, minTwoByte, maxTwoByte and oredBytes.
  *
  * Malformed mapping lines are reported on stderr and parsing continues;
  * if any were found the function calls exit(2) once the file has been read.
  * Malformed subchar or date comments exit(2) immediately.
  */
 static void
 parseMappings(FILE *f, UCMFile *ucm) {
     char line[200];
     char *s, *end;
     int32_t lineNum=0;
     int32_t startSkipLineNum=0, endSkipLineNum = 0;
     UBool isOK;

     UCMapping m={ 0 };
     UChar32 codePoints[UCNV_EXT_MAX_UCHARS];
     uint8_t bytes[UCNV_EXT_MAX_BYTES];

     UChar32 cp;
     int32_t byte, charLength, u16Length;
     int8_t uLen, bLen;

     isOK=TRUE;

     while(fgets(line, sizeof(line), f)!=NULL) {
         s=(char *)u_skipWhitespace(line);
         lineNum++;

         /* skip empty lines or EOF characters */
         if(*s==0 || *s=='\n' || *s=='\r' || *s=='\x7F') {
             continue;
         }

         /* Skip useless mappings! */
         /* You'll see things like, "* only. They do not constitute part of the official UCS-2 to 1275 table."
                                 or "* only. They do not constitute part of the official UCS2 table." */
         if(uprv_strstr(s, "* only. They do not constitute part of the official UCS")!=NULL) {
             UBool nonCommentFound = FALSE;
             startSkipLineNum = lineNum;
             /* Ignore the next few mappings. They have no value */
             while(fgets(line, sizeof(line), f)!=NULL) {
                 s=(char *)u_skipWhitespace(line);
                 lineNum++;
                 if(uprv_strstr(s, "* The official table starts here:")!=NULL) {
                     break;  /* continue with outer loop */
                 }
                 if (s[0] != '*') {
                     nonCommentFound = TRUE;
                 }
             }
             endSkipLineNum = lineNum-1;
             if (nonCommentFound) {
                 fprintf(stderr, "Warning: skipped lines %d-%d, since it doesn't seem to be real data\n", startSkipLineNum, endSkipLineNum);
             }
         }

         /* explicit end of table */
         /* strncmp stops at the end of a short line; memcmp would read 11 bytes regardless */
         if(uprv_strncmp(s, "END CHARMAP", 11)==0) {
             break;
         }

         /* comment lines, parse substitution characters, otherwise skip them */
         if(*s=='#' || *s=='*') {
             /* get subchar1 */
             s=uprv_strstr(line, "for U+00xx");
             if(s==NULL) {
                 s=uprv_strstr(line, "for U+000000xx");
             }
             if(s!=NULL) {
                 s=uprv_strstr(line, "x'");
                 if(s!=NULL) {
                     s+=2;
                     subchar1=uprv_strtoul(s, &end, 16);
                     /* subchar1 is exactly one byte: two hex digits and the closing quote */
                     if(end!=s+2 || *end!='\'') {
                         fprintf(stderr, "error parsing subchar1 from \"%s\"\n", line);
                         exit(2);
                     }
                     continue;
                 } else {
                     fprintf(stderr, "error finding subchar1 on \"%s\"\n", line);
                     exit(2);
                 }
             }

             /* get subchar */
             s=uprv_strstr(line, "for U+xxxx");
             if(s==NULL) {
                 s=uprv_strstr(line, "for U+000xxxxx");
             }
             if(s==NULL) {
                 s=uprv_strstr(line, "for U+0000xxxx");
             }
             if(s!=NULL) {
                 s=uprv_strstr(line, "x'");
                 if(s!=NULL) {
                     s+=2;
                     subchar=uprv_strtoul(s, &end, 16);
                     /* subchar may be several bytes: at least two hex digits and the closing quote */
                     if(end<s+2 || *end!='\'') {
                         fprintf(stderr, "error parsing subchar from \"%s\"\n", line);
                         exit(2);
                     }
                     continue;
                 } else {
                     fprintf(stderr, "error finding subchar on \"%s\"\n", line);
                     exit(2);
                 }
             }

             /* get modified date */
             s=uprv_strstr(line, "Modified");
             if(s!=NULL && uprv_strstr(s, ":") != NULL) {
                 parseTrailingYear(s, (int32_t)uprv_strlen(s), line);
                 continue;
             }

             /* Handle "File updated on:" or "update:" */
             s=uprv_strstr(line, "update");
             if(s!=NULL && uprv_strstr(s, ":") != NULL) {
                 parseTrailingYear(s, (int32_t)uprv_strlen(s), line);
                 continue;
             }

             s=uprv_strstr(line, "Update");
             if(s!=NULL && uprv_strstr(s, ":") != NULL) {
                 int32_t len = (int32_t)uprv_strlen(s);
                 if (uprv_strstr(s, "(")) {
                     /* the date comes before a parenthesized remark: search in front of the last '(' */
                     while (s[len] != '(') {
                         len--;
                     }
                 }
                 parseTrailingYear(s, len, line);
                 continue;
             }

             /* get creation date */
             s=uprv_strstr(line, "Creation date:");
             if(s!=NULL) {
                 parseTrailingYear(s, (int32_t)uprv_strlen(s), line);
                 continue;
             }

             continue;
         }

         /* parse a mapping */
         charLength=0;
         uLen=bLen=0;

         /* parse bytes */
         for(;;) {
             if(*s==' ' || *s=='\t' || *s=='+') {
                 /* do some of the analysis while we know the character boundaries */
                 if(minCharLength==0 || charLength<minCharLength) {
                     minCharLength=charLength;
                 }
                 if(maxCharLength==0 || charLength>maxCharLength) {
                     maxCharLength=charLength;
                 }

                 if(charLength==2) {
                     uint32_t twoByte;

                     twoByte=((uint32_t)bytes[bLen-2]<<8)|bytes[bLen-1];
                     if(twoByte<minTwoByte) {
                         minTwoByte=twoByte;
                     }
                     if(twoByte>maxTwoByte) {
                         maxTwoByte=twoByte;
                     }
                 }

                 /* skip an optional plus sign */
                 if(bLen>0 && *s=='+') {
                     charLength=0; /* count codepage characters between plusses */
                     ++s;
                 }
                 if(*s==' ' || *s=='\t') {
                     break;
                 }
             }

             /* at the end of the line there is no second character to look at */
             byte= *s!=0 ? parseByte(s[0], s[1], (UBool)(bLen==0)) : -1;
             if(byte<0) {
                 fprintf(stderr, "%d: error parsing codepage bytes on \"%s\"\n", lineNum, line);
                 isOK=FALSE;
                 break;
             }
             if(byte>0xff) {
                 /* special EUC prefix which does not result in a byte */
                 s+=2;
                 continue;
             }

             if(bLen==UCNV_EXT_MAX_BYTES) {
                 fprintf(stderr, "%d: error: too many codepage bytes on \"%s\"\n", lineNum, line);
                 isOK=FALSE;
                 break;
             }

             bytes[bLen++]=(uint8_t)byte;
             oredBytes|=(uint8_t)byte;
             ++charLength;

             s+=2;
         }

         if(!isOK) {
             continue;
         }

         if(bLen==0) {
             fprintf(stderr, "%d: no codepage bytes on \"%s\"\n", lineNum, line);
             isOK=FALSE;
             continue;
         } else if(bLen<=4) {
             /* short byte sequences are also stored inside the mapping itself */
             uprv_memcpy(m.b.bytes, bytes, bLen);
         }
         m.bLen=bLen;

         s=(char *)u_skipWhitespace(s);

         /* parse code points */
         for(;;) {
             /* skip a plus sign between codepage characters */
             if(uLen>0 && *s=='+') {
                 ++s;
             }
             if(*s==0 || *s==' ' || *s=='\t' || *s=='\n' || *s=='\r') {
                 break;
             }

             cp=(UChar32)uprv_strtoul(s, &end, 16);
             if(end==s) {
                 if(uprv_strncmp(s, "????", 4)==0 || uprv_strstr(s, "UNASSIGNED")!=NULL) {
                     /* this is a non-entry, do not add it to the mapping table */
                     goto continueOuterLoop;
                 }
                 fprintf(stderr, "%d: error parsing Unicode code point on \"%s\"\n", lineNum, line);
                 isOK=FALSE;
                 break;
             }
             if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)) {
                 fprintf(stderr, "%d: error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", lineNum, line);
                 isOK=FALSE;
                 break;
             }

             if(uLen==UCNV_EXT_MAX_UCHARS) {
                 fprintf(stderr, "%d: error: too many Unicode code points on \"%s\"\n", lineNum, line);
                 isOK=FALSE;
                 break;
             }
             codePoints[uLen++]=cp;
             /*
              * Step over the separator that follows the number, but never over
              * the string terminator (a last line without a newline ends right here).
              */
             s= *end!=0 ? end+1 : end;
         }

         if(!isOK) {
             continue;
         }

         if(uLen==0) {
             fprintf(stderr, "%d: no Unicode code points on \"%s\"\n", lineNum, line);
             isOK=FALSE;
             continue;
         } else if(uLen==1) {
             m.u=codePoints[0];
         } else {
             /* preflight only: check that the UTF-16 form of the code points is not too long */
             UErrorCode errorCode=U_ZERO_ERROR;
             u_strFromUTF32(NULL, 0, &u16Length, codePoints, uLen, &errorCode);
             if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) ||
                 u16Length>UCNV_EXT_MAX_UCHARS
             ) {
                 fprintf(stderr, "%d: too many UChars on \"%s\"\n", lineNum, line);
                 isOK=FALSE;
                 continue;
             }
         }
         m.uLen=uLen;

         ucm_addMapping(ucm->base, &m, codePoints, bytes);
 continueOuterLoop:
         ;
     }

     /* only meaningful if a section was skipped at all (startSkipLineNum is then at least 1) */
     if (startSkipLineNum > 0 && endSkipLineNum >= lineNum - 20) {
         /* Usually there are at least a few mappings. Let's say that 20 is the minimum */
         fprintf(stderr, "Internal Error: Skipped too many lines\n");
         isOK = FALSE;
     }

     if(!isOK) {
         exit(2);
     }
 }

 /*
  * Merge the toUFile mappings into fromUFile and set real precision flags.
  * The global subchar is handed to ucm_mergeTables() as a sequence of
  * 1..4 bytes, most significant byte first, together with subchar1.
  */
 static void
 mergeMappings() {
     uint8_t subBytes[4];
     int32_t subcharLength, pos;
     uint32_t remaining;

     /* the number of bytes follows from the magnitude of the value */
     if(subchar<=0xff) {
         subcharLength=1;
     } else if(subchar<=0xffff) {
         subcharLength=2;
     } else if(subchar<=0xffffff) {
         subcharLength=3;
     } else {
         subcharLength=4;
     }

     /* fill the array back to front so that the lowest byte ends up last */
     remaining=subchar;
     for(pos=subcharLength-1; pos>=0; --pos) {
         subBytes[pos]=(uint8_t)remaining;
         remaining>>=8;
     }

     ucm_mergeTables(
         fromUFile->base, toUFile->base,
         subBytes, subcharLength,
         (uint8_t)subchar1);
 }

 /*
  * Inspect the merged mappings in fromUFile->base and derive the global
  * properties that writeUCM() prints:
  *   usesPUA          - some mapping uses U+E000..U+F8FF or U+F0000..U+10FFFF
  *   charsetFamily    - ASCII or EBCDIC, from the roundtrip mappings of
  *                      U+0041 and U+000A, or guessed from byte ranges
  *   variantLF        - EBCDIC only: set when U+000A roundtrips with 0x15,
  *                      cleared when it roundtrips with 0x25
  *   variantASCII     - U+0021..U+007E do not all roundtrip to themselves
  *   variantSUB       - IBM PC rotation of the 0x1a/0x1c/0x7f controls
  *   variantControls  - other C0 controls/DEL that do not map to themselves
  *   is7Bit           - no codepage byte above 0x7f was seen (oredBytes)
  *   is_0xe_0xf_Stateful - forced on for CCSID 25546
  *
  * Terminates the process with exit code 3 if the charset family cannot
  * be determined.
  */
 static void
 analyzeTable() {
     UCMTable *table;
     UCMapping *m, *mLimit;
     UChar32 *codePoints;
     uint8_t *bytes;

     UChar32 u;
     int32_t i, countASCII=0;
     uint8_t b;

     table=fromUFile->base;
     m=table->mappings;
     mLimit=m+table->mappingsLength;

     for(; m<mLimit; ++m) {
         codePoints=UCM_GET_CODE_POINTS(table, m);
         bytes=UCM_GET_BYTES(table, m);

         /* PUA used? (checked for every mapping, including fallbacks) */
         for(i=0; i<m->uLen; ++i) {
             u=codePoints[i];
             if((uint32_t)(u-0xe000)<0x1900 || (uint32_t)(u-0xf0000)<0x20000) {
                 usesPUA=1;
             }
         }

         /* only consider roundtrip mappings for the rest */
         if(m->f!=0) {
             continue;
         }

         if(m->uLen==1) {
             u=*codePoints;
             b=*bytes; /* first (or only) codepage byte */

             if(m->bLen==1) {
                 /* ASCII or EBCDIC? */
                 if(u==0x41) {
                     if(b==0x41) {
                         charsetFamily=U_ASCII_FAMILY;
                     } else if(b==0xc1) {
                         charsetFamily=U_EBCDIC_FAMILY;
                     }
                 } else if(u==0xa) {
                     if(b==0xa) {
                         charsetFamily=U_ASCII_FAMILY;
                     } else if(b==0x25) {
                         charsetFamily=U_EBCDIC_FAMILY;
                         variantLF=0;
                     } else if(b==0x15) {
                         charsetFamily=U_EBCDIC_FAMILY;
                         variantLF=1;
                     }
                 }
             }

             /* US-ASCII? (the 94 graphic characters U+0021..U+007E) */
             if((uint32_t)(u-0x21)<94) {
                 if(m->bLen==1 && u==b) {
                     ++countASCII;
                 } else {
                     variantASCII=1;
                 }
             } else if(u<0x20 || u==0x7f) {
                 /* non-ISO C0 controls? */
                 if(u!=b) {
                     /* IBM PC rotation of SUB and other controls: 0x1a->0x7f->0x1c->0x1a */
                     if( (u==0x1a && b==0x7f) ||
                         (u==0x1c && b==0x1a) ||
                         (u==0x7f && b==0x1c)
                     ) {
                         charsetFamily=U_ASCII_FAMILY;
                         variantSUB=1;
                     } else {
                         variantControls=1;
                     }
                 }
             }
         }
     }

     is7Bit= oredBytes<=0x7f;

     if(charsetFamily==U_UNKNOWN_CHARSET_FAMILY) {
         if(minCharLength==2 && maxCharLength==2) {
             /*
              * Guess the charset family for DBCS according to typical byte
              * distributions. Each lower/upper bound pair must be joined
              * with && - with || the range test is always true, every DBCS
              * table is classified as ASCII and the EBCDIC branch below is
              * unreachable.
              */
             if( ((0x2020<=minTwoByte && minTwoByte<=0x217e) && maxTwoByte<=0x7e7e) ||
                 ((0xa0a0<=minTwoByte && minTwoByte<=0xa1fe) && maxTwoByte<=0xfefe) ||
                 ((0x8140<=minTwoByte && minTwoByte<=0x81fe) && maxTwoByte<=0xfefe)
             ) {
                 charsetFamily=U_ASCII_FAMILY;
             } else if((minTwoByte==0x4040 || (0x4141<=minTwoByte && minTwoByte<=0x41fe)) && maxTwoByte<=0xfefe) {
                 charsetFamily=U_EBCDIC_FAMILY;
             }
         }
         if(minCharLength==4 && maxCharLength==4) {
             /* guess the charset family for QBCS according to typical byte distributions */
             if (ccsid == 5487) {
                 /* Special partial gb18030 table */
                 charsetFamily=U_ASCII_FAMILY;
             }
         }
         if(charsetFamily==U_UNKNOWN_CHARSET_FAMILY) {
             fprintf(stderr, "error: unable to determine the charset family\n");
             exit(3);
         }
     }

     /* reset variant indicators if they do not apply */
     if(charsetFamily!=U_ASCII_FAMILY || minCharLength!=1) {
         variantASCII=variantSUB=variantControls=0;
     } else if(countASCII!=94) {
         /* if there are not 94 mappings for ASCII graphic characters, then set variantASCII */
         variantASCII=1;
     }

     if(charsetFamily!=U_EBCDIC_FAMILY || minCharLength!=1) {
         variantLF=0;
     }
     if(ccsid==25546) {
         /* Special case. It's not EBCDIC, but it is stateful like EBCDIC_STATEFUL. */
         is_0xe_0xf_Stateful = 1;
     }
 }

 /*
  * Search knownSubchars for an entry whose ccsid equals ccsidToMatch.
  * On a match, copy that entry's subchar and subchar1 into the globals of
  * the same names and return 1. Return 0 (globals untouched) otherwise.
  */
 static int
 getSubchar(uint16_t ccsidToMatch) {
     int idx=0;

     while(idx<sizeof(knownSubchars)/sizeof(knownSubchars[0])) {
         if(knownSubchars[idx].ccsid != ccsidToMatch) {
             ++idx;
             continue;
         }

         /* first match wins */
         subchar=knownSubchars[idx].subchar;
         subchar1=knownSubchars[idx].subchar1;
         return 1;
     }

     return 0;
 }

 /*
  * Read the header of a UPMAP/UXMAP file from f, up to (not including) the
  * first line that starts with "CHARMAP", and store the values of
  * <subchar>, <subchar1> and #<subchar1> lines in the globals subchar and
  * subchar1. A value is a sequence of \xHH escapes which is combined into
  * one integer, first byte most significant; a recognized line without any
  * escape stores 0.
  *
  * Terminates the process with exit code 2 if an escape is not followed by
  * exactly two hex digits.
  */
 static void
 getSubcharFromUPMAP(FILE *f) {
     char line[200];
     char *s, *end;
     uint32_t *p;
     uint32_t value, bytes;

     /*
      * Use strncmp() rather than memcmp() for all prefix tests: the line is
      * NUL-terminated and may be shorter than the prefix, and memcmp() is
      * allowed to read the stale or indeterminate bytes behind the
      * terminator (or even beyond the end of line[]).
      */
     while(fgets(line, sizeof(line), f)!=NULL && strncmp(line, "CHARMAP", 7)!=0) {
         s=(char *)u_skipWhitespace(line);

         /* skip empty lines */
         if(*s==0 || *s=='\n' || *s=='\r') {
             continue;
         }

         /* look for variations of subchar entries */
         if(strncmp(s, "<subchar>", 9)==0) {
             s=(char *)u_skipWhitespace(s+9);
             p=&subchar;
         } else if(strncmp(s, "<subchar1>", 10)==0) {
             s=(char *)u_skipWhitespace(s+10);
             p=&subchar1;
         } else if(strncmp(s, "#<subchar1>", 11)==0) {
             s=(char *)u_skipWhitespace(s+11);
             p=&subchar1;
         } else {
             continue;
         }

         /* get the value and store it in *p */
         bytes=0;
         while(s[0]=='\\' && s[1]=='x') {
             value=uprv_strtoul(s+2, &end, 16);
             /* each escape must be exactly "\xHH" and hold a single byte */
             if(end!=s+4 || value>0xff) {
                 fprintf(stderr, "error parsing UPMAP subchar from \"%s\"\n", line);
                 exit(2);
             }
             s=end;
             bytes=(bytes<<8)|value;
         }
         *p=bytes;
     }
 }

 /*
  * Return the state table string from knownStateTables for the current
  * ccsid, or NULL if there is none. An entry whose unicode field is 0
  * matches any unicodeCCSID; otherwise unicodeCCSID must match as well.
  * The first matching entry is returned.
  */
 static const char *
 getStateTable() {
     int32_t idx=0;

     while(idx<LENGTHOF(knownStateTables)) {
         if(knownStateTables[idx].ccsid == ccsid) {
             if(knownStateTables[idx].unicode == 0 || knownStateTables[idx].unicode == unicodeCCSID) {
                 return knownStateTables[idx].table;
             }
         }
         ++idx;
     }

     return NULL;
 }

 /*
  * Format the codepage byte sequence packed into b as a string of
  * "\xHH" escapes (upper-case hex, most significant byte first) in s.
  * The number of bytes written is 1..4 according to the magnitude of b;
  * b==0 yields "\x00".
  *
  * s must have room for at least 17 chars (4 escapes plus the NUL).
  */
 static void
 writeBytes(char *s, uint32_t b) {
     /*
      * The format uses the 'l' length modifier, so the arguments must be
      * unsigned long; passing uint32_t directly is a format/argument
      * mismatch wherever long is wider than 32 bits.
      */
     unsigned long v=b;

     if(v<=0xff) {
         sprintf(s, "\\x%02lX", v);
     } else if(v<=0xffff) {
         sprintf(s, "\\x%02lX\\x%02lX", v>>8, v&0xff);
     } else if(v<=0xffffff) {
         sprintf(s, "\\x%02lX\\x%02lX\\x%02lX", v>>16, (v>>8)&0xff, v&0xff);
     } else {
         sprintf(s, "\\x%02lX\\x%02lX\\x%02lX\\x%02lX", v>>24, (v>>16)&0xff, (v>>8)&0xff, v&0xff);
     }
 }

 /*
  * Write the complete .ucm file for the table in fromUFile to f.
  *
  * Output, in this order: the comment header, <code_set_name>,
  * <char_name_mask>, <mb_cur_max>, <mb_cur_min>, <uconv_class>, the
  * optional <subchar> and <subchar1>, <icu:charsetFamily>, an <icu:alias>
  * with the variant suffixes, the optional <icu:state> lines, and finally
  * the CHARMAP section(s).
  *
  * Besides writing, this function fills in fromUFile->states (lengths,
  * conversion type, output type and the parsed state table), may set the
  * global is_0xe_0xf_Stateful, and re-merges extension mappings into the
  * base table before printing.
  *
  * f        output stream for the .ucm file
  * ucmname  converter name without path or ".ucm", written as <code_set_name>
  * rpname   name of the R*MAP source file, only used in the comment header
  * tpname   name of the T*MAP source file, only used in the comment header
  *
  * Terminates the process with exit code U_INVALID_FORMAT_ERROR if
  * ucm_separateMappings() fails.
  */
 static void
 writeUCM(FILE *f, const char *ucmname, const char *rpname, const char *tpname) {
     /* scratch space for the subchar escapes, the alias line and single state table lines */
     char buffer[200];
     char *key, *value;
     const char *s, *end, *next;

     UCMStates *states;

     states=&fromUFile->states;

     /* write the header */
     fprintf(f,
         "# ***************************************************************************\n"
         "# *\n"
         "# *   Copyright (C) 1995-2007, International Business Machines\n"
         "# *   Corporation and others.  All Rights Reserved.\n"
         "# *\n"
         "# ***************************************************************************\n"
         "#\n"
         "# File created by rptp2ucm (compiled on %s)\n"
         "# from source files %s and %s\n"
         "#\n", __DATE__, rpname, tpname);

     /* ucmname does not have a path or .ucm */
     fprintf(f, "<code_set_name>               \"%s\"\n", ucmname);

     fputs("<char_name_mask>              \"AXXXX\"\n", f);
     fprintf(f, "<mb_cur_max>                  %u\n", maxCharLength);
     fprintf(f, "<mb_cur_min>                  %u\n", minCharLength);

     states->maxCharLength=maxCharLength;
     states->minCharLength=minCharLength;

     /* defaults for the states; the branches below override them for SBCS, DBCS and EBCDIC_STATEFUL */
     states->conversionType=UCNV_MBCS;
     states->outputType=maxCharLength-1;

     /* choose the <uconv_class> from the min/max character lengths and the charset family */
     if(maxCharLength==1) {
         fputs("<uconv_class>                 \"SBCS\"\n", f);
         states->conversionType=UCNV_SBCS;
     } else if(maxCharLength==2) {
         if(minCharLength==1) {
             if(charsetFamily==U_EBCDIC_FAMILY) {
                 /* mixed 1/2-byte EBCDIC tables are written as EBCDIC_STATEFUL */
                 fputs("<uconv_class>                 \"EBCDIC_STATEFUL\"\n", f);
                 is_0xe_0xf_Stateful = 1;
                 states->conversionType=UCNV_EBCDIC_STATEFUL;
                 states->outputType=MBCS_OUTPUT_2_SISO;
             } else {
                 fputs("<uconv_class>                 \"MBCS\"\n", f);
             }
         } else if(minCharLength==2) {
             fputs("<uconv_class>                 \"DBCS\"\n", f);
             states->conversionType=UCNV_DBCS;
         } else {
             fputs("<uconv_class>                 \"MBCS\"\n", f);
         }
     } else {
         fputs("<uconv_class>                 \"MBCS\"\n", f);
     }

     /* a subchar of 0 means "not known" and is not written */
     if(subchar!=0) {
         writeBytes(buffer, subchar);
         fprintf(f, "<subchar>                     %s\n", buffer);
     }

     /* <subchar1> is only written for tables with both single-byte and multi-byte characters */
     if(subchar1!=0) {
         if (minCharLength>1) {
             fprintf(stderr, "warning: <subchar1> \\x%02X is ignored for charsets without an SBCS portion.\n", subchar1);
         }
         else if (maxCharLength==1) {
             if (subchar!=subchar1) {
                 fprintf(stderr, "warning: <subchar1> \\x%02X is ignored for SBCS charsets.\n", subchar1);
             }
             /* else we got a duplicate subchar and subchar1. */
         }
         else {
             fprintf(f, "<subchar1>                    \\x%02X\n", subchar1);
         }
     }

     /* write charset family */
     if(charsetFamily==U_ASCII_FAMILY) {
         fputs("<icu:charsetFamily>           \"ASCII\"\n", f);
     } else {
         fputs("<icu:charsetFamily>           \"EBCDIC\"\n", f);
     }

     /* write alias describing the codepage */
     sprintf(buffer, "<icu:alias>                   \"ibm-%u", ccsid);
     if(!usesPUA && !variantLF && !variantASCII && !variantControls && !variantSUB) {
         /* no variant flag is set: mark the table as standard */
         uprv_strcat(buffer, "_STD\"\n\n");
     } else {
         /* add variant indicators in alphabetic order */
         if(variantASCII) {
             uprv_strcat(buffer, "_VASCII");
         }
         if(variantControls) {
             uprv_strcat(buffer, "_VGCTRL");
         }
         if(variantLF) {
             uprv_strcat(buffer, "_VLF");
         }
         if(variantSUB) {
             uprv_strcat(buffer, "_VSUB");
         }
         if(usesPUA) {
             uprv_strcat(buffer, "_VPUA");
         }
         uprv_strcat(buffer, "\"\n\n");
     }
     fputs(buffer, f);

     /* write the state table - <icu:state> */
     s=getStateTable();
     if(s==NULL && is7Bit) {
         /* no known state table, but only bytes 0..7f occur: use a one-line 7-bit table */
         s="<icu:state>                   0-7f\n";
     }
     if(s!=NULL) {
         fputs(s, f);
         fputs("\n", f);

         /* set the state table */
         while(*s!=0) {
             /* separate the state table string into lines */
             end=uprv_strchr(s, '\n');
             if(end!=NULL) {
                 next=end+1;
             } else {
                 /* last line without a trailing newline: stop at the terminating NUL */
                 end=uprv_strchr(s, 0);
                 next=end;
             }

             /* NOTE(review): the copy is not bounds-checked; this relies on every state table line being shorter than buffer - confirm */
             uprv_memcpy(buffer, s, end-s);
             buffer[end-s]=0;
             ucm_parseHeaderLine(fromUFile, buffer, &key, &value);
             s=next;
         }
     }

     ucm_processStates(states, false);

     /* separate extension mappings out of base table, and other checks */
     if(!ucm_separateMappings(fromUFile, is_0xe_0xf_Stateful)) {
         fprintf(stderr, "error: ucm_separateMappings() failed\n");
         exit(U_INVALID_FORMAT_ERROR);
     }

     /* merge the base and extension tables again to be friendlier to other tools */
     if(fromUFile->ext->mappingsLength>0) {
         UCMTable *base, *ext;
         UCMapping *m, *mLimit;

         base=fromUFile->base;
         ext=fromUFile->ext;
         m=ext->mappings;
         mLimit=m+ext->mappingsLength;
         while(m<mLimit) {
             ucm_addMapping(base, m, UCM_GET_CODE_POINTS(ext, m), UCM_GET_BYTES(ext, m));
             ++m;
         }

         ucm_sortTable(base);
         /* the extension table is now empty, so the second CHARMAP below is skipped */
         ext->mappingsLength=0;
     }

     /* write the mappings */
     fputs("CHARMAP\n", f);
     ucm_printTable(fromUFile->base, f, TRUE);
     fputs("END CHARMAP\n", f);

     if(fromUFile->ext->mappingsLength>0) {
         fputs("\nCHARMAP\n", f);
         ucm_printTable(fromUFile->ext, f, TRUE);
         fputs("END CHARMAP\n", f);
     }
 }

 /*
  * Find the knownRMAPtoTMAP entry for the current ccsid whose RMAP string
  * equals rmapExtention, and return a pointer to its TMAP name array.
  * Returns NULL if no entry matches.
  */
 static const TMAParray *
 findTPMAPs(const char *rmapExtention) {
     const int32_t entryCount = (int32_t)(sizeof(knownRMAPtoTMAP)/sizeof(knownRMAPtoTMAP[0]));
     int32_t idx = 0;

     while (idx < entryCount) {
         /* compare the cheap integer first; the string only for the right ccsid */
         if (knownRMAPtoTMAP[idx].ccsid != ccsid || strcmp(rmapExtention, knownRMAPtoTMAP[idx].RMAP) != 0) {
             idx++;
             continue;
         }
         return &(knownRMAPtoTMAP[idx].TMAP);
     }
     return NULL;
 }

 /*
  * Build the list of T*MAP filenames that belong to an R*MAP file.
  *
  * origRpmapFilename  path of the R*MAP file. The caller must already have
  *                    verified that it ends in a standard 17-character name
  *                    (8 hex digits, '.', 8-character extension); the fixed
  *                    offsets below rely on that.
  * numFiles           receives the number of names returned
  * multiplePossible   set to TRUE when the names come from the
  *                    knownRMAPtoTMAP table, FALSE for the default name
  *
  * The two 4-digit CCSIDs at the start of the basename are swapped for all
  * generated names. If findTPMAPs() knows the R*MAP extension, one name per
  * listed T*MAP extension is returned; otherwise a single default name is
  * returned in which the first letter of the extension is replaced
  * ('R' -> 'T', anything else -> 't').
  *
  * Returns a malloc()ed array of malloc()ed strings; release it with
  * freeTPMAPNames(). Terminates the process with exit code 1 if a .PACKAGE
  * file exists for an unknown table, or if memory cannot be allocated.
  */
 static char **
 createTPMAPNames(const char *origRpmapFilename, int32_t *numFiles, UBool *multiplePossible) {
     char *rpmapFilename;
     char *packageFilename;
     const char *rpmapExtension;
     FILE *packageFile = NULL;
     char **tpmapFiles = NULL;
     int32_t length;
     const TMAParray *TMAPs;

     *numFiles = 0;
     *multiplePossible = FALSE;

     length = (int32_t)strlen(origRpmapFilename);
     rpmapFilename = strdup(origRpmapFilename);
     /* ".PACKAGE" replaces the 9-character ".XXXXXXXX" tail, so this is ample */
     packageFilename = (char *)malloc(strlen(origRpmapFilename) + 8);
     if (rpmapFilename == NULL || packageFilename == NULL) {
         fprintf(stderr, "error: out of memory\n");
         exit(1);
     }

     rpmapExtension = strrchr(origRpmapFilename, '.') + 1;

     /* swap the two 4-digit halves of the 8-digit basename */
     uprv_memmove(rpmapFilename+length-17, origRpmapFilename+length-13, 4);
     uprv_memmove(rpmapFilename+length-13, origRpmapFilename+length-17, 4);

     /* the package file has the same swapped basename with a .PACKAGE extension */
     strcpy(packageFilename, rpmapFilename);
     packageFilename[length-9] = 0;
     strcat(packageFilename, ".PACKAGE");
     packageFile = fopen(packageFilename, "r");

     TMAPs = findTPMAPs(rpmapExtension);
     if (TMAPs != NULL || packageFile != NULL) {
         /* sizeof does not evaluate its operand, so this is fine even for TMAPs==NULL */
         const int32_t maxTMAPs = (int32_t)(sizeof(*TMAPs)/sizeof((*TMAPs)[0]));
         int32_t idx;

         if (TMAPs == NULL) {
             fprintf(stderr, "error: \"%s\" has a package, but has no recognized TPMAP table\n", rpmapFilename);
             exit(1);
         }
         if (packageFile != NULL) {
             fprintf(stderr, "warning: This tool doesn't read package files yet. So the correct list of alternate mapping files may be out of date.\n");
             fclose(packageFile);
         }

         /*
          * Count the names. The array has a fixed size and is only
          * NULL-terminated when not all slots are used, so the scan must
          * also stop at the end of the array.
          */
         while (*numFiles < maxTMAPs && (*TMAPs)[*numFiles] != NULL) {
             (*numFiles)++;
         }

         /* allocate at least one slot so that an empty list is not mistaken for a failure */
         tpmapFiles = (char **)malloc(sizeof(char *)*(*numFiles > 0 ? *numFiles : 1));
         if (tpmapFiles == NULL) {
             fprintf(stderr, "error: out of memory\n");
             exit(1);
         }
         for (idx = 0; idx < *numFiles; idx++) {
             const char *tmapExtension = (*TMAPs)[idx];
             /* size the buffer for the actual extension instead of assuming 8 characters */
             char *name = (char *)malloc((size_t)(length-8) + strlen(tmapExtension) + 1);
             if (name == NULL) {
                 fprintf(stderr, "error: out of memory\n");
                 exit(1);
             }
             memcpy(name, rpmapFilename, (size_t)(length-8));
             strcpy(name+(length-8), tmapExtension);
             tpmapFiles[idx] = name;
         }
         *multiplePossible = TRUE;
     }
     else {
         /* No Package information. Use the default name. */
         tpmapFiles = (char **)malloc(sizeof(char *));
         if (tpmapFiles == NULL) {
             fprintf(stderr, "error: out of memory\n");
             exit(1);
         }
         tpmapFiles[0] = strdup(rpmapFilename);
         if (tpmapFiles[0] == NULL) {
             fprintf(stderr, "error: out of memory\n");
             exit(1);
         }
         if(tpmapFiles[0][length-8]=='R') {
             tpmapFiles[0][length-8]='T';
         } else {
             tpmapFiles[0][length-8]='t';
         }
         *numFiles = 1;
     }

     free(rpmapFilename);
     free(packageFilename);
     return tpmapFiles;
 }

 static void
 freeTPMAPNames(char **tpmapFiles, int32_t numFiles) {
     int32_t idx;
     for (idx = 0; idx < numFiles; idx++) {
         free(tpmapFiles[idx]);
     }
     free(tpmapFiles);
 }

 /*
  * Have getCCSIDValues() fill the globals unicodeCCSID and ccsid from the
  * number parsed from the R*MAP basename. Terminates the process with
  * exit code 1 if getCCSIDValues() rejects the value.
  */
 static void
 setCCSID(uint32_t value) {
     if (getCCSIDValues(value, &unicodeCCSID, &ccsid)) {
         return;
     }

     fprintf(stderr, "error: %X is not a Unicode conversion table\n", value);
     exit(1);
 }

 /*
  * Convert one R*MAP file, together with each of its T*MAP files, into a
  * .ucm file.
  *
  * arg  path of the RPMAP/RXMAP file. Its basename must be a standard
  *      17-character name: 8 hex digits, '.', then RPMAP/RXMAP (either
  *      case) plus three more characters.
  *
  * For every T*MAP name from createTPMAPNames(): parse both files, find the
  * substitution characters (from the table itself, the matching U*MAP file,
  * or the built-in list), merge and analyze the mappings, and write the
  * .ucm file whose name is supplied by filenameHistory.
  *
  * Any error terminates the process: exit code 1 for bad or missing input
  * files and filename generation failures, 4 for an unusable output file;
  * the called functions use further codes.
  */
 static void
 processTable(const char *arg) {
     char filename[1024], tpname[32];
     const char *basename, *s;
     const char *ucmFilename, *tmapFilename;
     FILE *rpmap, *tpmap, *ucm;
     uint32_t value;
     int length, argLength, idx;
     size_t ucmLength;
     char **tpmapFileStrings;
     int32_t tpmapFileStringsNum;
     UBool multipleTablesPossible;
     UErrorCode errorCode = U_ZERO_ERROR;

     init();

     /* separate path and basename */
     basename=uprv_strrchr(arg, '/');
     if(basename==NULL) {
         basename=uprv_strrchr(arg, '\\');
         if(basename==NULL) {
             basename=arg;
         } else {
             ++basename;
         }
     } else {
         ++basename;
         /* with both kinds of separators, the last one wins */
         s=uprv_strrchr(arg, '\\');
         if(s!=NULL && ++s>basename) {
             basename=s;
         }
     }

     /* is this a standard RPMAP filename? */
     value=uprv_strtoul(basename, (char **)&s, 16);
     if( uprv_strlen(basename)!=17 ||
         (uprv_memcmp(basename+9, "RPMAP", 5)!=0 && uprv_memcmp(basename+9, "rpmap", 5)!=0 &&
          uprv_memcmp(basename+9, "RXMAP", 5)!=0 && uprv_memcmp(basename+9, "rxmap", 5)!=0) ||
         (s-basename)!=8 ||
         *s!='.'
     ) {
         fprintf(stderr, "error: \"%s\" is not a standard RPMAP filename\n", basename);
         exit(1);
     }

     /* filename[] receives a copy of arg below, so arg must fit */
     if(uprv_strlen(arg)>=sizeof(filename)) {
         fprintf(stderr, "error: filename \"%s\" is too long\n", arg);
         exit(1);
     }
     /* at least 17 because the basename is part of arg */
     argLength=(int)uprv_strlen(arg);

     setCCSID(value);

     /* try to find all the TPMAP files for this RPMAP */
     tpmapFileStrings = createTPMAPNames(arg, &tpmapFileStringsNum, &multipleTablesPossible);

     cleanup();

     for (idx = 0; idx < tpmapFileStringsNum; idx++) {
         /* every T*MAP gets a fresh set of tables */
         init();
         setCCSID(value);

         /* try to open the RPMAP file */
         rpmap=fopen(arg, "r");
         if(rpmap==NULL) {
             fprintf(stderr, "error: unable to open \"%s\"\n", arg);
             exit(1);
         }

         tpmap=fopen(tpmapFileStrings[idx], "r");
         if (tpmap == NULL) {
             /* there is no TPMAP */
             fprintf(stderr, "error: unable to find the TPMAP file \"%s\" for \"%s\"\n", tpmapFileStrings[idx], arg);
             exit(1);
         }
         puts(tpmapFileStrings[idx]);
         /* keep the last 17 characters of the TPMAP path for the .ucm header */
         length=(int)uprv_strlen(tpmapFileStrings[idx]);
         uprv_strcpy(tpname, tpmapFileStrings[idx]+length-17);

         /* parse both files */
         parseMappings(rpmap, fromUFile);
         parseMappings(tpmap, toUFile);
         fclose(tpmap);
         fclose(rpmap);

         /* if there is no subchar, then try to get it from the corresponding UPMAP */
         if(subchar==0) {
             FILE *f;

             /*
              * Start from the complete RPMAP path, including its directory,
              * and just replace the R by U. (Patching only the last 17
              * characters of filename[] would leave the directory part
              * uninitialized.)
              */
             uprv_strcpy(filename, arg);
             if(filename[argLength-8]=='R') {
                 filename[argLength-8]='U';
             } else {
                 filename[argLength-8]='u';
             }

             f=fopen(filename, "r");
             if(f==NULL) {
                 /* try reversing the CCSIDs */
                 uprv_memcpy(filename+argLength-17, basename+4, 4);
                 uprv_memcpy(filename+argLength-13, basename, 4);
                 f=fopen(filename, "r");
             }
             if(f!=NULL) {
                 getSubcharFromUPMAP(f);
                 fclose(f);
             }
         }
         /* subchar is still 0 here only if the block above ran, so filename is set */
         if(subchar==0 && !getSubchar(ccsid)) {
             fprintf(stderr, "warning: missing subchar in \"%s\" (CCSID=0x%04X)\n", filename, ccsid);
         }

         /* generate the .ucm filename */
         tmapFilename = strrchr(tpmapFileStrings[idx], '/');
         if (tmapFilename == NULL) {
             tmapFilename = strrchr(tpmapFileStrings[idx], '\\');
         }
         if (tmapFilename == NULL) {
             tmapFilename = tpmapFileStrings[idx];
         }
         else {
             tmapFilename++; /* Skip the file separator */
         }

         ucmFilename = filenameHistory->getFilename(basename, tmapFilename, year, &errorCode);
         if (U_FAILURE(errorCode)) {
             fprintf(stderr, "error: Can't generate filename from %s - %s\n", basename, u_errorName(errorCode));
             exit(1);
         }

         /* the name is copied into filename[] without its 4-character ".ucm" suffix below */
         ucmLength=uprv_strlen(ucmFilename);
         if(ucmLength<4 || ucmLength>=sizeof(filename)) {
             fprintf(stderr, "error: unusable output filename \"%s\"\n", ucmFilename);
             exit(4);
         }

         /* merge the mappings */
         mergeMappings();

         /* analyze the conversion table */
         analyzeTable();

         /* open the .ucm file */
         ucm=fopen(ucmFilename, "w");
         if(ucm==NULL) {
             /* report the file that actually failed to open */
             fprintf(stderr, "error: unable to open output file \"%s\"\n", ucmFilename);
             exit(4);
         }

         /* remove the .ucm from the filename for the following processing */
         uprv_memcpy(filename, ucmFilename, ucmLength-4);
         filename[ucmLength-4]=0;

         /* write the .ucm file */
         writeUCM(ucm, filename, basename, tpname);
         fclose(ucm);
         cleanup();
     }
     freeTPMAPNames(tpmapFileStrings, tpmapFileStringsNum);
 }


 /* Indexes into options[]; the order must match the entries of the array below. */
 enum
 {
     HISTORY_FILE
 };

 /* Command line options for u_parseArgs(): -f / --historyFile <file>; main() refuses to run without it. */
 UOption options[]={
     UOPTION_DEF( "historyFile", 'f', UOPT_REQUIRES_ARG),
 };

 /*
  * Entry point: rptp2ucm -f historyFile.txt { rpmap/rxmap-filename }+
  *
  * Loads the filename history, converts every R*MAP file named on the
  * command line with processTable(), and writes the history back.
  *
  * Returns 0 on success and 1 if the history file cannot be written.
  * Usage errors and an unusable history file terminate with exit code 1;
  * processTable() terminates the process itself on conversion errors.
  */
 int main(int argc, char* argv[])
 {
     UErrorCode status = U_ZERO_ERROR;
     argc = u_parseArgs(argc, argv, (int32_t)(sizeof(options)/sizeof(options[0])), options);
     /* argc<2 also covers the negative value that u_parseArgs() returns for a bad option */
     if(argc<2 || !options[HISTORY_FILE].doesOccur) {
         fprintf(stderr,
                 "usage: %s -f historyFile.txt { rpmap/rxmap-filename }+\n",
                 argv[0]);
         exit(1);
     }

     filenameHistory = FilenameMappingHistory::create(options[HISTORY_FILE].value, &status);
     if (U_FAILURE(status)) {
         /* print the history file name; the history object itself is not a string */
         fprintf(stderr,
                 "usage: %s could not use \"%s\". error=%s\n",
                 argv[0], options[HISTORY_FILE].value, u_errorName(status));
         exit(1);
     }

     /* the remaining arguments are the R*MAP files */
     while(--argc>0) {
         puts(*++argv);
         processTable(*argv);
     }

     filenameHistory->writeHistoryFile(&status);
     if (U_FAILURE(status)) {
         /* do not report success when the updated history was not saved */
         fprintf(stderr,
                 "error: could not write history file \"%s\". error=%s\n",
                 options[HISTORY_FILE].value, u_errorName(status));
         return 1;
     }

     return 0;
 }