blob: 2869d23f2a8834ecca987c7dcedaf31ce8c227db [file] [log] [blame]
/*
*******************************************************************************
* Copyright (C) 2007-2007, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*/
#include "rptp_map.h"
#include "uvector.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
/* Index of each comma-separated field on a history-file line; ITEM_COUNT is the number of fields. */
enum LineTypes {
    ITEM_UTS22_NAME = 0,
    ITEM_RMAP_NAME  = 1,
    ITEM_TMAP_NAME  = 2,
    ITEM_COUNT      = 3
};
/**
 * One history record: a generated (uts22) file name together with the RMAP and TMAP
 * file names it was produced for.
 *
 * The constructor duplicates all three strings with strdup() and the destructor
 * releases them with free(), so the object owns its strings. The arguments must not
 * be NULL because they are handed straight to strdup().
 *
 * Copying is disabled: a member-wise copy would leave two objects freeing the same
 * three buffers.
 */
struct FilenameMapping {
    char *uts22Name;
    char *rmapName;
    char *tmapName;

    FilenameMapping(const char *uts22, const char *rmap, const char *tmap) :
        uts22Name(strdup(uts22)), rmapName(strdup(rmap)), tmapName(strdup(tmap))
    {
    }

    ~FilenameMapping() {
        free(uts22Name);
        free(rmapName);
        free(tmapName);
    }

private:
    // Declared but intentionally never defined: prevents accidental copies (double free).
    FilenameMapping(const FilenameMapping &other);
    FilenameMapping &operator=(const FilenameMapping &other);
};
U_CDECL_BEGIN
/* Destroys a heap-allocated FilenameMapping handed over as an untyped pointer.
 * Passed as the first argument to the UVector constructors in this file. */
static void U_EXPORT2
freeFilenameMapping(void *obj) {
    FilenameMapping *mapping = static_cast<FilenameMapping *>(obj);
    delete mapping;
}
/*
 * Ordering callback: compares two FilenameMapping objects by their uts22Name.
 *
 * Returns -1, 0 or 1. strcmp() only promises the sign of its int result, so the value
 * is normalised before it is narrowed to int8_t; returning it directly could flip the
 * sign (e.g. 254 becomes -2) or turn a non-zero multiple of 256 into 0.
 */
static int8_t U_EXPORT2
sortByUTS22(const UHashTok key1, const UHashTok key2) {
    const FilenameMapping *item1 = static_cast<const FilenameMapping *>(key1.pointer);
    const FilenameMapping *item2 = static_cast<const FilenameMapping *>(key2.pointer);
    const int order = strcmp(item1->uts22Name, item2->uts22Name);
    return (int8_t)((order > 0) - (order < 0));
}
/*
 * Equality callback: non-zero when both FilenameMapping objects carry the same uts22Name.
 *
 * The strings are compared directly with strcmp() so that the answer never depends on
 * an int result that has been narrowed to int8_t.
 */
static int8_t U_EXPORT2
compareByUTS22(const UHashTok key1, const UHashTok key2) {
    const FilenameMapping *item1 = static_cast<const FilenameMapping *>(key1.pointer);
    const FilenameMapping *item2 = static_cast<const FilenameMapping *>(key2.pointer);
    return strcmp(item1->uts22Name, item2->uts22Name) == 0;
}
/* Equality callback: two mappings match when both rmapName and tmapName are identical.
 * The uts22Name is not looked at. */
static UBool U_EXPORT2
compareFilenameMapping(const UHashTok key1, const UHashTok key2) {
    const FilenameMapping *lhs = static_cast<const FilenameMapping *>(key1.pointer);
    const FilenameMapping *rhs = static_cast<const FilenameMapping *>(key2.pointer);
    if (strcmp(lhs->rmapName, rhs->rmapName) != 0) {
        return FALSE;
    }
    return strcmp(lhs->tmapName, rhs->tmapName) == 0;
}
U_CDECL_END
/*
 * Uppercases a NUL-terminated string in place with toupper() and returns the same pointer.
 *
 * Each character is converted to unsigned char before the call: toupper() is only
 * defined for EOF and values representable as unsigned char, so passing a negative
 * plain char (any byte >= 0x80 where char is signed) would be undefined behaviour.
 */
static char *toUpperStr(char *str) {
    for (char *cursor = str; *cursor != 0; ++cursor) {
        *cursor = (char)toupper((unsigned char)*cursor);
    }
    return str;
}
/*
 * Removes every trailing '\r' and '\n' from a NUL-terminated string in place and
 * returns the same pointer.
 *
 * The scan runs all the way to the first character, so a string that consists only of
 * line terminators (a blank line as read by fgets) becomes the empty string.
 */
static char *trim(char *str) {
    size_t length = strlen(str);
    while (length > 0 && (str[length - 1] == '\r' || str[length - 1] == '\n')) {
        str[--length] = 0;
    }
    return str;
}
/*
 * Builds an empty history: no remembered file name and an empty sortedByResult vector
 * that uses compareByUTS22 as its comparison callback.
 *
 * NOTE(review): members are initialized in the order they are declared in the class,
 * not in the order written here; throwAway has to be declared before sortedByResult
 * for the vector constructor to receive an initialized error code - confirm in the header.
 */
FilenameMappingHistory::FilenameMappingHistory() :
throwAway(U_ZERO_ERROR), // error code handed to the vector constructor below; not inspected here
sortedByResult(NULL, compareByUTS22, throwAway), // NOTE(review): the NULL first argument is presumably the element deleter (vector does not own its elements) - confirm
origFilename(NULL) // filled in by create()
{
}
/* Releases the remembered history-file name. */
FilenameMappingHistory::~FilenameMappingHistory() {
    // free() is a no-op for NULL, so an object that never went through create() is fine.
    // NOTE(review): the UVector objects stored in hashByRmap are not released here;
    // confirm whether the hash table is configured to own them.
    free(origFilename);
}
/*
 * Registers one mapping in all three lookup structures.
 *
 * @param vect   Vector collecting the mappings that share key->rmapName; the item is
 *               appended to it and the vector is stored in hashByRmap under that name.
 * @param key    Mapping to register; also inserted into sortedByResult, ordered by sortByUTS22.
 * @param status Error code passed on to each of the three container calls.
 */
void FilenameMappingHistory::addItem(UVector *vect, FilenameMapping *key, UErrorCode *status) {
    UErrorCode &errorCode = *status;

    vect->addElement(key, errorCode);
    hashByRmap.put(key->rmapName, vect, errorCode);
    sortedByResult.sortedInsert(static_cast<void *>(key), sortByUTS22, errorCode);
}
/**
 * Loads a history file and builds a FilenameMappingHistory from it.
 *
 * Lines that are empty after trim() or that start with '#' are skipped. Every other
 * line must contain at least two commas, splitting it into the uts22 name, the RMAP
 * name and the TMAP name (the last field is everything after the second comma). The
 * RMAP and TMAP names are uppercased before they are stored.
 *
 * @param filename Path of the history file; a copy is kept so that writeHistoryFile()
 *                 can rewrite the same file.
 * @param status   Set to U_FILE_ACCESS_ERROR when the file cannot be opened, and to
 *                 U_PARSE_ERROR for a line with too few fields, a repeated RMAP/TMAP
 *                 pair or a repeated uts22 name. It is reset to U_ZERO_ERROR at the
 *                 start of every line that is read.
 * @return The populated history, or NULL on any of the errors above. On failure the
 *         file is closed and everything allocated here is released.
 */
FilenameMappingHistory *FilenameMappingHistory::create(const char *filename, UErrorCode *status) {
    // Open first so that a missing file does not leave a half-built object behind.
    FILE *file = fopen(filename, "r");
    if (file == NULL) {
        *status = U_FILE_ACCESS_ERROR;
        return NULL;
    }

    FilenameMappingHistory *retVal = new FilenameMappingHistory();
    retVal->origFilename = strdup(filename);

    char line[1024];
    int32_t lineNum = 1;
    UBool failed = FALSE;

    for (; fgets(line, sizeof(line), file) != NULL; lineNum++) {
        // NOTE(review): this reset also discards any failure reported by addItem()
        // for the previous line.
        *status = U_ZERO_ERROR;
        trim(line);
        if (line[0] == '#' || line[0] == 0) {
            continue;
        }

        // Split the line in place: every field but the last ends at a comma.
        char *itemStr[ITEM_COUNT];
        char *fieldStart = line;
        for (int32_t idx = ITEM_UTS22_NAME; idx < ITEM_COUNT && !failed; idx++) {
            itemStr[idx] = fieldStart;
            if (idx < ITEM_COUNT - 1) {
                char *comma = strchr(fieldStart, ',');
                if (comma == NULL) {
                    fprintf(stderr, "Parse error for history file on line %d\n", lineNum);
                    *status = U_PARSE_ERROR;
                    failed = TRUE;
                } else {
                    *comma = 0;
                    fieldStart = comma + 1;
                }
            }
        }
        if (failed) {
            break;
        }

        FilenameMapping *item = new FilenameMapping(itemStr[ITEM_UTS22_NAME],
                                                    toUpperStr(itemStr[ITEM_RMAP_NAME]),
                                                    toUpperStr(itemStr[ITEM_TMAP_NAME]));
        const UHashElement *elem = retVal->hashByRmap.find(itemStr[ITEM_RMAP_NAME]);
        const UBool isNewVector = (elem == NULL);
        UVector *vect;
        if (isNewVector) {
            // First mapping seen for this RMAP.
            vect = new UVector(freeFilenameMapping, compareFilenameMapping, *status);
        }
        else {
            // This RMAP already has at least one TMAP; the pair itself must still be new.
            vect = (UVector*)(elem->value.pointer);
            if (vect->contains(item)) {
                fprintf(stderr, "Duplicate R?MAP/T?MAP combination in history file on line %d\n", lineNum);
                *status = U_PARSE_ERROR;
                failed = TRUE;
            }
        }
        if (!failed && retVal->sortedByResult.indexOf(item) >= 0) {
            fprintf(stderr, "Duplicate result in history file on line %d\n", lineNum);
            *status = U_PARSE_ERROR;
            failed = TRUE;
        }
        if (failed) {
            // Nothing has been registered for this line yet, so both objects are still ours.
            delete item;
            if (isNewVector) {
                delete vect;
            }
            break;
        }
        retVal->addItem(vect, item, status);
    }

    fclose(file);
    if (failed) {
        delete retVal;
        return NULL;
    }
    return retVal;
}
/**
 * Rewrites the history file this object was created from.
 *
 * The file is truncated and receives one comment line followed by one
 * "uts22Name,rmapName,tmapName" line per entry of sortedByResult, in that vector's order.
 *
 * @param status Set to U_FILE_ACCESS_ERROR when no file name is known, when the file
 *               cannot be opened for writing, or when writing or closing it fails.
 *               Left untouched on success.
 */
void FilenameMappingHistory::writeHistoryFile(UErrorCode *status) {
    if (origFilename == NULL) {
        // Object was not produced by create(); there is no file to write to.
        *status = U_FILE_ACCESS_ERROR;
        return;
    }
    FILE *file = fopen(origFilename, "w");
    if (file == NULL) {
        *status = U_FILE_ACCESS_ERROR;
        return;
    }
    fprintf(file, "# This file was machine generated by the rptp2ucm tool\n");
    for (int32_t idx = 0; idx < sortedByResult.size(); idx++) {
        FilenameMapping *item = (FilenameMapping *)sortedByResult.elementAt(idx);
        fprintf(file, "%s,%s,%s\n", item->uts22Name, item->rmapName, item->tmapName);
    }
    // Buffered output may only fail at flush time, so check both the stream state and fclose().
    const int writeFailed = ferror(file);
    const int closeFailed = fclose(file);
    if (writeFailed != 0 || closeFailed != 0) {
        *status = U_FILE_ACCESS_ERROR;
    }
}
/*
 * Splits a 32-bit value into a Unicode CCSID and a codepage CCSID.
 *
 * The low 16 bits are tried as the Unicode CCSID first (only 13488 and 17584 are
 * accepted there), with the high 16 bits then taken as the codepage CCSID. Otherwise
 * the high 16 bits are tried against the longer list below, with the low 16 bits as
 * the codepage CCSID.
 *
 * @param value        Combined 32-bit value.
 * @param unicodeCCSID Receives the recognised Unicode CCSID; when nothing is
 *                     recognised it receives the high 16 bits of value.
 * @param ccsid        Receives the codepage CCSID, or 0 when nothing is recognised.
 * @return TRUE when one half is a recognised Unicode CCSID, FALSE otherwise.
 */
U_CFUNC UBool getCCSIDValues(uint32_t value, uint16_t *unicodeCCSID, uint16_t *ccsid) {
    const uint16_t lowHalf = (uint16_t)(value & 0xffff);
    const uint16_t highHalf = (uint16_t)(value >> 16);

    *ccsid = 0;

    /* is this really a Unicode conversion table? - get the CCSID */
    if (lowHalf == 13488 || lowHalf == 17584) {
        *unicodeCCSID = lowHalf;
        *ccsid = highHalf;
        return TRUE;
    }

    *unicodeCCSID = highHalf;
    switch (highHalf) {
    case 13488: /* Unicode 2.0, UTF-16BE with IBM PUA */
    case 17584: /* Unicode 3.0, UTF-16BE with IBM PUA */
    case 1200:  /* UTF-16BE with IBM PUA */
    case 1232:  /* UTF-32BE with IBM PUA */
    case 21680: /* Unicode 4.0, UTF-16BE with IBM PUA */
    case 61956: /* UTF-16BE with Microsoft HKSCS-Big 5 PUA */
        *ccsid = lowHalf;
        return TRUE;
    default:
        return FALSE;
    }
}
/*
 * Maps a Unicode CCSID to the suffix used in generated file names.
 * Returns a string literal; the empty string for any CCSID not listed here.
 */
static const char *getUnicodeSuffix(uint16_t unicode) {
    if (unicode == 13488) {
        return "_U2"; /* Unicode 2.0 */
    }
    if (unicode == 17584) {
        return "_U3"; /* Unicode 3.0 */
    }
    if (unicode == 21680) {
        return "_U4"; /* Unicode 4.0 */
    }
    /* 25776 would be "_U4.1"; not used */
    if (unicode == 61956) {
        return "_MS"; /* Microsoft PUA extensions */
    }
    return "";
}
/*
 * Copies the four name characters used in generated file names - the ones at offsets
 * 10, 14, 15 and 16 of mapFilename - to dest, uppercased. Returns the number of
 * characters written (always 4); dest is not NUL-terminated.
 */
static int32_t appendMapSuffix(char *dest, const char *mapFilename) {
    static const int32_t sourceOffsets[4] = { 10, 14, 15, 16 }; /* P or X, then the last 3 suffix characters */
    int32_t written = 0;
    for (; written < 4; written++) {
        // unsigned char: toupper() is undefined for negative plain-char values
        dest[written] = (char)toupper((unsigned char)mapFilename[sourceOffsets[written]]);
    }
    return written;
}

/*
 * Builds a new ".ucm" file name for an RMAP/TMAP pair.
 *
 * The result is "ibm-<ccsid>_<rmap chars>" followed, in the new format only, by
 * "_<tmap chars>", then "-<year>", then in the new format only the Unicode suffix
 * from getUnicodeSuffix(), then ".ucm". <ccsid> comes from parsing the leading
 * hexadecimal digits of rpmapFilename and splitting them with getCCSIDValues(); its
 * return value is not checked, so an unrecognised value yields a ccsid of 0.
 *
 * NOTE(review): both file names are read at offsets 10 and 14-16 without a length
 * check, so each is assumed to be at least 17 characters long (tpmapFilename only
 * when useOldFormat is false) - confirm that callers guarantee this.
 *
 * @param rpmapFilename RMAP file name.
 * @param tpmapFilename TMAP file name; only read when useOldFormat is false.
 * @param year          Year appended to the name.
 * @param useOldFormat  When true, the TMAP characters and the Unicode suffix are left out.
 * @return A strdup()'d string that the caller must free().
 */
static char *generateFileName(const char *rpmapFilename, const char *tpmapFilename, uint16_t year, UBool useOldFormat) {
    char filename[1024];
    uint16_t unicode, ccsid;
    const uint32_t value = (uint32_t)strtoul(rpmapFilename, NULL, 16);

    // Always stores into both outputs, even when it returns FALSE.
    getCCSIDValues(value, &unicode, &ccsid);

    int32_t length = sprintf(filename, "ibm-%u_", (unsigned int)ccsid);
    length += appendMapSuffix(filename + length, rpmapFilename);
    if (!useOldFormat) {
        filename[length++] = '_';
        length += appendMapSuffix(filename + length, tpmapFilename);
    }
    // sprintf() writes the terminating NUL that the strcat() calls below rely on.
    length += sprintf(filename + length, "-%d", (int)year);
    if (!useOldFormat) {
        strcat(filename, getUnicodeSuffix(unicode));
    }
    strcat(filename, ".ucm");
    return strdup(filename);
}
const char *FilenameMappingHistory::getFilename(const char *rmapFilename, const char *tmapFilename, uint16_t year, UErrorCode *status) {
const char *retVal = NULL;
UVector *vect = NULL;
UBool useOldNameFormat = TRUE;
int idx;
const UHashElement *elem;
char *rmapFilenameDup = toUpperStr(strdup(rmapFilename));
char *tmapFilenameDup = toUpperStr(strdup(tmapFilename));
rmapFilename = rmapFilenameDup;
tmapFilename = tmapFilenameDup;
elem = hashByRmap.find(rmapFilename);
if (elem != NULL) {
FilenameMapping tempVal("", rmapFilename, tmapFilename);
// We already know about this mapping table. Get the old value.
vect = (UVector*)(elem->value.pointer);
idx = vect->indexOf(&tempVal);
if (idx >= 0) {
FilenameMapping *prevItem = (FilenameMapping*)vect->elementAt(idx);
retVal = prevItem->uts22Name;
free(rmapFilenameDup);
free(tmapFilenameDup);
return retVal;
}
// else More than one TMAP is available.
// This RPMAP has multiple choices, and it's new.
useOldNameFormat = FALSE;
}
else {
// New mapping table. Store information for future reference.
vect = new UVector(freeFilenameMapping, compareFilenameMapping, *status);
}
// We didn't find this name. Make up a new one.
char *fileNameDup = generateFileName(rmapFilename, tmapFilename, year, useOldNameFormat);
FilenameMapping *item = new FilenameMapping(fileNameDup, rmapFilename, tmapFilename);
// Double check that we haven't generated this name in the past.
idx = sortedByResult.indexOf(item);
if (idx >= 0) {
FilenameMapping *foundItem = (FilenameMapping *)sortedByResult.elementAt(idx);
if (strcmp(foundItem->rmapName, rmapFilename) != 0 || strcmp(foundItem->tmapName, tmapFilename) != 0) {
free(fileNameDup); // Another Unicode CCSID conflicts with this table, or some other conflict.
useOldNameFormat = FALSE;
fileNameDup = generateFileName(rmapFilename, tmapFilename, year, useOldNameFormat);
item->uts22Name = fileNameDup;
}
}
else {
free(fileNameDup); // filename was already copied.
}
addItem(vect, item, status);
free(rmapFilenameDup);
free(tmapFilenameDup);
return retVal;
}