blob: 92d7c8cfde13aba5b659d20d8ae1d15e614bfc14 [file] [log] [blame]
diff --git a/source/common/locid.cpp b/source/common/locid.cpp
index feadbcbc..4733af56 100644
--- a/source/common/locid.cpp
+++ b/source/common/locid.cpp
@@ -627,6 +627,17 @@ private:
LocalMemory<const char*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length, UErrorCode &status);
+
+ // Read the subdivisionAlias data from alias to
+ // strings+types+replacementIndexes
+ // Allocate length items for types, to store the type field.
+ // Allocate length items for replacementIndexes,
+ // to store the index in the strings for the replacement variant.
+ void readSubdivisionAlias(UResourceBundle* alias,
+ UniqueCharStrings* strings,
+ LocalMemory<const char*>& types,
+ LocalMemory<int32_t>& replacementIndexes,
+ int32_t &length, UErrorCode &status);
};
/**
@@ -647,6 +658,7 @@ public:
const CharStringMap& scriptMap() const { return script; }
const CharStringMap& territoryMap() const { return territory; }
const CharStringMap& variantMap() const { return variant; }
+ const CharStringMap& subdivisionMap() const { return subdivision; }
static void U_CALLCONV loadData(UErrorCode &status);
static UBool U_CALLCONV cleanup();
@@ -658,11 +670,13 @@ private:
CharStringMap scriptMap,
CharStringMap territoryMap,
CharStringMap variantMap,
+ CharStringMap subdivisionMap,
CharString* strings)
: language(std::move(languageMap)),
script(std::move(scriptMap)),
territory(std::move(territoryMap)),
variant(std::move(variantMap)),
+ subdivision(std::move(subdivisionMap)),
strings(strings) {
}
@@ -676,6 +690,7 @@ private:
CharStringMap script;
CharStringMap territory;
CharStringMap variant;
+ CharStringMap subdivision;
CharString* strings;
friend class AliasDataBuilder;
@@ -866,6 +881,34 @@ AliasDataBuilder::readVariantAlias(
status);
}
+/**
+ * Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
+ * Allocate length items for types, to store the type field. Allocate length
+ * items for replacementIndexes, to store the index in the strings for the
+ * replacement regions.
+ */
+void
+AliasDataBuilder::readSubdivisionAlias(
+ UResourceBundle* alias,
+ UniqueCharStrings* strings,
+ LocalMemory<const char*>& types,
+ LocalMemory<int32_t>& replacementIndexes,
+ int32_t &length,
+ UErrorCode &status)
+{
+ return readAlias(
+ alias, strings, types, replacementIndexes, length,
+#if U_DEBUG
+ [](const char* type) {
+ U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8);
+ },
+#else
+ [](const char*) {},
+#endif
+ [](const UnicodeString&) { },
+ status);
+}
+
/**
* Initializes the alias data from the ICU resource bundles. The alias data
* contains alias of language, country, script and variants.
@@ -905,12 +948,14 @@ AliasDataBuilder::build(UErrorCode &status) {
ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
LocalUResourceBundlePointer variantAlias(
ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
+ LocalUResourceBundlePointer subdivisionAlias(
+ ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));
if (U_FAILURE(status)) {
return nullptr;
}
int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
- variantLength = 0;
+ variantLength = 0, subdivisionLength = 0;
// Read the languageAlias into languageTypes, languageReplacementIndexes
// and strings
@@ -955,6 +1000,16 @@ AliasDataBuilder::build(UErrorCode &status) {
variantReplacementIndexes,
variantLength, status);
+ // Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
+ // and strings
+ LocalMemory<const char*> subdivisionTypes;
+ LocalMemory<int32_t> subdivisionReplacementIndexes;
+ readSubdivisionAlias(subdivisionAlias.getAlias(),
+ &strings,
+ subdivisionTypes,
+ subdivisionReplacementIndexes,
+ subdivisionLength, status);
+
if (U_FAILURE(status)) {
return nullptr;
}
@@ -994,6 +1049,14 @@ AliasDataBuilder::build(UErrorCode &status) {
status);
}
+ // Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
+ CharStringMap subdivisionMap(2, status);
+ for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
+ subdivisionMap.put(subdivisionTypes[i],
+ strings.get(subdivisionReplacementIndexes[i]),
+ status);
+ }
+
if (U_FAILURE(status)) {
return nullptr;
}
@@ -1004,6 +1067,7 @@ AliasDataBuilder::build(UErrorCode &status) {
std::move(scriptMap),
std::move(territoryMap),
std::move(variantMap),
+ std::move(subdivisionMap),
strings.orphanCharStrings());
if (data == nullptr) {
@@ -1105,6 +1169,14 @@ private:
// Replace by using variantAlias.
bool replaceVariant(UErrorCode& status);
+
+ // Replace by using subdivisionAlias.
+ bool replaceSubdivision(StringPiece subdivision,
+ CharString& output, UErrorCode& status);
+
+ // Replace transformed extensions.
+ bool replaceTransformedExtensions(
+ CharString& transformedExtensions, CharString& output, UErrorCode& status);
};
CharString&
@@ -1433,6 +1505,99 @@ AliasReplacer::replaceVariant(UErrorCode& status)
return false;
}
+bool
+AliasReplacer::replaceSubdivision(
+ StringPiece subdivision, CharString& output, UErrorCode& status)
+{
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ const char *replacement = data->subdivisionMap().get(subdivision.data());
+ if (replacement != nullptr) {
+ const char* firstSpace = uprv_strchr(replacement, ' ');
+ // Found replacement data for this subdivision.
+ size_t len = (firstSpace != nullptr) ?
+ (firstSpace - replacement) : uprv_strlen(replacement);
+ // Ignore len == 2, see CLDR-14312
+ if (3 <= len && len <= 8) {
+ output.append(replacement, (int32_t)len, status);
+ }
+ return true;
+ }
+ return false;
+}
+
+bool
+AliasReplacer::replaceTransformedExtensions(
+ CharString& transformedExtensions, CharString& output, UErrorCode& status)
+{
+ // The content of the transformedExtensions will be modified in this
+ // function to NULL-terminating (tkey-tvalue) pairs.
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ int32_t len = transformedExtensions.length();
+ const char* str = transformedExtensions.data();
+ const char* tkey = ultag_getTKeyStart(str);
+ int32_t tlangLen = (tkey == str) ? 0 :
+ ((tkey == nullptr) ? len : (tkey - str - 1));
+ CharStringByteSink sink(&output);
+ if (tlangLen > 0) {
+ Locale tlang = LocaleBuilder()
+ .setLanguageTag(StringPiece(str, tlangLen))
+ .build(status);
+ tlang.canonicalize(status);
+ tlang.toLanguageTag(sink, status);
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ T_CString_toLowerCase(output.data());
+ }
+ if (tkey != nullptr) {
+ // We need to sort the tfields by tkey
+ UVector tfields(status);
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ do {
+ const char* tvalue = uprv_strchr(tkey, '-');
+ if (tvalue == nullptr) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+ const char* nextTKey = ultag_getTKeyStart(tvalue);
+ if (nextTKey != nullptr) {
+ *((char*)(nextTKey-1)) = '\0'; // NULL terminate tvalue
+ }
+ tfields.insertElementAt((void*)tkey, tfields.size(), status);
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ tkey = nextTKey;
+ } while (tkey != nullptr);
+ tfields.sort([](UElement e1, UElement e2) -> int8_t {
+ return uprv_strcmp(
+ (const char*)e1.pointer, (const char*)e2.pointer);
+ }, status);
+ for (int32_t i = 0; i < tfields.size(); i++) {
+ if (output.length() > 0) {
+ output.append('-', status);
+ }
+ const char* tfield = (const char*) tfields.elementAt(i);
+ const char* tvalue = uprv_strchr(tfield, '-');
+ // Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
+ U_ASSERT(tvalue != nullptr);
+ *((char*)tvalue++) = '\0'; // NULL terminate tkey
+ output.append(tfield, status).append('-', status);
+ const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue, nullptr, nullptr);
+ output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status);
+ }
+ }
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ return true;
+}
+
CharString&
AliasReplacer::outputToString(
CharString& out, UErrorCode status)
@@ -1495,7 +1660,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
region = nullptr;
}
const char* variantsStr = locale.getVariant();
- const char* extensionsStr = locale_getKeywordsStart(locale.getName());
CharString variantsBuff(variantsStr, -1, status);
if (!variantsBuff.isEmpty()) {
if (U_FAILURE(status)) { return false; }
@@ -1559,11 +1723,52 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
if (U_FAILURE(status)) { return false; }
// Nothing changed and we know the order of the vaiants are not change
// because we have no variant or only one.
- if (changed == 0 && variants.size() <= 1) {
+ const char* extensionsStr = locale_getKeywordsStart(locale.getName());
+ if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
return false;
}
outputToString(out, status);
+ if (U_FAILURE(status)) {
+ return false;
+ }
if (extensionsStr != nullptr) {
+ changed = 0;
+ Locale temp(locale);
+ LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
+ if (U_SUCCESS(status) && !iter.isNull()) {
+ const char* key;
+ while ((key = iter->next(nullptr, status)) != nullptr) {
+ if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0 ||
+ uprv_strcmp("t", key) == 0) {
+ CharString value;
+ CharStringByteSink valueSink(&value);
+ locale.getKeywordValue(key, valueSink, status);
+ if (U_FAILURE(status)) {
+ status = U_ZERO_ERROR;
+ continue;
+ }
+ CharString replacement;
+ if (uprv_strlen(key) == 2) {
+ if (replaceSubdivision(value.toStringPiece(), replacement, status)) {
+ changed++;
+ temp.setKeywordValue(key, replacement.data(), status);
+ }
+ } else {
+ U_ASSERT(uprv_strcmp(key, "t") == 0);
+ if (replaceTransformedExtensions(value, replacement, status)) {
+ changed++;
+ temp.setKeywordValue(key, replacement.data(), status);
+ }
+ }
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ }
+ }
+ }
+ if (changed != 0) {
+ extensionsStr = locale_getKeywordsStart(temp.getName());
+ }
out.append(extensionsStr, status);
}
if (U_FAILURE(status)) {
@@ -1571,8 +1776,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
}
// If the tag is not changed, return.
if (uprv_strcmp(out.data(), locale.getName()) == 0) {
- U_ASSERT(changed == 0);
- U_ASSERT(variants.size() > 1);
out.clear();
return false;
}
diff --git a/source/common/loclikelysubtags.cpp b/source/common/loclikelysubtags.cpp
index a031bfa5..aa592e6e 100644
--- a/source/common/loclikelysubtags.cpp
+++ b/source/common/loclikelysubtags.cpp
@@ -320,7 +320,8 @@ XLikelySubtags::~XLikelySubtags() {
LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
const char *name = locale.getName();
if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
- // Private use language tag x-subtag-subtag...
+ // Private use language tag x-subtag-subtag... which CLDR changes to
+ // und-x-subtag-subtag...
return LSR(name, "", "", LSR::EXPLICIT_LSR);
}
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
diff --git a/source/common/uloc_tag.cpp b/source/common/uloc_tag.cpp
index 7f7fd911..ee1cedf3 100644
--- a/source/common/uloc_tag.cpp
+++ b/source/common/uloc_tag.cpp
@@ -129,7 +129,6 @@ static const char* const LEGACY[] = {
// Legacy tags with no preferred value in the IANA
// registry. Kept for now for the backward compatibility
// because ICU has mapped them this way.
- "cel-gaulish", "xtg-x-cel-gaulish",
"i-default", "en-x-i-default",
"i-enochian", "und-x-i-enochian",
"i-mingo", "see-x-i-mingo",
@@ -647,6 +646,22 @@ _isTKey(const char* s, int32_t len)
return FALSE;
}
+U_CAPI const char * U_EXPORT2
+ultag_getTKeyStart(const char *localeID) {
+ const char *result = localeID;
+ const char *sep;
+ while((sep = uprv_strchr(result, SEP)) != nullptr) {
+ if (_isTKey(result, sep - result)) {
+ return result;
+ }
+ result = ++sep;
+ }
+ if (_isTKey(result, -1)) {
+ return result;
+ }
+ return nullptr;
+}
+
static UBool
_isTValue(const char* s, int32_t len)
{
@@ -671,9 +686,13 @@ _isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
const int32_t kGotTKey = -1; // Got tkey, wait for tvalue. ERROR if stop here.
const int32_t kGotTValue = 6; // Got tvalue, wait for tkey, tvalue or end
+
+ if (len < 0) {
+ len = (int32_t)uprv_strlen(s);
+ }
switch (state) {
case kStart:
- if (ultag_isLanguageSubtag(s, len)) {
+ if (ultag_isLanguageSubtag(s, len) && len != 4) {
state = kGotLanguage;
return TRUE;
}
@@ -1775,11 +1794,6 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status)
return;
}
- /* Determine if variants already exists */
- if (ultag_getVariantsSize(langtag)) {
- posixVariant = TRUE;
- }
-
n = ultag_getExtensionsSize(langtag);
/* resolve locale keywords and reordering keys */
@@ -1787,6 +1801,11 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status)
key = ultag_getExtensionKey(langtag, i);
type = ultag_getExtensionValue(langtag, i);
if (*key == LDMLEXT) {
+ /* Determine if variants already exists */
+ if (ultag_getVariantsSize(langtag)) {
+ posixVariant = TRUE;
+ }
+
_appendLDMLExtensionAsKeywords(type, &kwdFirst, extPool, kwdBuf, &posixVariant, status);
if (U_FAILURE(*status)) {
break;
@@ -2692,8 +2711,7 @@ ulocimp_toLanguageTag(const char* localeID,
if (U_SUCCESS(tmpStatus)) {
if (ultag_isPrivateuseValueSubtags(buf.data(), buf.length())) {
/* return private use only tag */
- static const char PREFIX[] = { PRIVATEUSE, SEP };
- sink.Append(PREFIX, sizeof(PREFIX));
+ sink.Append("und-x-", 6);
sink.Append(buf.data(), buf.length());
done = TRUE;
} else if (strict) {
diff --git a/source/common/ulocimp.h b/source/common/ulocimp.h
index 5691fe9a..1f796aa2 100644
--- a/source/common/ulocimp.h
+++ b/source/common/ulocimp.h
@@ -286,6 +286,9 @@ ultag_isUnicodeLocaleType(const char* s, int32_t len);
U_CFUNC UBool
ultag_isVariantSubtags(const char* s, int32_t len);
+U_CAPI const char * U_EXPORT2
+ultag_getTKeyStart(const char *localeID);
+
U_CFUNC const char*
ulocimp_toBcpKey(const char* key);