| diff --git a/source/common/locid.cpp b/source/common/locid.cpp |
| index feadbcbc..4733af56 100644 |
| --- a/source/common/locid.cpp |
| +++ b/source/common/locid.cpp |
| @@ -627,6 +627,17 @@ private: |
| LocalMemory<const char*>& types, |
| LocalMemory<int32_t>& replacementIndexes, |
| int32_t &length, UErrorCode &status); |
| + |
| + // Read the subdivisionAlias data from alias to |
| + // strings+types+replacementIndexes |
| + // Allocate length items for types, to store the type field. |
| + // Allocate length items for replacementIndexes, |
| + // to store the index in the strings for the replacement variant. |
| + void readSubdivisionAlias(UResourceBundle* alias, |
| + UniqueCharStrings* strings, |
| + LocalMemory<const char*>& types, |
| + LocalMemory<int32_t>& replacementIndexes, |
| + int32_t &length, UErrorCode &status); |
| }; |
| |
| /** |
| @@ -647,6 +658,7 @@ public: |
| const CharStringMap& scriptMap() const { return script; } |
| const CharStringMap& territoryMap() const { return territory; } |
| const CharStringMap& variantMap() const { return variant; } |
| + const CharStringMap& subdivisionMap() const { return subdivision; } |
| |
| static void U_CALLCONV loadData(UErrorCode &status); |
| static UBool U_CALLCONV cleanup(); |
| @@ -658,11 +670,13 @@ private: |
| CharStringMap scriptMap, |
| CharStringMap territoryMap, |
| CharStringMap variantMap, |
| + CharStringMap subdivisionMap, |
| CharString* strings) |
| : language(std::move(languageMap)), |
| script(std::move(scriptMap)), |
| territory(std::move(territoryMap)), |
| variant(std::move(variantMap)), |
| + subdivision(std::move(subdivisionMap)), |
| strings(strings) { |
| } |
| |
| @@ -676,6 +690,7 @@ private: |
| CharStringMap script; |
| CharStringMap territory; |
| CharStringMap variant; |
| + CharStringMap subdivision; |
| CharString* strings; |
| |
| friend class AliasDataBuilder; |
| @@ -866,6 +881,34 @@ AliasDataBuilder::readVariantAlias( |
| status); |
| } |
| |
| +/** |
| + * Read the subdivisionAlias data from alias to strings+types+replacementIndexes. |
| + * Allocate length items for types, to store the type field. Allocate length |
| + * items for replacementIndexes, to store the index in the strings for the |
| + * replacement regions. |
| + */ |
| +void |
| +AliasDataBuilder::readSubdivisionAlias( |
| + UResourceBundle* alias, |
| + UniqueCharStrings* strings, |
| + LocalMemory<const char*>& types, |
| + LocalMemory<int32_t>& replacementIndexes, |
| + int32_t &length, |
| + UErrorCode &status) |
| +{ |
| + return readAlias( |
| + alias, strings, types, replacementIndexes, length, |
| +#if U_DEBUG |
| + [](const char* type) { |
| + U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8); |
| + }, |
| +#else |
| + [](const char*) {}, |
| +#endif |
| + [](const UnicodeString&) { }, |
| + status); |
| +} |
| + |
| /** |
| * Initializes the alias data from the ICU resource bundles. The alias data |
| * contains alias of language, country, script and variants. |
| @@ -905,12 +948,14 @@ AliasDataBuilder::build(UErrorCode &status) { |
| ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status)); |
| LocalUResourceBundlePointer variantAlias( |
| ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status)); |
| + LocalUResourceBundlePointer subdivisionAlias( |
| + ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status)); |
| |
| if (U_FAILURE(status)) { |
| return nullptr; |
| } |
| int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0, |
| - variantLength = 0; |
| + variantLength = 0, subdivisionLength = 0; |
| |
| // Read the languageAlias into languageTypes, languageReplacementIndexes |
| // and strings |
| @@ -955,6 +1000,16 @@ AliasDataBuilder::build(UErrorCode &status) { |
| variantReplacementIndexes, |
| variantLength, status); |
| |
| + // Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes |
| + // and strings |
| + LocalMemory<const char*> subdivisionTypes; |
| + LocalMemory<int32_t> subdivisionReplacementIndexes; |
| + readSubdivisionAlias(subdivisionAlias.getAlias(), |
| + &strings, |
| + subdivisionTypes, |
| + subdivisionReplacementIndexes, |
| + subdivisionLength, status); |
| + |
| if (U_FAILURE(status)) { |
| return nullptr; |
| } |
| @@ -994,6 +1049,14 @@ AliasDataBuilder::build(UErrorCode &status) { |
| status); |
| } |
| |
| + // Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes. |
| + CharStringMap subdivisionMap(2, status); |
| + for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) { |
| + subdivisionMap.put(subdivisionTypes[i], |
| + strings.get(subdivisionReplacementIndexes[i]), |
| + status); |
| + } |
| + |
| if (U_FAILURE(status)) { |
| return nullptr; |
| } |
| @@ -1004,6 +1067,7 @@ AliasDataBuilder::build(UErrorCode &status) { |
| std::move(scriptMap), |
| std::move(territoryMap), |
| std::move(variantMap), |
| + std::move(subdivisionMap), |
| strings.orphanCharStrings()); |
| |
| if (data == nullptr) { |
| @@ -1105,6 +1169,14 @@ private: |
| |
| // Replace by using variantAlias. |
| bool replaceVariant(UErrorCode& status); |
| + |
| + // Replace by using subdivisionAlias. |
| + bool replaceSubdivision(StringPiece subdivision, |
| + CharString& output, UErrorCode& status); |
| + |
| + // Replace transformed extensions. |
| + bool replaceTransformedExtensions( |
| + CharString& transformedExtensions, CharString& output, UErrorCode& status); |
| }; |
| |
| CharString& |
| @@ -1433,6 +1505,99 @@ AliasReplacer::replaceVariant(UErrorCode& status) |
| return false; |
| } |
| |
| +bool |
| +AliasReplacer::replaceSubdivision( |
| + StringPiece subdivision, CharString& output, UErrorCode& status) |
| +{ |
| + if (U_FAILURE(status)) { |
| + return false; |
| + } |
| + const char *replacement = data->subdivisionMap().get(subdivision.data()); |
| + if (replacement != nullptr) { |
| + const char* firstSpace = uprv_strchr(replacement, ' '); |
| + // Found replacement data for this subdivision. |
| + size_t len = (firstSpace != nullptr) ? |
| + (firstSpace - replacement) : uprv_strlen(replacement); |
| + // Ignore len == 2, see CLDR-14312 |
| + if (3 <= len && len <= 8) { |
| + output.append(replacement, (int32_t)len, status); |
| + } |
| + return true; |
| + } |
| + return false; |
| +} |
| + |
| +bool |
| +AliasReplacer::replaceTransformedExtensions( |
| + CharString& transformedExtensions, CharString& output, UErrorCode& status) |
| +{ |
| + // The content of the transformedExtensions will be modified in this |
| + // function to NULL-terminating (tkey-tvalue) pairs. |
| + if (U_FAILURE(status)) { |
| + return false; |
| + } |
| + int32_t len = transformedExtensions.length(); |
| + const char* str = transformedExtensions.data(); |
| + const char* tkey = ultag_getTKeyStart(str); |
| + int32_t tlangLen = (tkey == str) ? 0 : |
| + ((tkey == nullptr) ? len : (tkey - str - 1)); |
| + CharStringByteSink sink(&output); |
| + if (tlangLen > 0) { |
| + Locale tlang = LocaleBuilder() |
| + .setLanguageTag(StringPiece(str, tlangLen)) |
| + .build(status); |
| + tlang.canonicalize(status); |
| + tlang.toLanguageTag(sink, status); |
| + if (U_FAILURE(status)) { |
| + return false; |
| + } |
| + T_CString_toLowerCase(output.data()); |
| + } |
| + if (tkey != nullptr) { |
| + // We need to sort the tfields by tkey |
| + UVector tfields(status); |
| + if (U_FAILURE(status)) { |
| + return false; |
| + } |
| + do { |
| + const char* tvalue = uprv_strchr(tkey, '-'); |
| + if (tvalue == nullptr) { |
| + status = U_ILLEGAL_ARGUMENT_ERROR; |
| + } |
| + const char* nextTKey = ultag_getTKeyStart(tvalue); |
| + if (nextTKey != nullptr) { |
| + *((char*)(nextTKey-1)) = '\0'; // NULL terminate tvalue |
| + } |
| + tfields.insertElementAt((void*)tkey, tfields.size(), status); |
| + if (U_FAILURE(status)) { |
| + return false; |
| + } |
| + tkey = nextTKey; |
| + } while (tkey != nullptr); |
| + tfields.sort([](UElement e1, UElement e2) -> int8_t { |
| + return uprv_strcmp( |
| + (const char*)e1.pointer, (const char*)e2.pointer); |
| + }, status); |
| + for (int32_t i = 0; i < tfields.size(); i++) { |
| + if (output.length() > 0) { |
| + output.append('-', status); |
| + } |
| + const char* tfield = (const char*) tfields.elementAt(i); |
| + const char* tvalue = uprv_strchr(tfield, '-'); |
| + // Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue. |
| + U_ASSERT(tvalue != nullptr); |
| + *((char*)tvalue++) = '\0'; // NULL terminate tkey |
| + output.append(tfield, status).append('-', status); |
| + const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue, nullptr, nullptr); |
| + output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status); |
| + } |
| + } |
| + if (U_FAILURE(status)) { |
| + return false; |
| + } |
| + return true; |
| +} |
| + |
| CharString& |
| AliasReplacer::outputToString( |
| CharString& out, UErrorCode status) |
| @@ -1495,7 +1660,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status) |
| region = nullptr; |
| } |
| const char* variantsStr = locale.getVariant(); |
| - const char* extensionsStr = locale_getKeywordsStart(locale.getName()); |
| CharString variantsBuff(variantsStr, -1, status); |
| if (!variantsBuff.isEmpty()) { |
| if (U_FAILURE(status)) { return false; } |
| @@ -1559,11 +1723,52 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status) |
| if (U_FAILURE(status)) { return false; } |
| // Nothing changed and we know the order of the vaiants are not change |
| // because we have no variant or only one. |
| - if (changed == 0 && variants.size() <= 1) { |
| + const char* extensionsStr = locale_getKeywordsStart(locale.getName()); |
| + if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) { |
| return false; |
| } |
| outputToString(out, status); |
| + if (U_FAILURE(status)) { |
| + return false; |
| + } |
| if (extensionsStr != nullptr) { |
| + changed = 0; |
| + Locale temp(locale); |
| + LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status)); |
| + if (U_SUCCESS(status) && !iter.isNull()) { |
| + const char* key; |
| + while ((key = iter->next(nullptr, status)) != nullptr) { |
| + if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0 || |
| + uprv_strcmp("t", key) == 0) { |
| + CharString value; |
| + CharStringByteSink valueSink(&value); |
| + locale.getKeywordValue(key, valueSink, status); |
| + if (U_FAILURE(status)) { |
| + status = U_ZERO_ERROR; |
| + continue; |
| + } |
| + CharString replacement; |
| + if (uprv_strlen(key) == 2) { |
| + if (replaceSubdivision(value.toStringPiece(), replacement, status)) { |
| + changed++; |
| + temp.setKeywordValue(key, replacement.data(), status); |
| + } |
| + } else { |
| + U_ASSERT(uprv_strcmp(key, "t") == 0); |
| + if (replaceTransformedExtensions(value, replacement, status)) { |
| + changed++; |
| + temp.setKeywordValue(key, replacement.data(), status); |
| + } |
| + } |
| + if (U_FAILURE(status)) { |
| + return false; |
| + } |
| + } |
| + } |
| + } |
| + if (changed != 0) { |
| + extensionsStr = locale_getKeywordsStart(temp.getName()); |
| + } |
| out.append(extensionsStr, status); |
| } |
| if (U_FAILURE(status)) { |
| @@ -1571,8 +1776,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status) |
| } |
| // If the tag is not changed, return. |
| if (uprv_strcmp(out.data(), locale.getName()) == 0) { |
| - U_ASSERT(changed == 0); |
| - U_ASSERT(variants.size() > 1); |
| out.clear(); |
| return false; |
| } |
| diff --git a/source/common/loclikelysubtags.cpp b/source/common/loclikelysubtags.cpp |
| index a031bfa5..aa592e6e 100644 |
| --- a/source/common/loclikelysubtags.cpp |
| +++ b/source/common/loclikelysubtags.cpp |
| @@ -320,7 +320,8 @@ XLikelySubtags::~XLikelySubtags() { |
| LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const { |
| const char *name = locale.getName(); |
| if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=") |
| - // Private use language tag x-subtag-subtag... |
| + // Private use language tag x-subtag-subtag... which CLDR changes to |
| + // und-x-subtag-subtag... |
| return LSR(name, "", "", LSR::EXPLICIT_LSR); |
| } |
| return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(), |
| diff --git a/source/common/uloc_tag.cpp b/source/common/uloc_tag.cpp |
| index 7f7fd911..ee1cedf3 100644 |
| --- a/source/common/uloc_tag.cpp |
| +++ b/source/common/uloc_tag.cpp |
| @@ -129,7 +129,6 @@ static const char* const LEGACY[] = { |
| // Legacy tags with no preferred value in the IANA |
| // registry. Kept for now for the backward compatibility |
| // because ICU has mapped them this way. |
| - "cel-gaulish", "xtg-x-cel-gaulish", |
| "i-default", "en-x-i-default", |
| "i-enochian", "und-x-i-enochian", |
| "i-mingo", "see-x-i-mingo", |
| @@ -647,6 +646,22 @@ _isTKey(const char* s, int32_t len) |
| return FALSE; |
| } |
| |
| +U_CAPI const char * U_EXPORT2 |
| +ultag_getTKeyStart(const char *localeID) { |
| + const char *result = localeID; |
| + const char *sep; |
| + while((sep = uprv_strchr(result, SEP)) != nullptr) { |
| + if (_isTKey(result, sep - result)) { |
| + return result; |
| + } |
| + result = ++sep; |
| + } |
| + if (_isTKey(result, -1)) { |
| + return result; |
| + } |
| + return nullptr; |
| +} |
| + |
| static UBool |
| _isTValue(const char* s, int32_t len) |
| { |
| @@ -671,9 +686,13 @@ _isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len) |
| const int32_t kGotTKey = -1; // Got tkey, wait for tvalue. ERROR if stop here. |
| const int32_t kGotTValue = 6; // Got tvalue, wait for tkey, tvalue or end |
| |
| + |
| + if (len < 0) { |
| + len = (int32_t)uprv_strlen(s); |
| + } |
| switch (state) { |
| case kStart: |
| - if (ultag_isLanguageSubtag(s, len)) { |
| + if (ultag_isLanguageSubtag(s, len) && len != 4) { |
| state = kGotLanguage; |
| return TRUE; |
| } |
| @@ -1775,11 +1794,6 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status) |
| return; |
| } |
| |
| - /* Determine if variants already exists */ |
| - if (ultag_getVariantsSize(langtag)) { |
| - posixVariant = TRUE; |
| - } |
| - |
| n = ultag_getExtensionsSize(langtag); |
| |
| /* resolve locale keywords and reordering keys */ |
| @@ -1787,6 +1801,11 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status) |
| key = ultag_getExtensionKey(langtag, i); |
| type = ultag_getExtensionValue(langtag, i); |
| if (*key == LDMLEXT) { |
| + /* Determine if variants already exists */ |
| + if (ultag_getVariantsSize(langtag)) { |
| + posixVariant = TRUE; |
| + } |
| + |
| _appendLDMLExtensionAsKeywords(type, &kwdFirst, extPool, kwdBuf, &posixVariant, status); |
| if (U_FAILURE(*status)) { |
| break; |
| @@ -2692,8 +2711,7 @@ ulocimp_toLanguageTag(const char* localeID, |
| if (U_SUCCESS(tmpStatus)) { |
| if (ultag_isPrivateuseValueSubtags(buf.data(), buf.length())) { |
| /* return private use only tag */ |
| - static const char PREFIX[] = { PRIVATEUSE, SEP }; |
| - sink.Append(PREFIX, sizeof(PREFIX)); |
| + sink.Append("und-x-", 6); |
| sink.Append(buf.data(), buf.length()); |
| done = TRUE; |
| } else if (strict) { |
| diff --git a/source/common/ulocimp.h b/source/common/ulocimp.h |
| index 5691fe9a..1f796aa2 100644 |
| --- a/source/common/ulocimp.h |
| +++ b/source/common/ulocimp.h |
| @@ -286,6 +286,9 @@ ultag_isUnicodeLocaleType(const char* s, int32_t len); |
| U_CFUNC UBool |
| ultag_isVariantSubtags(const char* s, int32_t len); |
| |
| +U_CAPI const char * U_EXPORT2 |
| +ultag_getTKeyStart(const char *localeID); |
| + |
| U_CFUNC const char* |
| ulocimp_toBcpKey(const char* key); |
| |