Cherry-pick upstream ICU PR 1140
ICU-13786 Fix addLikelySubtags/minimizeSubtags #1140
Bug: https://unicode-org.atlassian.net/browse/ICU-13786
PR: https://github.com/unicode-org/icu/pull/1140
Bug: v8:10448
Change-Id: Id5e8b0037bbe76941fa89f7564d49cf27a990b2e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/deps/icu/+/2231777
Reviewed-by: Jungshik Shin <jshin@chromium.org>
diff --git a/README.chromium b/README.chromium
index a722026..91d3eb2 100644
--- a/README.chromium
+++ b/README.chromium
@@ -232,5 +232,9 @@
https://github.com/unicode-org/icu/pull/951 that use ures and caused
30ms startup performance regression.
- patches/turnOffNewCodeInLocaleCanonical.patch
- - upstream bug:
https://unicode-org.atlassian.net/browse/ICU-21139
+
+9. Fix Locale addLikelySubtags/minimizeSubtags
+ - patches/localeAddLikely.patch
+ - upstream bug:
+ https://unicode-org.atlassian.net/browse/ICU-13786
diff --git a/patches/localeAddLikely.patch b/patches/localeAddLikely.patch
new file mode 100644
index 0000000..0c71c11
--- /dev/null
+++ b/patches/localeAddLikely.patch
@@ -0,0 +1,244 @@
+diff --git a/source/common/loclikely.cpp b/source/common/loclikely.cpp
+index a4a4181c..54c4a464 100644
+--- a/source/common/loclikely.cpp
++++ b/source/common/loclikely.cpp
+@@ -826,7 +826,7 @@ error:
+ } \
+ } UPRV_BLOCK_MACRO_END
+
+-static void
++static UBool
+ _uloc_addLikelySubtags(const char* localeID,
+ icu::ByteSink& sink,
+ UErrorCode* err) {
+@@ -897,15 +897,22 @@ _uloc_addLikelySubtags(const char* localeID,
+ sink.Append(localeID, localIDLength);
+ }
+
+- return;
++ return success;
+
+ error:
+
+ if (!U_FAILURE(*err)) {
+ *err = U_ILLEGAL_ARGUMENT_ERROR;
+ }
++ return FALSE;
+ }
+
++// Add likely subtags to the sink
++// return true if the value in the sink is produced by a match during the lookup
++// return false if the value in the sink is the same as input because there are
++// no match after the lookup.
++static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*);
++
+ static void
+ _uloc_minimizeSubtags(const char* localeID,
+ icu::ByteSink& sink,
+@@ -921,6 +928,7 @@ _uloc_minimizeSubtags(const char* localeID,
+ const char* trailing = "";
+ int32_t trailingLength = 0;
+ int32_t trailingIndex = 0;
++ UBool successGetMax = FALSE;
+
+ if(U_FAILURE(*err)) {
+ goto error;
+@@ -961,7 +969,7 @@ _uloc_minimizeSubtags(const char* localeID,
+ {
+ icu::CharString base;
+ {
+- icu::CharStringByteSink sink(&base);
++ icu::CharStringByteSink baseSink(&base);
+ createTagString(
+ lang,
+ langLength,
+@@ -971,7 +979,7 @@ _uloc_minimizeSubtags(const char* localeID,
+ regionLength,
+ NULL,
+ 0,
+- sink,
++ baseSink,
+ err);
+ }
+
+@@ -980,8 +988,8 @@ _uloc_minimizeSubtags(const char* localeID,
+ * from AddLikelySubtags.
+ **/
+ {
+- icu::CharStringByteSink sink(&maximizedTagBuffer);
+- ulocimp_addLikelySubtags(base.data(), sink, err);
++ icu::CharStringByteSink maxSink(&maximizedTagBuffer);
++ successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
+ }
+ }
+
+@@ -989,13 +997,40 @@ _uloc_minimizeSubtags(const char* localeID,
+ goto error;
+ }
+
++ if (!successGetMax) {
++ /**
++ * If we got here, return the locale ID parameter unchanged.
++ **/
++ const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
++ sink.Append(localeID, localeIDLength);
++ return;
++ }
++
++ // In the following, the lang, script, region are referring to those in
++ // the maximizedTagBuffer, not the one in the localeID.
++ langLength = sizeof(lang);
++ scriptLength = sizeof(script);
++ regionLength = sizeof(region);
++ parseTagString(
++ maximizedTagBuffer.data(),
++ lang,
++ &langLength,
++ script,
++ &scriptLength,
++ region,
++ &regionLength,
++ err);
++ if(U_FAILURE(*err)) {
++ goto error;
++ }
++
+ /**
+ * Start first with just the language.
+ **/
+ {
+ icu::CharString tagBuffer;
+ {
+- icu::CharStringByteSink sink(&tagBuffer);
++ icu::CharStringByteSink tagSink(&tagBuffer);
+ createLikelySubtagsString(
+ lang,
+ langLength,
+@@ -1005,14 +1040,15 @@ _uloc_minimizeSubtags(const char* localeID,
+ 0,
+ NULL,
+ 0,
+- sink,
++ tagSink,
+ err);
+ }
+
+ if(U_FAILURE(*err)) {
+ goto error;
+ }
+- else if (!tagBuffer.isEmpty() && uprv_strnicmp(
++ else if (!tagBuffer.isEmpty() &&
++ uprv_strnicmp(
+ maximizedTagBuffer.data(),
+ tagBuffer.data(),
+ tagBuffer.length()) == 0) {
+@@ -1039,7 +1075,7 @@ _uloc_minimizeSubtags(const char* localeID,
+
+ icu::CharString tagBuffer;
+ {
+- icu::CharStringByteSink sink(&tagBuffer);
++ icu::CharStringByteSink tagSink(&tagBuffer);
+ createLikelySubtagsString(
+ lang,
+ langLength,
+@@ -1049,14 +1085,15 @@ _uloc_minimizeSubtags(const char* localeID,
+ regionLength,
+ NULL,
+ 0,
+- sink,
++ tagSink,
+ err);
+ }
+
+ if(U_FAILURE(*err)) {
+ goto error;
+ }
+- else if (uprv_strnicmp(
++ else if (!tagBuffer.isEmpty() &&
++ uprv_strnicmp(
+ maximizedTagBuffer.data(),
+ tagBuffer.data(),
+ tagBuffer.length()) == 0) {
+@@ -1081,10 +1118,10 @@ _uloc_minimizeSubtags(const char* localeID,
+ * since trying with all three subtags would only yield the
+ * maximal version that we already have.
+ **/
+- if (scriptLength > 0 && regionLength > 0) {
++ if (scriptLength > 0) {
+ icu::CharString tagBuffer;
+ {
+- icu::CharStringByteSink sink(&tagBuffer);
++ icu::CharStringByteSink tagSink(&tagBuffer);
+ createLikelySubtagsString(
+ lang,
+ langLength,
+@@ -1094,14 +1131,15 @@ _uloc_minimizeSubtags(const char* localeID,
+ 0,
+ NULL,
+ 0,
+- sink,
++ tagSink,
+ err);
+ }
+
+ if(U_FAILURE(*err)) {
+ goto error;
+ }
+- else if (uprv_strnicmp(
++ else if (!tagBuffer.isEmpty() &&
++ uprv_strnicmp(
+ maximizedTagBuffer.data(),
+ tagBuffer.data(),
+ tagBuffer.length()) == 0) {
+@@ -1123,10 +1161,19 @@ _uloc_minimizeSubtags(const char* localeID,
+
+ {
+ /**
+- * If we got here, return the locale ID parameter.
++ * If we got here, return the max + trail.
+ **/
+- const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
+- sink.Append(localeID, localeIDLength);
++ createTagString(
++ lang,
++ langLength,
++ script,
++ scriptLength,
++ region,
++ regionLength,
++ trailing,
++ trailingLength,
++ sink,
++ err);
+ return;
+ }
+
+@@ -1193,15 +1240,23 @@ uloc_addLikelySubtags(const char* localeID,
+ return reslen;
+ }
+
+-U_CAPI void U_EXPORT2
+-ulocimp_addLikelySubtags(const char* localeID,
+- icu::ByteSink& sink,
+- UErrorCode* status) {
++static UBool
++_ulocimp_addLikelySubtags(const char* localeID,
++ icu::ByteSink& sink,
++ UErrorCode* status) {
+ char localeBuffer[ULOC_FULLNAME_CAPACITY];
+
+ if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
+- _uloc_addLikelySubtags(localeBuffer, sink, status);
++ return _uloc_addLikelySubtags(localeBuffer, sink, status);
+ }
++ return FALSE;
++}
++
++U_CAPI void U_EXPORT2
++ulocimp_addLikelySubtags(const char* localeID,
++ icu::ByteSink& sink,
++ UErrorCode* status) {
++ _ulocimp_addLikelySubtags(localeID, sink, status);
+ }
+
+ U_CAPI int32_t U_EXPORT2
diff --git a/source/common/loclikely.cpp b/source/common/loclikely.cpp
index a4a4181..54c4a46 100644
--- a/source/common/loclikely.cpp
+++ b/source/common/loclikely.cpp
@@ -826,7 +826,7 @@
} \
} UPRV_BLOCK_MACRO_END
-static void
+static UBool
_uloc_addLikelySubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode* err) {
@@ -897,15 +897,22 @@
sink.Append(localeID, localIDLength);
}
- return;
+ return success;
error:
if (!U_FAILURE(*err)) {
*err = U_ILLEGAL_ARGUMENT_ERROR;
}
+ return FALSE;
}
+// Add likely subtags to the sink
+// return true if the value in the sink is produced by a match during the lookup
+// return false if the value in the sink is the same as input because there are
+// no match after the lookup.
+static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*);
+
static void
_uloc_minimizeSubtags(const char* localeID,
icu::ByteSink& sink,
@@ -921,6 +928,7 @@
const char* trailing = "";
int32_t trailingLength = 0;
int32_t trailingIndex = 0;
+ UBool successGetMax = FALSE;
if(U_FAILURE(*err)) {
goto error;
@@ -961,7 +969,7 @@
{
icu::CharString base;
{
- icu::CharStringByteSink sink(&base);
+ icu::CharStringByteSink baseSink(&base);
createTagString(
lang,
langLength,
@@ -971,7 +979,7 @@
regionLength,
NULL,
0,
- sink,
+ baseSink,
err);
}
@@ -980,8 +988,8 @@
* from AddLikelySubtags.
**/
{
- icu::CharStringByteSink sink(&maximizedTagBuffer);
- ulocimp_addLikelySubtags(base.data(), sink, err);
+ icu::CharStringByteSink maxSink(&maximizedTagBuffer);
+ successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
}
}
@@ -989,13 +997,40 @@
goto error;
}
+ if (!successGetMax) {
+ /**
+ * If we got here, return the locale ID parameter unchanged.
+ **/
+ const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
+ sink.Append(localeID, localeIDLength);
+ return;
+ }
+
+ // In the following, the lang, script, region are referring to those in
+ // the maximizedTagBuffer, not the one in the localeID.
+ langLength = sizeof(lang);
+ scriptLength = sizeof(script);
+ regionLength = sizeof(region);
+ parseTagString(
+ maximizedTagBuffer.data(),
+ lang,
+ &langLength,
+ script,
+ &scriptLength,
+ region,
+ &regionLength,
+ err);
+ if(U_FAILURE(*err)) {
+ goto error;
+ }
+
/**
* Start first with just the language.
**/
{
icu::CharString tagBuffer;
{
- icu::CharStringByteSink sink(&tagBuffer);
+ icu::CharStringByteSink tagSink(&tagBuffer);
createLikelySubtagsString(
lang,
langLength,
@@ -1005,14 +1040,15 @@
0,
NULL,
0,
- sink,
+ tagSink,
err);
}
if(U_FAILURE(*err)) {
goto error;
}
- else if (!tagBuffer.isEmpty() && uprv_strnicmp(
+ else if (!tagBuffer.isEmpty() &&
+ uprv_strnicmp(
maximizedTagBuffer.data(),
tagBuffer.data(),
tagBuffer.length()) == 0) {
@@ -1039,7 +1075,7 @@
icu::CharString tagBuffer;
{
- icu::CharStringByteSink sink(&tagBuffer);
+ icu::CharStringByteSink tagSink(&tagBuffer);
createLikelySubtagsString(
lang,
langLength,
@@ -1049,14 +1085,15 @@
regionLength,
NULL,
0,
- sink,
+ tagSink,
err);
}
if(U_FAILURE(*err)) {
goto error;
}
- else if (uprv_strnicmp(
+ else if (!tagBuffer.isEmpty() &&
+ uprv_strnicmp(
maximizedTagBuffer.data(),
tagBuffer.data(),
tagBuffer.length()) == 0) {
@@ -1081,10 +1118,10 @@
* since trying with all three subtags would only yield the
* maximal version that we already have.
**/
- if (scriptLength > 0 && regionLength > 0) {
+ if (scriptLength > 0) {
icu::CharString tagBuffer;
{
- icu::CharStringByteSink sink(&tagBuffer);
+ icu::CharStringByteSink tagSink(&tagBuffer);
createLikelySubtagsString(
lang,
langLength,
@@ -1094,14 +1131,15 @@
0,
NULL,
0,
- sink,
+ tagSink,
err);
}
if(U_FAILURE(*err)) {
goto error;
}
- else if (uprv_strnicmp(
+ else if (!tagBuffer.isEmpty() &&
+ uprv_strnicmp(
maximizedTagBuffer.data(),
tagBuffer.data(),
tagBuffer.length()) == 0) {
@@ -1123,10 +1161,19 @@
{
/**
- * If we got here, return the locale ID parameter.
+ * If we got here, return the max + trail.
**/
- const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
- sink.Append(localeID, localeIDLength);
+ createTagString(
+ lang,
+ langLength,
+ script,
+ scriptLength,
+ region,
+ regionLength,
+ trailing,
+ trailingLength,
+ sink,
+ err);
return;
}
@@ -1193,15 +1240,23 @@
return reslen;
}
+static UBool
+_ulocimp_addLikelySubtags(const char* localeID,
+ icu::ByteSink& sink,
+ UErrorCode* status) {
+ char localeBuffer[ULOC_FULLNAME_CAPACITY];
+
+ if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
+ return _uloc_addLikelySubtags(localeBuffer, sink, status);
+ }
+ return FALSE;
+}
+
U_CAPI void U_EXPORT2
ulocimp_addLikelySubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode* status) {
- char localeBuffer[ULOC_FULLNAME_CAPACITY];
-
- if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
- _uloc_addLikelySubtags(localeBuffer, sink, status);
- }
+ _ulocimp_addLikelySubtags(localeID, sink, status);
}
U_CAPI int32_t U_EXPORT2