Cherry-pick upstream ICU PR 1140

ICU-13786 Fix addLikelySubtags/minimizeSubtags #1140

Bug: https://unicode-org.atlassian.net/browse/ICU-13786
PR: https://github.com/unicode-org/icu/pull/1140

Bug: v8:10448
Change-Id: Id5e8b0037bbe76941fa89f7564d49cf27a990b2e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/deps/icu/+/2231777
Reviewed-by: Jungshik Shin <jshin@chromium.org>
diff --git a/README.chromium b/README.chromium
index a722026..91d3eb2 100644
--- a/README.chromium
+++ b/README.chromium
@@ -232,5 +232,9 @@
    https://github.com/unicode-org/icu/pull/951 that use ures and caused
    30ms startup performance regression.
   - patches/turnOffNewCodeInLocaleCanonical.patch
-  - upstream bug:
     https://unicode-org.atlassian.net/browse/ICU-21139
+
+9. Fix Locale addLikelySubtags/minimizeSubtags
+  - patches/localeAddLikely.patch
+  - upstream bug:
+    https://unicode-org.atlassian.net/browse/ICU-13786
diff --git a/patches/localeAddLikely.patch b/patches/localeAddLikely.patch
new file mode 100644
index 0000000..0c71c11
--- /dev/null
+++ b/patches/localeAddLikely.patch
@@ -0,0 +1,244 @@
+diff --git a/source/common/loclikely.cpp b/source/common/loclikely.cpp
+index a4a4181c..54c4a464 100644
+--- a/source/common/loclikely.cpp
++++ b/source/common/loclikely.cpp
+@@ -826,7 +826,7 @@ error:
+     } \
+ } UPRV_BLOCK_MACRO_END
+ 
+-static void
++static UBool
+ _uloc_addLikelySubtags(const char* localeID,
+                        icu::ByteSink& sink,
+                        UErrorCode* err) {
+@@ -897,15 +897,22 @@ _uloc_addLikelySubtags(const char* localeID,
+         sink.Append(localeID, localIDLength);
+     }
+ 
+-    return;
++    return success;
+ 
+ error:
+ 
+     if (!U_FAILURE(*err)) {
+         *err = U_ILLEGAL_ARGUMENT_ERROR;
+     }
++    return FALSE;
+ }
+ 
++// Add likely subtags to the sink
++// return true if the value in the sink is produced by a match during the lookup
++// return false if the value in the sink is the same as input because there are
++// no match after the lookup.
++static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*);
++
+ static void
+ _uloc_minimizeSubtags(const char* localeID,
+                       icu::ByteSink& sink,
+@@ -921,6 +928,7 @@ _uloc_minimizeSubtags(const char* localeID,
+     const char* trailing = "";
+     int32_t trailingLength = 0;
+     int32_t trailingIndex = 0;
++    UBool successGetMax = FALSE;
+ 
+     if(U_FAILURE(*err)) {
+         goto error;
+@@ -961,7 +969,7 @@ _uloc_minimizeSubtags(const char* localeID,
+     {
+         icu::CharString base;
+         {
+-            icu::CharStringByteSink sink(&base);
++            icu::CharStringByteSink baseSink(&base);
+             createTagString(
+                 lang,
+                 langLength,
+@@ -971,7 +979,7 @@ _uloc_minimizeSubtags(const char* localeID,
+                 regionLength,
+                 NULL,
+                 0,
+-                sink,
++                baseSink,
+                 err);
+         }
+ 
+@@ -980,8 +988,8 @@ _uloc_minimizeSubtags(const char* localeID,
+          * from AddLikelySubtags.
+          **/
+         {
+-            icu::CharStringByteSink sink(&maximizedTagBuffer);
+-            ulocimp_addLikelySubtags(base.data(), sink, err);
++            icu::CharStringByteSink maxSink(&maximizedTagBuffer);
++            successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
+         }
+     }
+ 
+@@ -989,13 +997,40 @@ _uloc_minimizeSubtags(const char* localeID,
+         goto error;
+     }
+ 
++    if (!successGetMax) {
++        /**
++         * If we got here, return the locale ID parameter unchanged.
++         **/
++        const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
++        sink.Append(localeID, localeIDLength);
++        return;
++    }
++
++    // In the following, the lang, script, region are referring to those in
++    // the maximizedTagBuffer, not the one in the localeID.
++    langLength = sizeof(lang);
++    scriptLength = sizeof(script);
++    regionLength = sizeof(region);
++    parseTagString(
++        maximizedTagBuffer.data(),
++        lang,
++        &langLength,
++        script,
++        &scriptLength,
++        region,
++        &regionLength,
++        err);
++    if(U_FAILURE(*err)) {
++        goto error;
++    }
++
+     /**
+      * Start first with just the language.
+      **/
+     {
+         icu::CharString tagBuffer;
+         {
+-            icu::CharStringByteSink sink(&tagBuffer);
++            icu::CharStringByteSink tagSink(&tagBuffer);
+             createLikelySubtagsString(
+                 lang,
+                 langLength,
+@@ -1005,14 +1040,15 @@ _uloc_minimizeSubtags(const char* localeID,
+                 0,
+                 NULL,
+                 0,
+-                sink,
++                tagSink,
+                 err);
+         }
+ 
+         if(U_FAILURE(*err)) {
+             goto error;
+         }
+-        else if (!tagBuffer.isEmpty() && uprv_strnicmp(
++        else if (!tagBuffer.isEmpty() &&
++                 uprv_strnicmp(
+                     maximizedTagBuffer.data(),
+                     tagBuffer.data(),
+                     tagBuffer.length()) == 0) {
+@@ -1039,7 +1075,7 @@ _uloc_minimizeSubtags(const char* localeID,
+ 
+         icu::CharString tagBuffer;
+         {
+-            icu::CharStringByteSink sink(&tagBuffer);
++            icu::CharStringByteSink tagSink(&tagBuffer);
+             createLikelySubtagsString(
+                 lang,
+                 langLength,
+@@ -1049,14 +1085,15 @@ _uloc_minimizeSubtags(const char* localeID,
+                 regionLength,
+                 NULL,
+                 0,
+-                sink,
++                tagSink,
+                 err);
+         }
+ 
+         if(U_FAILURE(*err)) {
+             goto error;
+         }
+-        else if (uprv_strnicmp(
++        else if (!tagBuffer.isEmpty() &&
++                 uprv_strnicmp(
+                     maximizedTagBuffer.data(),
+                     tagBuffer.data(),
+                     tagBuffer.length()) == 0) {
+@@ -1081,10 +1118,10 @@ _uloc_minimizeSubtags(const char* localeID,
+      * since trying with all three subtags would only yield the
+      * maximal version that we already have.
+      **/
+-    if (scriptLength > 0 && regionLength > 0) {
++    if (scriptLength > 0) {
+         icu::CharString tagBuffer;
+         {
+-            icu::CharStringByteSink sink(&tagBuffer);
++            icu::CharStringByteSink tagSink(&tagBuffer);
+             createLikelySubtagsString(
+                 lang,
+                 langLength,
+@@ -1094,14 +1131,15 @@ _uloc_minimizeSubtags(const char* localeID,
+                 0,
+                 NULL,
+                 0,
+-                sink,
++                tagSink,
+                 err);
+         }
+ 
+         if(U_FAILURE(*err)) {
+             goto error;
+         }
+-        else if (uprv_strnicmp(
++        else if (!tagBuffer.isEmpty() &&
++                 uprv_strnicmp(
+                     maximizedTagBuffer.data(),
+                     tagBuffer.data(),
+                     tagBuffer.length()) == 0) {
+@@ -1123,10 +1161,19 @@ _uloc_minimizeSubtags(const char* localeID,
+ 
+     {
+         /**
+-         * If we got here, return the locale ID parameter.
++         * If we got here, return the max + trail.
+          **/
+-        const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
+-        sink.Append(localeID, localeIDLength);
++        createTagString(
++                    lang,
++                    langLength,
++                    script,
++                    scriptLength,
++                    region,
++                    regionLength,
++                    trailing,
++                    trailingLength,
++                    sink,
++                    err);
+         return;
+     }
+ 
+@@ -1193,15 +1240,23 @@ uloc_addLikelySubtags(const char* localeID,
+     return reslen;
+ }
+ 
+-U_CAPI void U_EXPORT2
+-ulocimp_addLikelySubtags(const char* localeID,
+-                         icu::ByteSink& sink,
+-                         UErrorCode* status) {
++static UBool
++_ulocimp_addLikelySubtags(const char* localeID,
++                          icu::ByteSink& sink,
++                          UErrorCode* status) {
+     char localeBuffer[ULOC_FULLNAME_CAPACITY];
+ 
+     if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
+-        _uloc_addLikelySubtags(localeBuffer, sink, status);
++        return _uloc_addLikelySubtags(localeBuffer, sink, status);
+     }
++    return FALSE;
++}
++
++U_CAPI void U_EXPORT2
++ulocimp_addLikelySubtags(const char* localeID,
++                         icu::ByteSink& sink,
++                         UErrorCode* status) {
++    _ulocimp_addLikelySubtags(localeID, sink, status);
+ }
+ 
+ U_CAPI int32_t U_EXPORT2
diff --git a/source/common/loclikely.cpp b/source/common/loclikely.cpp
index a4a4181..54c4a46 100644
--- a/source/common/loclikely.cpp
+++ b/source/common/loclikely.cpp
@@ -826,7 +826,7 @@
     } \
 } UPRV_BLOCK_MACRO_END
 
-static void
+static UBool
 _uloc_addLikelySubtags(const char* localeID,
                        icu::ByteSink& sink,
                        UErrorCode* err) {
@@ -897,15 +897,22 @@
         sink.Append(localeID, localIDLength);
     }
 
-    return;
+    return success;
 
 error:
 
     if (!U_FAILURE(*err)) {
         *err = U_ILLEGAL_ARGUMENT_ERROR;
     }
+    return FALSE;
 }
 
+// Add likely subtags to the sink
+// return true if the value in the sink is produced by a match during the lookup
+// return false if the value in the sink is the same as input because there are
+// no match after the lookup.
+static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*);
+
 static void
 _uloc_minimizeSubtags(const char* localeID,
                       icu::ByteSink& sink,
@@ -921,6 +928,7 @@
     const char* trailing = "";
     int32_t trailingLength = 0;
     int32_t trailingIndex = 0;
+    UBool successGetMax = FALSE;
 
     if(U_FAILURE(*err)) {
         goto error;
@@ -961,7 +969,7 @@
     {
         icu::CharString base;
         {
-            icu::CharStringByteSink sink(&base);
+            icu::CharStringByteSink baseSink(&base);
             createTagString(
                 lang,
                 langLength,
@@ -971,7 +979,7 @@
                 regionLength,
                 NULL,
                 0,
-                sink,
+                baseSink,
                 err);
         }
 
@@ -980,8 +988,8 @@
          * from AddLikelySubtags.
          **/
         {
-            icu::CharStringByteSink sink(&maximizedTagBuffer);
-            ulocimp_addLikelySubtags(base.data(), sink, err);
+            icu::CharStringByteSink maxSink(&maximizedTagBuffer);
+            successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
         }
     }
 
@@ -989,13 +997,40 @@
         goto error;
     }
 
+    if (!successGetMax) {
+        /**
+         * If we got here, return the locale ID parameter unchanged.
+         **/
+        const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
+        sink.Append(localeID, localeIDLength);
+        return;
+    }
+
+    // In the following, the lang, script, region are referring to those in
+    // the maximizedTagBuffer, not the one in the localeID.
+    langLength = sizeof(lang);
+    scriptLength = sizeof(script);
+    regionLength = sizeof(region);
+    parseTagString(
+        maximizedTagBuffer.data(),
+        lang,
+        &langLength,
+        script,
+        &scriptLength,
+        region,
+        &regionLength,
+        err);
+    if(U_FAILURE(*err)) {
+        goto error;
+    }
+
     /**
      * Start first with just the language.
      **/
     {
         icu::CharString tagBuffer;
         {
-            icu::CharStringByteSink sink(&tagBuffer);
+            icu::CharStringByteSink tagSink(&tagBuffer);
             createLikelySubtagsString(
                 lang,
                 langLength,
@@ -1005,14 +1040,15 @@
                 0,
                 NULL,
                 0,
-                sink,
+                tagSink,
                 err);
         }
 
         if(U_FAILURE(*err)) {
             goto error;
         }
-        else if (!tagBuffer.isEmpty() && uprv_strnicmp(
+        else if (!tagBuffer.isEmpty() &&
+                 uprv_strnicmp(
                     maximizedTagBuffer.data(),
                     tagBuffer.data(),
                     tagBuffer.length()) == 0) {
@@ -1039,7 +1075,7 @@
 
         icu::CharString tagBuffer;
         {
-            icu::CharStringByteSink sink(&tagBuffer);
+            icu::CharStringByteSink tagSink(&tagBuffer);
             createLikelySubtagsString(
                 lang,
                 langLength,
@@ -1049,14 +1085,15 @@
                 regionLength,
                 NULL,
                 0,
-                sink,
+                tagSink,
                 err);
         }
 
         if(U_FAILURE(*err)) {
             goto error;
         }
-        else if (uprv_strnicmp(
+        else if (!tagBuffer.isEmpty() &&
+                 uprv_strnicmp(
                     maximizedTagBuffer.data(),
                     tagBuffer.data(),
                     tagBuffer.length()) == 0) {
@@ -1081,10 +1118,10 @@
      * since trying with all three subtags would only yield the
      * maximal version that we already have.
      **/
-    if (scriptLength > 0 && regionLength > 0) {
+    if (scriptLength > 0) {
         icu::CharString tagBuffer;
         {
-            icu::CharStringByteSink sink(&tagBuffer);
+            icu::CharStringByteSink tagSink(&tagBuffer);
             createLikelySubtagsString(
                 lang,
                 langLength,
@@ -1094,14 +1131,15 @@
                 0,
                 NULL,
                 0,
-                sink,
+                tagSink,
                 err);
         }
 
         if(U_FAILURE(*err)) {
             goto error;
         }
-        else if (uprv_strnicmp(
+        else if (!tagBuffer.isEmpty() &&
+                 uprv_strnicmp(
                     maximizedTagBuffer.data(),
                     tagBuffer.data(),
                     tagBuffer.length()) == 0) {
@@ -1123,10 +1161,19 @@
 
     {
         /**
-         * If we got here, return the locale ID parameter.
+         * If we got here, return the max + trail.
          **/
-        const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
-        sink.Append(localeID, localeIDLength);
+        createTagString(
+                    lang,
+                    langLength,
+                    script,
+                    scriptLength,
+                    region,
+                    regionLength,
+                    trailing,
+                    trailingLength,
+                    sink,
+                    err);
         return;
     }
 
@@ -1193,15 +1240,23 @@
     return reslen;
 }
 
+static UBool
+_ulocimp_addLikelySubtags(const char* localeID,
+                          icu::ByteSink& sink,
+                          UErrorCode* status) {
+    char localeBuffer[ULOC_FULLNAME_CAPACITY];
+
+    if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
+        return _uloc_addLikelySubtags(localeBuffer, sink, status);
+    }
+    return FALSE;
+}
+
 U_CAPI void U_EXPORT2
 ulocimp_addLikelySubtags(const char* localeID,
                          icu::ByteSink& sink,
                          UErrorCode* status) {
-    char localeBuffer[ULOC_FULLNAME_CAPACITY];
-
-    if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
-        _uloc_addLikelySubtags(localeBuffer, sink, status);
-    }
+    _ulocimp_addLikelySubtags(localeID, sink, status);
 }
 
 U_CAPI int32_t U_EXPORT2