Fix BCP47 language tag validation - extlang and privateuse extension

An extlang subtag can only be preceded by a 2- or 3-letter language code.
The length check for the preceding subtag was missing.

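In addition, the singleton check for the private use extension was buggy:
it matched any subtag starting with 'x' without checking that the subtag
is exactly one character long.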

Add "USE_CHROMIUM_ICU" to BUILD.gn to let v8 take different code paths
depending on whether or not Chromium's ICU with patches is used. This
distinction can be important for Node.js.

TBR=gsathya@chromium.org
Bug: v8:8135
Test: v8: test262/intl402/Intl/getCanonicalLocales/*
Change-Id: I431d582ec5b8e730698067d1795b1107dd3d016f
Reviewed-on: https://chromium-review.googlesource.com/1214645
Reviewed-by: Jungshik Shin <jshin@chromium.org>
diff --git a/BUILD.gn b/BUILD.gn
index c782db0..4d5bd57 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -32,6 +32,9 @@
     # We don't use ICU plugins and dyload is only necessary for them.
     # NaCl-related builds also fail looking for dlfcn.h when it's enabled.
     "U_ENABLE_DYLOAD=0",
+
+    # v8/Blink need to know whether Chromium's copy of ICU is used or not.
+    "USE_CHROMIUM_ICU=1",
   ]
 
   if (!is_component_build) {
diff --git a/README.chromium b/README.chromium
index b3e0738..b8394e5 100644
--- a/README.chromium
+++ b/README.chromium
@@ -268,9 +268,17 @@
   - upstream bug:
     https://unicode-org.atlassian.net/browse/ICU-20039
 
-12. Cherry-pick the upstream CL for quarter support in RelativeDate format
+12. Cherry-pick an upstream CL for quarter support in RelativeDate format
 
   - patches/reldate_quarter.patch
   - upstream bug:
     https://unicode-org.atlassian.net/browse/ICU-20022
   - fix: https://github.com/unicode-org/icu/pull/77
+
+13. Cherry-pick an upstream CL for BCP47 language tag validation
+
+  - patches/langtag_bcp47.patch
+  - upstream bug:
+    https://unicode-org.atlassian.net/browse/ICU-20098
+  - fix: https://github.com/unicode-org/icu/pull/102
+
diff --git a/patches/langtag_bcp47.patch b/patches/langtag_bcp47.patch
new file mode 100644
index 0000000..8d93fd7
--- /dev/null
+++ b/patches/langtag_bcp47.patch
@@ -0,0 +1,24 @@
+diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp
+index 87b9f63f27..2fb136f22a 100644
+--- a/icu4c/source/common/uloc_tag.cpp
++++ b/icu4c/source/common/uloc_tag.cpp
+@@ -1901,7 +1901,9 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
+                 t->language = T_CString_toLowerCase(pSubtag);
+ 
+                 pLastGoodPosition = pSep;
+-                next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
++                next = SCRT | REGN | VART | EXTS | PRIV;
++                if (subtagLen <= 3)
++                  next |= EXTL;
+                 continue;
+             }
+         }
+@@ -2150,7 +2150,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
+             }
+         }
+         if (next & PRIV) {
+-            if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
++            if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
+                 char *pPrivuseVal;
+ 
+                 if (pExtension != NULL) {
diff --git a/source/common/uloc_tag.cpp b/source/common/uloc_tag.cpp
index 99d0c6c..32b3d32 100644
--- a/source/common/uloc_tag.cpp
+++ b/source/common/uloc_tag.cpp
@@ -2014,7 +2014,9 @@
                 t->language = T_CString_toLowerCase(pSubtag);
 
                 pLastGoodPosition = pSep;
-                next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
+                next = SCRT | REGN | VART | EXTS | PRIV;
+                if (subtagLen <= 3)
+                  next |= EXTL;
                 continue;
             }
         }
@@ -2148,7 +2150,7 @@
             }
         }
         if (next & PRIV) {
-            if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
+            if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
                 char *pPrivuseVal;
 
                 if (pExtension != NULL) {