Fix BCP47 language tag validation - extlang and privateuse extension
An extlang subtag can only be preceded by a 2- or 3-letter language code.
The length check for the preceding subtag was missing.
In addition, the singleton check for the private use extension was buggy:
it did not verify that the subtag is exactly one character long.
Add "USE_CHROMIUM_ICU" to BUILD.gn to let v8 take different code paths
depending on whether or not Chromium's ICU with patches is used. This
distinction can be important for Node.js.
TBR=gsathya@chromium.org
Bug: v8:8135
Test: v8: test262/intl402/Intl/getCanonicalLocales/*
Change-Id: I431d582ec5b8e730698067d1795b1107dd3d016f
Reviewed-on: https://chromium-review.googlesource.com/1214645
Reviewed-by: Jungshik Shin <jshin@chromium.org>
diff --git a/BUILD.gn b/BUILD.gn
index c782db0..4d5bd57 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -32,6 +32,9 @@
# We don't use ICU plugins and dyload is only necessary for them.
# NaCl-related builds also fail looking for dlfcn.h when it's enabled.
"U_ENABLE_DYLOAD=0",
+
+ # v8/Blink need to know whether Chromium's copy of ICU is used or not.
+ "USE_CHROMIUM_ICU=1",
]
if (!is_component_build) {
diff --git a/README.chromium b/README.chromium
index b3e0738..b8394e5 100644
--- a/README.chromium
+++ b/README.chromium
@@ -268,9 +268,17 @@
- upstream bug:
https://unicode-org.atlassian.net/browse/ICU-20039
-12. Cherry-pick the upstream CL for quarter support in RelativeDate format
+12. Cherry-pick an upstream CL for quarter support in RelativeDate format
- patches/reldate_quarter.patch
- upstream bug:
https://unicode-org.atlassian.net/browse/ICU-20022
- fix: https://github.com/unicode-org/icu/pull/77
+
+13. Cherry-pick an upstream CL for BCP47 language tag validation
+
+ - patches/langtag_bcp47.patch
+ - upstream bug:
+ https://unicode-org.atlassian.net/browse/ICU-20098
+ - fix: https://github.com/unicode-org/icu/pull/102
+
diff --git a/patches/langtag_bcp47.patch b/patches/langtag_bcp47.patch
new file mode 100644
index 0000000..8d93fd7
--- /dev/null
+++ b/patches/langtag_bcp47.patch
@@ -0,0 +1,24 @@
+diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp
+index 87b9f63f27..2fb136f22a 100644
+--- a/icu4c/source/common/uloc_tag.cpp
++++ b/icu4c/source/common/uloc_tag.cpp
+@@ -1901,7 +1901,9 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
+ t->language = T_CString_toLowerCase(pSubtag);
+
+ pLastGoodPosition = pSep;
+- next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
++ next = SCRT | REGN | VART | EXTS | PRIV;
++ if (subtagLen <= 3)
++ next |= EXTL;
+ continue;
+ }
+ }
+@@ -2150,7 +2150,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
+ }
+ }
+ if (next & PRIV) {
+- if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
++ if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
+ char *pPrivuseVal;
+
+ if (pExtension != NULL) {
diff --git a/source/common/uloc_tag.cpp b/source/common/uloc_tag.cpp
index 99d0c6c..32b3d32 100644
--- a/source/common/uloc_tag.cpp
+++ b/source/common/uloc_tag.cpp
@@ -2014,7 +2014,9 @@
t->language = T_CString_toLowerCase(pSubtag);
pLastGoodPosition = pSep;
- next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
+ next = SCRT | REGN | VART | EXTS | PRIV;
+ if (subtagLen <= 3)
+ next |= EXTL;
continue;
}
}
@@ -2148,7 +2150,7 @@
}
}
if (next & PRIV) {
- if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
+ if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
char *pPrivuseVal;
if (pExtension != NULL) {