Cherry pick 3 patches from the upstream

* Fix the undefined behavior in decimal number parsing
  http://bugs.icu-project.org/trac/changeset/40950
* Fix the handling of non-BMP characters in CJK breakiterator
  http://www.icu-project.org/trac/changeset/40949
* Limit the recursion depth of UnicodeSet pattern
  http://bugs.icu-project.org/trac/changeset/40979

TBR=inferno@chromium.org
Bug: chromium:799850, chromium:796807, chromium:796752
Test: See the bugs.
Change-Id: I1a8909371b601f36faca911039b10d36c7a92c85
Reviewed-on: https://chromium-review.googlesource.com/1009001
Reviewed-by: Jungshik Shin <jshin@chromium.org>

commit: aff99f5c22aded55ee29753ce049e61570294967 [log] [tgz]
author: Jungshik Shin <jshin@chromium.org> Wed Apr 11 17:29:08 2018 -0700
committer: Jungshik Shin <jshin@chromium.org> Thu Apr 12 20:51:41 2018 +0000
tree: 4fa24d98bd5ab53e475543c2c5c803f0eb56cfab
parent: 45fcf149e96f12f506a70418bb2e82f3e5c3e789 [diff]
diff --git a/README.chromium b/README.chromium
index 2fc511e..339c957 100644
--- a/README.chromium
+++ b/README.chromium

@@ -296,3 +296,19 @@
     upstream bug: https://ssl.icu-project.org/trac/ticket/13692
     (62-to-be does not have this issue)
   - bug: crbug.com/829144
+
+11. Limit the recursion depth of UnicodeSet pattern
+
+  - patches/uset_depth.patch
+    upstream bug: https://ssl.icu-project.org/trac/ticket/13547
+
+12. Fix a bug in non-BMP character handling in dictionary-based CJK
+    break iterator.
+
+  - patches/cjkdict_nonbmp.patch
+    upstream bug: https://ssl.icu-project.org/trac/ticket/13549
+
+13. Fix the undefined behavior in decimal number parsing
+
+  - patches/number_ub.patch
+    upstream bug: https://ssl.icu-project.org/trac/ticket/13550

diff --git a/patches/cjkdict_nonbmp.patch b/patches/cjkdict_nonbmp.patch
new file mode 100644
index 0000000..5f728ac
--- /dev/null
+++ b/patches/cjkdict_nonbmp.patch

@@ -0,0 +1,15 @@
+diff --git a/source/common/dictbe.cpp b/source/common/dictbe.cpp
+index 0dfaf305..d3854bb8 100644
+--- a/source/common/dictbe.cpp
++++ b/source/common/dictbe.cpp
+@@ -1324,8 +1324,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
+             }
+             if (katakanaRunLength < kMaxKatakanaGroupLength) {
+                 uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength);
+-                if (newSnlp < (uint32_t)bestSnlp.elementAti(j)) {
+-                    bestSnlp.setElementAt(newSnlp, j);
++                if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) {
++                    bestSnlp.setElementAt(newSnlp, i+katakanaRunLength);
+                     prev.setElementAt(i, i+katakanaRunLength);  // prev[j] = i;
+                 }
+             }

diff --git a/patches/number_ub.patch b/patches/number_ub.patch
new file mode 100644
index 0000000..e821888
--- /dev/null
+++ b/patches/number_ub.patch

@@ -0,0 +1,18 @@
+Index: source/i18n/decNumber.cpp
+===================================================================
+--- source/i18n/decNumber.cpp	(revision 40949)
++++ source/i18n/decNumber.cpp	(revision 40950)
+@@ -627,10 +627,12 @@
+ 
+       for (; *c=='0' && *(c+1)!='\0';) c++;  /* strip insignificant zeros  */
+       firstexp=c;                            /* save exponent digit place  */
++      uInt uexponent = 0;   /* Avoid undefined behavior on signed int overflow */
+       for (; ;c++) {
+         if (*c<'0' || *c>'9') break;         /* not a digit  */
+-        exponent=X10(exponent)+(Int)*c-(Int)'0';
++        uexponent=X10(uexponent)+(uInt)*c-(uInt)'0';
+         } /* c  */
++      exponent = (Int)uexponent;
+       /* if not now on a '\0', *c must not be a digit  */
+       if (*c!='\0') break;
+ 

diff --git a/patches/uset_depth.patch b/patches/uset_depth.patch
new file mode 100644
index 0000000..fbbc1f3
--- /dev/null
+++ b/patches/uset_depth.patch

@@ -0,0 +1,134 @@
+diff --git a/source/common/unicode/uniset.h b/source/common/unicode/uniset.h
+index 4a4ce193..ed9a3eb7 100644
+--- a/source/common/unicode/uniset.h
++++ b/source/common/unicode/uniset.h
+@@ -1521,6 +1521,7 @@ private:
+                       UnicodeString& rebuiltPat,
+                       uint32_t options,
+                       UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
++                      int32_t depth,
+                       UErrorCode& ec);
+ 
+     //----------------------------------------------------------------
+diff --git a/source/common/uniset_closure.cpp b/source/common/uniset_closure.cpp
+index 44bb4bcd..0b7da796 100644
+--- a/source/common/uniset_closure.cpp
++++ b/source/common/uniset_closure.cpp
+@@ -129,7 +129,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
+     // _applyPattern calls add() etc., which set pat to empty.
+     UnicodeString rebuiltPat;
+     RuleCharacterIterator chars(pattern, symbols, pos);
+-    applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status);
++    applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status);
+     if (U_FAILURE(status)) return *this;
+     if (chars.inVariable()) {
+         // syntaxError(chars, "Extra chars in variable value");
+diff --git a/source/common/uniset_props.cpp b/source/common/uniset_props.cpp
+index 1c28a2d8..ceefde05 100644
+--- a/source/common/uniset_props.cpp
++++ b/source/common/uniset_props.cpp
+@@ -257,6 +257,7 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
+     return i.fSet;
+ }
+ 
++namespace {
+ 
+ // Cache some sets for other services -------------------------------------- ***
+ void U_CALLCONV createUni32Set(UErrorCode &errorCode) {
+@@ -315,6 +316,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
+ // memory leak checker tools
+ #define _dbgct(me)
+ 
++}  // namespace
++
+ //----------------------------------------------------------------
+ // Constructors &c
+ //----------------------------------------------------------------
+@@ -382,7 +385,7 @@ UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern,
+     // _applyPattern calls add() etc., which set pat to empty.
+     UnicodeString rebuiltPat;
+     RuleCharacterIterator chars(pattern, symbols, pos);
+-    applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status);
++    applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status);
+     if (U_FAILURE(status)) return;
+     if (chars.inVariable()) {
+         // syntaxError(chars, "Extra chars in variable value");
+@@ -406,6 +409,8 @@ UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
+ // Implementation: Pattern parsing
+ //----------------------------------------------------------------
+ 
++namespace {
++
+ /**
+  * A small all-inline class to manage a UnicodeSet pointer.  Add
+  * operator->() etc. as needed.
+@@ -424,6 +429,10 @@ public:
+     }
+ };
+ 
++constexpr int32_t MAX_DEPTH = 100;
++
++}  // namespace
++
+ /**
+  * Parse the pattern from the given RuleCharacterIterator.  The
+  * iterator is advanced over the parsed pattern.
+@@ -443,8 +452,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
+                               UnicodeString& rebuiltPat,
+                               uint32_t options,
+                               UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
++                              int32_t depth,
+                               UErrorCode& ec) {
+     if (U_FAILURE(ec)) return;
++    if (depth > MAX_DEPTH) {
++        ec = U_ILLEGAL_ARGUMENT_ERROR;
++        return;
++    }
+ 
+     // Syntax characters: [ ] ^ - & { }
+ 
+@@ -579,7 +593,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
+             }
+             switch (setMode) {
+             case 1:
+-                nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
++                nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec);
+                 break;
+             case 2:
+                 chars.skipIgnored(opts);
+@@ -837,6 +851,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
+ // Property set implementation
+ //----------------------------------------------------------------
+ 
++namespace {
++
+ static UBool numericValueFilter(UChar32 ch, void* context) {
+     return u_getNumericValue(ch) == *(double*)context;
+ }
+@@ -868,6 +884,8 @@ static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
+     return uscript_hasScript(ch, *(UScriptCode*)context);
+ }
+ 
++}  // namespace
++
+ /**
+  * Generic filter-based scanning code for UCD property UnicodeSets.
+  */
+@@ -924,6 +942,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
+     }
+ }
+ 
++namespace {
++
+ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
+     /* Note: we use ' ' in compiler code page */
+     int32_t j = 0;
+@@ -941,6 +961,8 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
+     return TRUE;
+ }
+ 
++}  // namespace
++
+ //----------------------------------------------------------------
+ // Property set API
+ //----------------------------------------------------------------

diff --git a/source/common/dictbe.cpp b/source/common/dictbe.cpp
index 0dfaf30..d3854bb 100644
--- a/source/common/dictbe.cpp
+++ b/source/common/dictbe.cpp

@@ -1324,8 +1324,8 @@
             }
             if (katakanaRunLength < kMaxKatakanaGroupLength) {
                 uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength);
-                if (newSnlp < (uint32_t)bestSnlp.elementAti(j)) {
-                    bestSnlp.setElementAt(newSnlp, j);
+                if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) {
+                    bestSnlp.setElementAt(newSnlp, i+katakanaRunLength);
                     prev.setElementAt(i, i+katakanaRunLength);  // prev[j] = i;
                 }
             }

diff --git a/source/common/unicode/uniset.h b/source/common/unicode/uniset.h
index 4a4ce19..ed9a3eb 100644
--- a/source/common/unicode/uniset.h
+++ b/source/common/unicode/uniset.h

@@ -1521,6 +1521,7 @@
                       UnicodeString& rebuiltPat,
                       uint32_t options,
                       UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
+                      int32_t depth,
                       UErrorCode& ec);
 
     //----------------------------------------------------------------

diff --git a/source/common/uniset_closure.cpp b/source/common/uniset_closure.cpp
index 44bb4bc..0b7da79 100644
--- a/source/common/uniset_closure.cpp
+++ b/source/common/uniset_closure.cpp

@@ -129,7 +129,7 @@
     // _applyPattern calls add() etc., which set pat to empty.
     UnicodeString rebuiltPat;
     RuleCharacterIterator chars(pattern, symbols, pos);
-    applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status);
+    applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status);
     if (U_FAILURE(status)) return *this;
     if (chars.inVariable()) {
         // syntaxError(chars, "Extra chars in variable value");

diff --git a/source/common/uniset_props.cpp b/source/common/uniset_props.cpp
index 1c28a2d..ceefde0 100644
--- a/source/common/uniset_props.cpp
+++ b/source/common/uniset_props.cpp

@@ -257,6 +257,7 @@
     return i.fSet;
 }
 
+namespace {
 
 // Cache some sets for other services -------------------------------------- ***
 void U_CALLCONV createUni32Set(UErrorCode &errorCode) {
@@ -315,6 +316,8 @@
 // memory leak checker tools
 #define _dbgct(me)
 
+}  // namespace
+
 //----------------------------------------------------------------
 // Constructors &c
 //----------------------------------------------------------------
@@ -382,7 +385,7 @@
     // _applyPattern calls add() etc., which set pat to empty.
     UnicodeString rebuiltPat;
     RuleCharacterIterator chars(pattern, symbols, pos);
-    applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status);
+    applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status);
     if (U_FAILURE(status)) return;
     if (chars.inVariable()) {
         // syntaxError(chars, "Extra chars in variable value");
@@ -406,6 +409,8 @@
 // Implementation: Pattern parsing
 //----------------------------------------------------------------
 
+namespace {
+
 /**
  * A small all-inline class to manage a UnicodeSet pointer.  Add
  * operator->() etc. as needed.
@@ -424,6 +429,10 @@
     }
 };
 
+constexpr int32_t MAX_DEPTH = 100;
+
+}  // namespace
+
 /**
  * Parse the pattern from the given RuleCharacterIterator.  The
  * iterator is advanced over the parsed pattern.
@@ -443,8 +452,13 @@
                               UnicodeString& rebuiltPat,
                               uint32_t options,
                               UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
+                              int32_t depth,
                               UErrorCode& ec) {
     if (U_FAILURE(ec)) return;
+    if (depth > MAX_DEPTH) {
+        ec = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
 
     // Syntax characters: [ ] ^ - & { }
 
@@ -579,7 +593,7 @@
             }
             switch (setMode) {
             case 1:
-                nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
+                nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec);
                 break;
             case 2:
                 chars.skipIgnored(opts);
@@ -837,6 +851,8 @@
 // Property set implementation
 //----------------------------------------------------------------
 
+namespace {
+
 static UBool numericValueFilter(UChar32 ch, void* context) {
     return u_getNumericValue(ch) == *(double*)context;
 }
@@ -868,6 +884,8 @@
     return uscript_hasScript(ch, *(UScriptCode*)context);
 }
 
+}  // namespace
+
 /**
  * Generic filter-based scanning code for UCD property UnicodeSets.
  */
@@ -924,6 +942,8 @@
     }
 }
 
+namespace {
+
 static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
     /* Note: we use ' ' in compiler code page */
     int32_t j = 0;
@@ -941,6 +961,8 @@
     return TRUE;
 }
 
+}  // namespace
+
 //----------------------------------------------------------------
 // Property set API
 //----------------------------------------------------------------

diff --git a/source/i18n/decNumber.cpp b/source/i18n/decNumber.cpp
index 149062e..cee2f8e 100644
--- a/source/i18n/decNumber.cpp
+++ b/source/i18n/decNumber.cpp

@@ -627,10 +627,12 @@
 
       for (; *c=='0' && *(c+1)!='\0';) c++;  /* strip insignificant zeros  */
       firstexp=c;                            /* save exponent digit place  */
+      uInt uexponent = 0;   /* Avoid undefined behavior on signed int overflow */
       for (; ;c++) {
         if (*c<'0' || *c>'9') break;         /* not a digit  */
-        exponent=X10(exponent)+(Int)*c-(Int)'0';
+        uexponent=X10(uexponent)+(uInt)*c-(uInt)'0';
         } /* c  */
+      exponent = (Int)uexponent;
       /* if not now on a '\0', *c must not be a digit  */
       if (*c!='\0') break;
commit	aff99f5c22aded55ee29753ce049e61570294967	[log] [tgz]
author	Jungshik Shin <jshin@chromium.org>	Wed Apr 11 17:29:08 2018 -0700
committer	Jungshik Shin <jshin@chromium.org>	Thu Apr 12 20:51:41 2018 +0000
tree	4fa24d98bd5ab53e475543c2c5c803f0eb56cfab
parent	45fcf149e96f12f506a70418bb2e82f3e5c3e789 [diff]