Add fuzzer to try ToUpper/ToLower. Also change the fromUTF32 function to make the string distribute more evenly in the valid range of UTF32 but still have a small % of invalid UTF32. Bug: 1078274 Change-Id: I316cb857b7304398d0f660ad09f282be1f23d5b8 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/deps/icu/+/2228417 Reviewed-by: Jungshik Shin <jshin@chromium.org>

commit: 46f53dfc09c520b7c520a089ca473bb0ee29c07e [log] [tgz]
author: Frank Tang <ftang@chromium.org> Wed Jun 03 14:40:04 2020 -0700
committer: Frank Tang <ftang@chromium.org> Mon Jun 08 21:53:26 2020 +0000
tree: 2e921efca46df452b91aa0ea78be9a096f7169eb
parent: 53eca4cea3b3d4a83d286f536cd178cc46805618 [diff]
diff --git a/fuzzers/BUILD.gn b/fuzzers/BUILD.gn
index 07a40c0..0f70ce6 100644
--- a/fuzzers/BUILD.gn
+++ b/fuzzers/BUILD.gn

@@ -12,9 +12,7 @@
 }
 
 source_set("fuzzer_support") {
-  public = [
-    "fuzzer_utils.h",
-  ]
+  public = [ "fuzzer_utils.h" ]
   deps = [
     "//base",
     "//base:i18n",
@@ -24,81 +22,55 @@
 }
 
 fuzzer_test("icu_uregex_open_fuzzer") {
-  sources = [
-    "icu_uregex_open_fuzzer.cc",
-  ]
-  deps = [
-    ":fuzzer_support",
-  ]
+  sources = [ "icu_uregex_open_fuzzer.cc" ]
+  deps = [ ":fuzzer_support" ]
   dict = "icu_regex.dict"
   libfuzzer_options = [ "max_len=128" ]
 }
 
 fuzzer_test("icu_unicode_string_codepage_create_fuzzer") {
-  sources = [
-    "icu_unicode_string_codepage_create_fuzzer.cc",
-  ]
-  deps = [
-    ":fuzzer_support",
-  ]
+  sources = [ "icu_unicode_string_codepage_create_fuzzer.cc" ]
+  deps = [ ":fuzzer_support" ]
   libfuzzer_options = [ "max_len=10240" ]
 }
 
 fuzzer_test("icu_number_format_fuzzer") {
-  sources = [
-    "icu_number_format_fuzzer.cc",
-  ]
-  deps = [
-    ":fuzzer_support",
-  ]
+  sources = [ "icu_number_format_fuzzer.cc" ]
+  deps = [ ":fuzzer_support" ]
   libfuzzer_options = [ "max_len=10240" ]
 }
 
 fuzzer_test("icu_break_iterator_fuzzer") {
-  sources = [
-    "icu_break_iterator_fuzzer.cc",
-  ]
-  deps = [
-    ":fuzzer_support",
-  ]
+  sources = [ "icu_break_iterator_fuzzer.cc" ]
+  deps = [ ":fuzzer_support" ]
   libfuzzer_options = [ "max_len=10240" ]
 }
 
 fuzzer_test("icu_break_iterator_utf32_fuzzer") {
-  sources = [
-    "icu_break_iterator_utf32_fuzzer.cc",
-  ]
-  deps = [
-    ":fuzzer_support",
-  ]
+  sources = [ "icu_break_iterator_utf32_fuzzer.cc" ]
+  deps = [ ":fuzzer_support" ]
   libfuzzer_options = [ "max_len=10240" ]
 }
 
 fuzzer_test("icu_ucasemap_fuzzer") {
-  sources = [
-    "icu_ucasemap_fuzzer.cc",
-  ]
-  deps = [
-    ":fuzzer_support",
-  ]
+  sources = [ "icu_ucasemap_fuzzer.cc" ]
+  deps = [ ":fuzzer_support" ]
+  libfuzzer_options = [ "max_len=10240" ]
+}
+
+fuzzer_test("icu_to_case_fuzzer") {
+  sources = [ "icu_to_case_fuzzer.cc" ]
+  deps = [ ":fuzzer_support" ]
   libfuzzer_options = [ "max_len=10240" ]
 }
 
 fuzzer_test("icu_converter_fuzzer") {
-  sources = [
-    "icu_converter_fuzzer.cc",
-  ]
-  deps = [
-    ":fuzzer_support",
-  ]
+  sources = [ "icu_converter_fuzzer.cc" ]
+  deps = [ ":fuzzer_support" ]
   libfuzzer_options = [ "max_len=10240" ]
 }
 
 fuzzer_test("icu_appendable_fuzzer") {
-  sources = [
-    "icu_appendable_fuzzer.cc",
-  ]
-  deps = [
-    ":fuzzer_support",
-  ]
+  sources = [ "icu_appendable_fuzzer.cc" ]
+  deps = [ ":fuzzer_support" ]
 }

diff --git a/fuzzers/fuzzer_utils.h b/fuzzers/fuzzer_utils.h
index 3572156..19c084f 100644
--- a/fuzzers/fuzzer_utils.h
+++ b/fuzzers/fuzzer_utils.h

@@ -45,7 +45,10 @@
   uchars.resize(size * sizeof(uint8_t) / (sizeof(UChar32)));
   memcpy(uchars.data(), data, uchars.size() * sizeof(UChar32));
   for (size_t i = 0; i < uchars.size(); ++i) {
-    uchars[i] = std::min(uchars[i], UCHAR_MAX_VALUE);
+    // The valid range for UTF32 is [0, UCHAR_MAX_VALUE].
+    // By taking % (UCHAR_MAX_VALUE + 2) we make the output mostly valid, with
+    // a small percentage (about 1 / UCHAR_MAX_VALUE) of invalid data in UTF32.
+    uchars[i] = uchars[i] % (UCHAR_MAX_VALUE + 2);
   }
 
   return icu::UnicodeString::fromUTF32(uchars.data(), uchars.size());

diff --git a/fuzzers/icu_to_case_fuzzer.cc b/fuzzers/icu_to_case_fuzzer.cc
new file mode 100644
index 0000000..e07d2dd
--- /dev/null
+++ b/fuzzers/icu_to_case_fuzzer.cc

@@ -0,0 +1,50 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+
+// Fuzzer for toLower/toUpper
+
+#include <stddef.h>
+#include <stdint.h>
+#include <memory>
+#include "third_party/icu/fuzzers/fuzzer_utils.h"
+#include "third_party/icu/source/common/unicode/ustring.h"
+
+IcuEnvironment* env = new IcuEnvironment();
+
+template <typename T>
+using deleted_unique_ptr = std::unique_ptr<T, std::function<void(T*)>>;
+
+// Most locales case-convert the same way, but we know ICU's case conversion
+// differs among the five below.
+static const std::array<const char*, 5> kCaseLocales = {{
+    "en",  // root
+    "el",  // Greek
+    "tr",  // Turkish
+    "lt",  // Lithuanian
+    "nl",  // Dutch
+}};
+
+// Entry point for LibFuzzer: upper- or lower-cases the fuzzed input string.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  UErrorCode status = U_ZERO_ERROR;  // Passed to ICU below; never checked here.
+  icu::UnicodeString str(UnicodeStringFromUtf32(data, size));
+
+  auto rng = CreateRng(data, size);
+  const char* locale = kCaseLocales[rng() % kCaseLocales.size()];
+
+  // Let dest_size vary with rng(): rng() % (length + 3) is at most length + 2.
+  int32_t dest_size = (rng() % (str.length() + 3));
+  std::unique_ptr<UChar[]> dest(new UChar[dest_size]);
+
+  switch (rng() % 2) {  // 0 -> u_strToUpper, 1 -> u_strToLower.
+    case 0:
+      u_strToUpper(dest.get(), dest_size, (const UChar*)str.getBuffer(),
+                   str.length(), locale, &status);
+      break;
+    case 1:
+      u_strToLower(dest.get(), dest_size, (const UChar*)str.getBuffer(),
+                   str.length(), locale, &status);
+      break;
+  }
+
+  return 0;  // The conversion result and status are intentionally discarded.
+}
commit	46f53dfc09c520b7c520a089ca473bb0ee29c07e	[log] [tgz]
author	Frank Tang <ftang@chromium.org>	Wed Jun 03 14:40:04 2020 -0700
committer	Frank Tang <ftang@chromium.org>	Mon Jun 08 21:53:26 2020 +0000
tree	2e921efca46df452b91aa0ea78be9a096f7169eb
parent	53eca4cea3b3d4a83d286f536cd178cc46805618 [diff]