[libfuzzer] Add a UTF-32 variant of the break iterator fuzzer.

This makes it possible to compare the performance and coverage of the
UTF-8 and UTF-32 fuzzers on ClusterFuzz.
diff --git a/fuzzers/BUILD.gn b/fuzzers/BUILD.gn
index 45645c7..1503550 100644
--- a/fuzzers/BUILD.gn
+++ b/fuzzers/BUILD.gn
@@ -11,13 +11,24 @@
 group("fuzzers") {
 }
 
+source_set("fuzzer_support") {
+  public = [
+    "fuzzer_utils.h",
+  ]
+  public_deps = [  # fuzzer_utils.h and the fuzzers include these directly.
+    "//base",
+    "//base:i18n",
+    "//third_party/icu",
+  ]
+  public_configs = [ "//third_party/icu:icu_config" ]
+}
+
 fuzzer_test("icu_uregex_open_fuzzer") {
   sources = [
     "icu_uregex_open_fuzzer.cc",
   ]
   deps = [
-    "//base",
-    "//third_party/icu",
+    ":fuzzer_support",
   ]
   dict = "icu_regex.dict"
   libfuzzer_options = [ "max_len=128" ]
@@ -28,8 +39,7 @@
     "icu_unicode_string_codepage_create_fuzzer.cc",
   ]
   deps = [
-    "//base",
-    "//third_party/icu",
+    ":fuzzer_support",
   ]
   seed_corpus = "//third_party/icu/source/test/testdata"
   libfuzzer_options = [ "max_len=10240" ]
@@ -40,9 +50,7 @@
     "icu_number_format_fuzzer.cc",
   ]
   deps = [
-    "//base",
-    "//base:i18n",
-    "//third_party/icu",
+    ":fuzzer_support",
   ]
   seed_corpus = "//third_party/icu/source/test/testdata"
   libfuzzer_options = [ "max_len=10240" ]
@@ -53,9 +61,16 @@
     "icu_break_iterator_fuzzer.cc",
   ]
   deps = [
-    "//base",
-    "//base:i18n",
-    "//third_party/icu",
+    ":fuzzer_support",
+  ]
+}
+
+fuzzer_test("icu_break_iterator_utf32_fuzzer") {
+  sources = [
+    "icu_break_iterator_utf32_fuzzer.cc",
+  ]
+  deps = [
+    ":fuzzer_support",
   ]
   seed_corpus = "//third_party/icu/source/test/testdata"
   libfuzzer_options = [ "max_len=10240" ]
@@ -66,11 +81,8 @@
     "icu_ucasemap_fuzzer.cc",
   ]
   deps = [
-    "//base",
-    "//base:i18n",
-    "//third_party/icu",
+    ":fuzzer_support",
   ]
   seed_corpus = "//third_party/icu/source/test/testdata"
   libfuzzer_options = [ "max_len=10240" ]
 }
-
diff --git a/fuzzers/fuzzer_utils.h b/fuzzers/fuzzer_utils.h
index f9ba849..8b3161a 100644
--- a/fuzzers/fuzzer_utils.h
+++ b/fuzzers/fuzzer_utils.h
@@ -4,10 +4,13 @@
 #define THIRD_PARTY_ICU_FUZZERS_FUZZER_UTILS_H_
 
 #include <assert.h>
+#include <algorithm>
 #include <random>
+
 #include "base/at_exit.h"
 #include "base/i18n/icu_util.h"
 #include "third_party/icu/source/common/unicode/locid.h"
+#include "third_party/icu/source/common/unicode/uchar.h"
 
 struct IcuEnvironment {
   IcuEnvironment() {
@@ -31,4 +34,20 @@
   return locales[(*rng)() % num_locales];
 }
 
+icu::UnicodeString UnicodeStringFromUtf8(const uint8_t* data, size_t size) {
+  const char* utf8 = reinterpret_cast<const char*>(data);  // Same bytes.
+  return icu::UnicodeString::fromUTF8(icu::StringPiece(utf8, size));
+}
+
+// Decodes |data| as native-endian UTF-32, clamping values above U+10FFFF.
+icu::UnicodeString UnicodeStringFromUtf32(const uint8_t* data, size_t size) {
+  std::vector<UChar32> uchars(size / sizeof(UChar32));  // Whole units only.
+  if (uchars.empty())  // data() may be null, which memcpy() forbids.
+    return icu::UnicodeString();
+  memcpy(uchars.data(), data, uchars.size() * sizeof(UChar32));
+  for (UChar32& c : uchars)
+    c = std::min(c, UCHAR_MAX_VALUE);
+  return icu::UnicodeString::fromUTF32(uchars.data(), uchars.size());
+}
+
 #endif  // THIRD_PARTY_ICU_FUZZERS_FUZZER_UTILS_H_
diff --git a/fuzzers/icu_break_iterator_fuzzer.cc b/fuzzers/icu_break_iterator_fuzzer.cc
index b9bc8e5..3b0d336 100644
--- a/fuzzers/icu_break_iterator_fuzzer.cc
+++ b/fuzzers/icu_break_iterator_fuzzer.cc
@@ -11,7 +11,7 @@
 // Entry point for LibFuzzer.
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
   UErrorCode status = U_ZERO_ERROR;
-  icu::UnicodeString str(reinterpret_cast<const char*>(data), size);
+  icu::UnicodeString str(UnicodeStringFromUtf8(data, size));
 
   auto rng = CreateRng(data, size);
   const icu::Locale& locale = GetRandomLocale(&rng);
diff --git a/fuzzers/icu_break_iterator_utf32_fuzzer.cc b/fuzzers/icu_break_iterator_utf32_fuzzer.cc
new file mode 100644
index 0000000..ba66c0a
--- /dev/null
+++ b/fuzzers/icu_break_iterator_utf32_fuzzer.cc
@@ -0,0 +1,46 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <memory>
+#include "third_party/icu/fuzzers/fuzzer_utils.h"
+#include "third_party/icu/source/common/unicode/brkiter.h"
+
+IcuEnvironment* env = new IcuEnvironment();  // Built once; never freed here.
+
+// Entry point for LibFuzzer: reads |data| as UTF-32 text and steps a break
+// iterator, whose kind and locale are picked via CreateRng(), across it.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  UErrorCode status = U_ZERO_ERROR;
+  icu::UnicodeString str(UnicodeStringFromUtf32(data, size));
+
+  auto rng = CreateRng(data, size);
+  const icu::Locale& locale = GetRandomLocale(&rng);
+
+  std::unique_ptr<icu::BreakIterator> bi;
+  switch (rng() % 5) {
+    case 0:
+      bi.reset(icu::BreakIterator::createWordInstance(locale, status));
+      break;
+    case 1:
+      bi.reset(icu::BreakIterator::createLineInstance(locale, status));
+      break;
+    case 2:
+      bi.reset(icu::BreakIterator::createCharacterInstance(locale, status));
+      break;
+    case 3:
+      bi.reset(icu::BreakIterator::createSentenceInstance(locale, status));
+      break;
+    case 4:
+      bi.reset(icu::BreakIterator::createTitleInstance(locale, status));
+      break;
+  }
+  if (U_FAILURE(status))
+    return 0;
+
+  // Attach the fuzzed text; without this |str| never reaches the iterator.
+  bi->setText(str);
+  for (int32_t p = bi->first(); p != icu::BreakIterator::DONE; p = bi->next()) {
+  }
+  return 0;
+}
diff --git a/fuzzers/icu_number_format_fuzzer.cc b/fuzzers/icu_number_format_fuzzer.cc
index cb8c573..ab81e79 100644
--- a/fuzzers/icu_number_format_fuzzer.cc
+++ b/fuzzers/icu_number_format_fuzzer.cc
@@ -21,7 +21,7 @@
       icu::NumberFormat::createInstance(locale, status));
   if (U_FAILURE(status)) return 0;
 
-  icu::UnicodeString str(reinterpret_cast<const char*>(data), size);
+  icu::UnicodeString str(UnicodeStringFromUtf8(data, size));
   icu::Formattable result;
   fmt->parse(str, result, status);