Add fuzzer to try ToUpper/ToLower
Also change the fromUTF32 function so that the generated code points are
distributed more evenly over the valid UTF-32 range while still producing
a small percentage of invalid UTF-32.
Bug: 1078274
Change-Id: I316cb857b7304398d0f660ad09f282be1f23d5b8
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/deps/icu/+/2228417
Reviewed-by: Jungshik Shin <jshin@chromium.org>
diff --git a/fuzzers/BUILD.gn b/fuzzers/BUILD.gn
index 07a40c0..0f70ce6 100644
--- a/fuzzers/BUILD.gn
+++ b/fuzzers/BUILD.gn
@@ -12,9 +12,7 @@
}
source_set("fuzzer_support") {
- public = [
- "fuzzer_utils.h",
- ]
+ public = [ "fuzzer_utils.h" ]
deps = [
"//base",
"//base:i18n",
@@ -24,81 +22,55 @@
}
fuzzer_test("icu_uregex_open_fuzzer") {
- sources = [
- "icu_uregex_open_fuzzer.cc",
- ]
- deps = [
- ":fuzzer_support",
- ]
+ sources = [ "icu_uregex_open_fuzzer.cc" ]
+ deps = [ ":fuzzer_support" ]
dict = "icu_regex.dict"
libfuzzer_options = [ "max_len=128" ]
}
fuzzer_test("icu_unicode_string_codepage_create_fuzzer") {
- sources = [
- "icu_unicode_string_codepage_create_fuzzer.cc",
- ]
- deps = [
- ":fuzzer_support",
- ]
+ sources = [ "icu_unicode_string_codepage_create_fuzzer.cc" ]
+ deps = [ ":fuzzer_support" ]
libfuzzer_options = [ "max_len=10240" ]
}
fuzzer_test("icu_number_format_fuzzer") {
- sources = [
- "icu_number_format_fuzzer.cc",
- ]
- deps = [
- ":fuzzer_support",
- ]
+ sources = [ "icu_number_format_fuzzer.cc" ]
+ deps = [ ":fuzzer_support" ]
libfuzzer_options = [ "max_len=10240" ]
}
fuzzer_test("icu_break_iterator_fuzzer") {
- sources = [
- "icu_break_iterator_fuzzer.cc",
- ]
- deps = [
- ":fuzzer_support",
- ]
+ sources = [ "icu_break_iterator_fuzzer.cc" ]
+ deps = [ ":fuzzer_support" ]
libfuzzer_options = [ "max_len=10240" ]
}
fuzzer_test("icu_break_iterator_utf32_fuzzer") {
- sources = [
- "icu_break_iterator_utf32_fuzzer.cc",
- ]
- deps = [
- ":fuzzer_support",
- ]
+ sources = [ "icu_break_iterator_utf32_fuzzer.cc" ]
+ deps = [ ":fuzzer_support" ]
libfuzzer_options = [ "max_len=10240" ]
}
fuzzer_test("icu_ucasemap_fuzzer") {
- sources = [
- "icu_ucasemap_fuzzer.cc",
- ]
- deps = [
- ":fuzzer_support",
- ]
+ sources = [ "icu_ucasemap_fuzzer.cc" ]
+ deps = [ ":fuzzer_support" ]
+ libfuzzer_options = [ "max_len=10240" ]
+}
+
+fuzzer_test("icu_to_case_fuzzer") {
+ sources = [ "icu_to_case_fuzzer.cc" ]
+ deps = [ ":fuzzer_support" ]
libfuzzer_options = [ "max_len=10240" ]
}
fuzzer_test("icu_converter_fuzzer") {
- sources = [
- "icu_converter_fuzzer.cc",
- ]
- deps = [
- ":fuzzer_support",
- ]
+ sources = [ "icu_converter_fuzzer.cc" ]
+ deps = [ ":fuzzer_support" ]
libfuzzer_options = [ "max_len=10240" ]
}
fuzzer_test("icu_appendable_fuzzer") {
- sources = [
- "icu_appendable_fuzzer.cc",
- ]
- deps = [
- ":fuzzer_support",
- ]
+ sources = [ "icu_appendable_fuzzer.cc" ]
+ deps = [ ":fuzzer_support" ]
}
diff --git a/fuzzers/fuzzer_utils.h b/fuzzers/fuzzer_utils.h
index 3572156..19c084f 100644
--- a/fuzzers/fuzzer_utils.h
+++ b/fuzzers/fuzzer_utils.h
@@ -45,7 +45,10 @@
uchars.resize(size * sizeof(uint8_t) / (sizeof(UChar32)));
memcpy(uchars.data(), data, uchars.size() * sizeof(UChar32));
for (size_t i = 0; i < uchars.size(); ++i) {
- uchars[i] = std::min(uchars[i], UCHAR_MAX_VALUE);
+ // The valid range for UTF32 is [0, UCHAR_MAX_VALUE]. Reducing modulo
+ // (UCHAR_MAX_VALUE + 2) keeps most values valid while leaving a few invalid
+ // ones. NOTE(review): if UChar32 is signed, negative inputs stay invalid.
+ uchars[i] = uchars[i] % (UCHAR_MAX_VALUE + 2);
}
return icu::UnicodeString::fromUTF32(uchars.data(), uchars.size());
diff --git a/fuzzers/icu_to_case_fuzzer.cc b/fuzzers/icu_to_case_fuzzer.cc
new file mode 100644
index 0000000..e07d2dd
--- /dev/null
+++ b/fuzzers/icu_to_case_fuzzer.cc
@@ -0,0 +1,50 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+
+// Fuzzer for toLower/toUpper
+
+#include <stddef.h>
+#include <stdint.h>
+#include <memory>
+#include "third_party/icu/fuzzers/fuzzer_utils.h"
+#include "third_party/icu/source/common/unicode/ustring.h"
+
+// Process-wide IcuEnvironment (presumably declared in fuzzer_utils.h). It is
+// allocated once during static initialization and never deleted in this file.
+IcuEnvironment* env = new IcuEnvironment();
+
+// Locales whose case mappings are known to differ in ICU; most other locales
+// convert case identically. NOTE(review): std::array is used here although
+// <array> is not included directly, so this relies on a transitive include.
+static const std::array<const char*, 5> kCaseLocales = {{
+    "en",  // English (root behavior)
+    "el",  // Greek
+    "tr",  // Turkish
+    "lt",  // Lithuanian
+    "nl",  // Dutch
+}};
+
+// LibFuzzer entry point: case-maps the fuzzed text with a fuzzed locale,
+// direction (upper/lower) and destination capacity. Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  UErrorCode status = U_ZERO_ERROR;  // Passed to ICU; never inspected.
+  icu::UnicodeString str(UnicodeStringFromUtf32(data, size));
+  const UChar* src = reinterpret_cast<const UChar*>(str.getBuffer());
+  const int32_t src_length = str.length();
+  auto rng = CreateRng(data, size);
+  const char* locale = kCaseLocales[rng() % kCaseLocales.size()];
+
+  // dest_size falls in [0, src_length + 2], so the output buffer can be
+  // smaller than, equal to or larger than the source.
+  int32_t dest_size = (rng() % (src_length + 3));
+  std::unique_ptr<UChar[]> dest(new UChar[dest_size]);
+
+  switch (rng() % 2) {
+    case 0:
+      u_strToUpper(dest.get(), dest_size, src, src_length, locale, &status);
+      break;
+    case 1:
+      u_strToLower(dest.get(), dest_size, src, src_length, locale, &status);
+      break;
+  }
+  return 0;
+}