| // Copyright 2020 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <lib/fidl/coding.h> |
| |
| #include <zxtest/zxtest.h> |
| |
// Expects fidl_validate_string() to accept `input`.
//
// `input` must be a string literal (or char array): its length is taken with
// sizeof, minus one for the terminating NUL. Embedded NULs are therefore kept,
// so "\x00" is validated as a one-byte string.
//
// The body is wrapped in do { } while (0) so that the expansion together with
// the caller's trailing semicolon forms exactly one statement; a bare { }
// block would break when used in an unbraced if/else.
#define EXPECT_VALID_STRING(input)                                   \
  do {                                                               \
    const char* bytes = (input);                                     \
    uint32_t num_bytes = static_cast<uint32_t>(sizeof(input) - 1);   \
    EXPECT_EQ(ZX_OK, fidl_validate_string(bytes, num_bytes));        \
  } while (0)
| |
// Expects fidl_validate_string() to reject `input` with ZX_ERR_INVALID_ARGS.
//
// `input` must be a string literal (or char array): its length is taken with
// sizeof, minus one for the terminating NUL. `explanation` is forwarded to
// EXPECT_EQ as the failure description.
//
// The body is wrapped in do { } while (0) so that the expansion together with
// the caller's trailing semicolon forms exactly one statement; a bare { }
// block would break when used in an unbraced if/else.
#define EXPECT_INVALID_STRING(input, explanation)                                          \
  do {                                                                                     \
    const char* bytes = (input);                                                           \
    uint32_t num_bytes = static_cast<uint32_t>(sizeof(input) - 1);                         \
    EXPECT_EQ(ZX_ERR_INVALID_ARGS, fidl_validate_string(bytes, num_bytes), explanation);   \
  } while (0)
| |
| TEST(ValidateString, safe_on_nullptr) { |
| EXPECT_EQ(ZX_ERR_INVALID_ARGS, fidl_validate_string(nullptr, 10)); |
| } |
| |
| TEST(ValidateString, string_with_size_too_big) { |
| uint64_t size_too_big = static_cast<uint64_t>(FIDL_MAX_SIZE) + 1; |
| EXPECT_EQ(ZX_ERR_INVALID_ARGS, fidl_validate_string("", size_too_big)); |
| } |
| |
| TEST(ValidateString, min_max_code_units_and_minus_one_and_plus_one) { |
| EXPECT_VALID_STRING("\x00"); // single byte, min: 0 |
| EXPECT_VALID_STRING("\x7f"); // single byte, max: 127 |
| EXPECT_VALID_STRING("\xc2\x80"); // two bytes, min: 128 |
| EXPECT_VALID_STRING("\xdf\xbf"); // two bytes, max: 2047 |
| EXPECT_VALID_STRING("\xe1\x80\x80"); // three bytes, min: 2048 |
| EXPECT_VALID_STRING("\xef\xbf\xbf"); // three bytes, max: 65535 |
| EXPECT_VALID_STRING("\xf0\x90\x80\x80"); // four bytes, min: 65536 |
| EXPECT_VALID_STRING("\xf4\x8f\xbf\xbf"); // four bytes, max: 1114111 |
| |
| EXPECT_INVALID_STRING("\x80", "1 above max single byte"); |
| EXPECT_INVALID_STRING("\xc2\x7f", "1 below min two bytes"); |
| EXPECT_INVALID_STRING("\xdf\xc0", "1 above max two bytes"); |
| EXPECT_INVALID_STRING("\xe1\x80\x7f", "1 below min three bytes"); |
| EXPECT_INVALID_STRING("\xef\xbf\xc0", "1 above max three bytes"); |
| EXPECT_INVALID_STRING("\xf0\x80\x80\x80", "1 below min four bytes"); |
| EXPECT_INVALID_STRING("\xf7\xbf\xbf\xc0", "1 above max four bytes"); |
| } |
| |
| TEST(ValidateString, invalid_continuations) { |
| // 1 test for the first following byte of an initial two byte value not having the high bit. |
| EXPECT_VALID_STRING("\xc2\x80"); |
| EXPECT_INVALID_STRING("\xc2\x7f", "first byte following two byte value not starting with 0b10"); |
| |
| // 2 tests for the first and second following byte of an initial three byte value not having the |
| // high bit set. |
| EXPECT_INVALID_STRING("\xe1\x7f\x80", |
| "first byte following three byte value not starting with 0b10"); |
| EXPECT_INVALID_STRING("\xe1\x80\x7f", |
| "second byte following three byte value not starting with 0b10"); |
| |
| // 3 tests for the first, second, and third following byte of an initial four byte value not |
| // having the high bit set. |
| EXPECT_VALID_STRING("\xf0\x90\x80\x80"); |
| EXPECT_INVALID_STRING("\xf0\x7f\x80\x80", |
| "first byte following four byte value not starting with 0b10"); |
| EXPECT_INVALID_STRING("\xf0\x90\x7f\x80", |
| "second byte following four byte value not starting with 0b10"); |
| EXPECT_INVALID_STRING("\xf0\x90\x80\x7f", |
| "third byte following four byte value not starting with 0b10"); |
| } |
| |
| TEST(ValidateString, only_shortest_encoding_is_valid) { |
| // All encodings of slash, only the shortest is valid. |
| // |
| // For further details, see "code unit" defined to be 'The minimal bit |
| // combination that can represent a unit of encoded text for processing or |
| // interchange.' |
| EXPECT_VALID_STRING("\x2f"); |
| EXPECT_INVALID_STRING("\xc0\xaf", "slash (2)"); |
| EXPECT_INVALID_STRING("\xe0\x80\xaf", "slash (3)"); |
| EXPECT_INVALID_STRING("\xf0\x80\x80\xaf", "slash (4)"); |
| } |
| |
| TEST(ValidateString, valid_noncharacter_codepoints) { |
| EXPECT_VALID_STRING("\xd8\x9d"); // U+061D |
| EXPECT_VALID_STRING("\xd7\xb6"); // U+05F6 |
| EXPECT_VALID_STRING("\xe0\xab\xb4"); // U+0AF4 |
| EXPECT_VALID_STRING("\xe0\xb1\x92"); // U+0C52 |
| EXPECT_VALID_STRING("\xf0\x9e\x91\x94"); // U+1E454 |
| EXPECT_VALID_STRING("\xf0\x9f\xa5\xb8"); // U+1F978 |
| } |
| |
| TEST(ValidateString, various) { |
| EXPECT_VALID_STRING(""); |
| EXPECT_VALID_STRING("a"); |
| EXPECT_VALID_STRING("€"); // \xe2\x82\xac |
| |
| // Mix and match from min_max_code_units_and_minus_one_and_plus_one |
| EXPECT_VALID_STRING("\x00\xf4\x8f\xbf\xbf\x7f\xf0\x90\x80\x80\xc2\x80"); |
| EXPECT_VALID_STRING("\xdf\xbf\xef\xbf\xbf\xe1\x80\x80"); |
| |
| // UTF-8 BOM |
| EXPECT_VALID_STRING("\xef\xbb\xbf"); |
| EXPECT_INVALID_STRING("\xef", "Partial UTF-8 BOM (1)"); |
| EXPECT_INVALID_STRING("\xef\xbb", "Partial UTF-8 BOM (2)"); |
| |
| EXPECT_INVALID_STRING("\xdf\x80\x80", "invalid partial sequence"); |
| EXPECT_INVALID_STRING("\xe0\x80\x80", "long U+0000, non shortest form"); |
| EXPECT_VALID_STRING("\xe1\x80\x80"); |
| |
| // All the following test cases are taken from Chromium's |
| // streaming_utf8_validator_unittest.cc |
| // |
| // Some are duplicative to other tests, and have been kept to ease |
| // comparison and translation of the tests. |
| |
| EXPECT_VALID_STRING("\r"); |
| EXPECT_VALID_STRING("\n"); |
| EXPECT_VALID_STRING("a"); |
| EXPECT_VALID_STRING("\xc2\x81"); |
| EXPECT_VALID_STRING("\xe1\x80\xbf"); |
| EXPECT_VALID_STRING("\xf1\x80\xa0\xbf"); |
| EXPECT_VALID_STRING("\xef\xbb\xbf"); // UTF-8 BOM |
| |
| // always invalid bytes |
| EXPECT_INVALID_STRING("\xc0", ""); |
| EXPECT_INVALID_STRING("\xc1", ""); |
| EXPECT_INVALID_STRING("\xf5", ""); |
| EXPECT_INVALID_STRING("\xf6", ""); |
| EXPECT_INVALID_STRING("\xf7", ""); |
| EXPECT_INVALID_STRING("\xf8", ""); |
| EXPECT_INVALID_STRING("\xf9", ""); |
| EXPECT_INVALID_STRING("\xfa", ""); |
| EXPECT_INVALID_STRING("\xfb", ""); |
| EXPECT_INVALID_STRING("\xfc", ""); |
| EXPECT_INVALID_STRING("\xfd", ""); |
| EXPECT_INVALID_STRING("\xfe", ""); |
| EXPECT_INVALID_STRING("\xff", ""); |
| |
| // surrogate code points |
| EXPECT_INVALID_STRING("\xed\xa0\x80", "U+D800, high surrogate, first"); |
| EXPECT_INVALID_STRING("\xed\xb0\x80", "low surrogate, first"); |
| EXPECT_INVALID_STRING("\xed\xbf\xbf", "low surrogate, last"); |
| |
| // overlong sequences |
| EXPECT_INVALID_STRING("\xc0\x80", "U+0000"); |
| EXPECT_INVALID_STRING("\xc1\x80", "\"A\""); |
| EXPECT_INVALID_STRING("\xc1\x81", "\"B\""); |
| EXPECT_INVALID_STRING("\xe0\x80\x80", "U+0000"); |
| EXPECT_INVALID_STRING("\xe0\x82\x80", "U+0080"); |
| EXPECT_INVALID_STRING("\xe0\x9f\xbf", "U+07ff"); |
| EXPECT_INVALID_STRING("\xf0\x80\x80\x8D", "U+000D"); |
| EXPECT_INVALID_STRING("\xf0\x80\x82\x91", "U+0091"); |
| EXPECT_INVALID_STRING("\xf0\x80\xa0\x80", "U+0800"); |
| EXPECT_INVALID_STRING("\xf0\x8f\xbb\xbf", "U+FEFF (BOM)"); |
| EXPECT_INVALID_STRING("\xf8\x80\x80\x80\xbf", "U+003F"); |
| EXPECT_INVALID_STRING("\xfc\x80\x80\x80\xa0\xa5", ""); |
| |
| // Beyond U+10FFFF |
| EXPECT_INVALID_STRING("\xf4\x90\x80\x80", "U+110000"); |
| EXPECT_INVALID_STRING("\xf8\xa0\xbf\x80\xbf", "5 bytes"); |
| EXPECT_INVALID_STRING("\xfc\x9c\xbf\x80\xbf\x80", "6 bytes"); |
| |
| // BOMs in UTF-16(BE|LE) |
| EXPECT_INVALID_STRING("\xfe\xff", "BOMs in UTF-16 BE"); |
| EXPECT_INVALID_STRING("\xff\xfe", "BOMs in UTF-16 LE"); |
| } |