// zircon/system/utest/utf_conversion/main.cpp - fuchsia/ - Git at Google

 // Copyright 2018 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include <endian.h>
 #include <stdio.h>
 #include <unittest/unittest.h>
 #include <utf_conversion/utf_conversion.h>

 #include <fbl/algorithm.h>

 // Endianness flags expressed relative to the machine running the tests:
 // HOST_ENDIAN_FLAG is the FORCE_*_ENDIAN flag that matches the host byte order
 // and INVERT_ENDIAN_FLAG is the one for the opposite byte order.
 #if (BYTE_ORDER != BIG_ENDIAN)
 static constexpr uint32_t HOST_ENDIAN_FLAG   = UTF_CONVERT_FLAG_FORCE_LITTLE_ENDIAN;
 static constexpr uint32_t INVERT_ENDIAN_FLAG = UTF_CONVERT_FLAG_FORCE_BIG_ENDIAN;
 #else
 static constexpr uint32_t HOST_ENDIAN_FLAG   = UTF_CONVERT_FLAG_FORCE_BIG_ENDIAN;
 static constexpr uint32_t INVERT_ENDIAN_FLAG = UTF_CONVERT_FLAG_FORCE_LITTLE_ENDIAN;
 #endif

 // Checks one conversion result in three steps, in this order: the destination
 // capacity (got_capacity) is at least want_len, the length reported by the
 // converter (reported_len) equals want_len, and the first want_len bytes of
 // got match want.  msg is forwarded to each underlying assertion.
 #define ASSERT_UTF8_EQ(want, want_len, got, got_capacity, reported_len, msg)        \
     do {                                                                            \
         ASSERT_GE(got_capacity, want_len, msg);                                     \
         ASSERT_EQ(want_len, reported_len, msg);                                     \
         ASSERT_BYTES_EQ(want, got, want_len, msg);                                  \
     } while(false)

 // Exercises argument validation in utf16_to_utf8.  A null destination with a
 // non-zero length, a null length pointer, and undefined flag bits are each
 // expected to fail with ZX_ERR_INVALID_ARGS and leave the caller's outputs
 // untouched; a null destination with a zero length is expected to succeed and
 // report the size the encoding would need.
 static bool utf16to8_bad_args(void) {
     BEGIN_TEST;

     // Initialized up front so that no call below is ever handed an
     // indeterminate code unit.  The final check expects this value (0xAB) to
     // need a two byte encoding.
     uint16_t src = 0xAB;
     uint8_t dst = 0xFE;
     size_t dst_len;
     zx_status_t res;

     // Bad destination buffer with non-zero destination length
     dst_len = 1;
     res = utf16_to_utf8(&src, 1, nullptr, &dst_len);
     ASSERT_EQ(ZX_ERR_INVALID_ARGS, res, "null dst should fail with INVALID_ARGS");
     ASSERT_EQ(1, dst_len, "dst_len modified after conversion with invalid args");

     // Bad dest len pointer
     res = utf16_to_utf8(&src, 1, &dst, nullptr);
     ASSERT_EQ(ZX_ERR_INVALID_ARGS, res, "null dst_len should fail with INVALID_ARGS");
     ASSERT_EQ(0xFE, dst, "dst modified after conversion with invalid args");

     // Bad (undefined) flags.  dst_len is set again here so this case does not
     // depend on the value left behind by the earlier ones.
     dst_len = 1;
     res = utf16_to_utf8(&src, 1, &dst, &dst_len, 0x80000000);
     ASSERT_EQ(ZX_ERR_INVALID_ARGS, res, "undefined flags should fail with INVALID_ARGS");
     ASSERT_EQ(1, dst_len, "dst_len modified after conversion with invalid args");
     ASSERT_EQ(0xFE, dst, "dst modified after conversion with invalid args");

     // A null dest buffer is allowed if (and only if) the dst_len is zero.
     // Practical use cases include using the converter to determine the length
     // needed to hold a converted string.
     dst_len = 0;
     res = utf16_to_utf8(&src, 1, nullptr, &dst_len);
     ASSERT_EQ(ZX_OK, res, "null dst with zero dst_len should succeed");
     ASSERT_EQ(2, dst_len, "encoded size of 0xAB should be 2!");

     END_TEST;
 }

 static bool utf16to8_empty_source(void) {
     BEGIN_TEST;

     uint16_t src;
     static const uint8_t expected[] = { 0xA1, 0xB2, 0xC3, 0xD4 };
     uint8_t actual[sizeof(expected)];
     size_t dst_len;
     zx_status_t res;

     // Check to make sure that attempting to encode a zero length source results
     // in a length of zero and no changes to the destination buffer.
     memcpy(actual, expected, sizeof(actual));
     dst_len = sizeof(actual);
     res = utf16_to_utf8(&src, 0,actual, &dst_len);
     ASSERT_EQ(ZX_OK, res, "zero length string conversion failed");
     ASSERT_EQ(0, dst_len, "dst_len should be zero after zero length string conversion");
     ASSERT_BYTES_EQ(expected, actual, sizeof(actual),
                     "dst buffer modified after zero length string conversion");

     dst_len = sizeof(actual);
     res = utf16_to_utf8(nullptr, 1,actual, &dst_len);
     ASSERT_EQ(ZX_OK, res, "null source string conversion failed");
     ASSERT_EQ(0, dst_len, "dst_len should be zero after null source string conversion");
     ASSERT_BYTES_EQ(expected, actual, sizeof(actual),
                     "dst buffer modified after null source string conversion");

     END_TEST;
 }

 static bool utf16to8_simple_codepoints(void) {
     BEGIN_TEST;

     static const struct {
         uint16_t src;
         uint8_t expected[3];
         size_t  expected_len;
     } TEST_VECTORS[] = {
         // 1 byte UTF-8 codepoints (U+0000, U+007F)
         { 0x0000, { 0x00 }, 1 },
         { 0x0001, { 0x01 }, 1 },
         { 0x007f, { 0x7f }, 1 },

         // 2 byte UTF-8 codepoints (U+0080, U+07FF)
         { 0x0080, { 0xC2, 0x80 }, 2 },
         { 0x0456, { 0xD1, 0x96 }, 2 },
         { 0x07FF, { 0xDF, 0xBF }, 2 },

         // 3 byte UTF-8 codepoints (U+0800, U+07FF)
         // Note: we are skipping the (theoretically illegal) unpaired surrogate
         // range (U+D800, U+DFFF) here.  There is a separate test for support of
         // unpaired surrogates.
         { 0x0800, { 0xE0, 0xA0, 0x80 }, 3 },
         { 0x4567, { 0xE4, 0x95, 0xA7 }, 3 },
         { 0xD7FF, { 0xED, 0x9F, 0xBF }, 3 },
         { 0xE000, { 0xEE, 0x80, 0x80 }, 3 },
         { 0xE456, { 0xEE, 0x91, 0x96 }, 3 },
         { 0xFFFF, { 0xEF, 0xBF, 0xBF }, 3 },
     };

     uint8_t actual[3];
     for (const auto& v : TEST_VECTORS) {
         char case_id[64];
         size_t encoded_len = sizeof(actual);
         zx_status_t res;

         snprintf(case_id, sizeof(case_id), "case id [0x%04hx]", v.src);
         ::memset(actual, 0xAB, sizeof(actual));

         res = utf16_to_utf8(&v.src, 1, actual, &encoded_len);
         ASSERT_EQ(ZX_OK, res, case_id);
         ASSERT_LE(v.expected_len, sizeof(v.expected), case_id);
         ASSERT_UTF8_EQ(v.expected, v.expected_len,
                        actual, sizeof(actual),
                        encoded_len, case_id);
     }

     END_TEST;
 }

 static bool utf16to8_paired_surrogates(void) {
     BEGIN_TEST;

     // All paired surrogate encodings are going to be 4 byte UTF-8 codepoints (U+010000, U+10FFFF)
     static const struct {
         uint16_t src[2];
         uint8_t expected[4];
     } TEST_VECTORS[] = {
         { { 0xD800, 0xDC00 }, { 0xF0, 0x90, 0x80, 0x80 } }, // U+10000
         { { 0xD811, 0xDD67 }, { 0xF0, 0x94, 0x95, 0xA7 } }, // U+14567
         { { 0xDA6F, 0xDCDE }, { 0xF2, 0xAB, 0xB3, 0x9E } }, // U+ABCDE
         { { 0xDBBF, 0xDFFF }, { 0xF3, 0xBF, 0xBF, 0xBF } }, // U+FFFFF
         { { 0xDBC0, 0xDC00 }, { 0xF4, 0x80, 0x80, 0x80 } }, // U+100000
         { { 0xDBD1, 0xDD67 }, { 0xF4, 0x84, 0x95, 0xA7 } }, // U+104567
         { { 0xDBFF, 0xDFFF }, { 0xF4, 0x8F, 0xBF, 0xBF } }, // U+10FFFF
     };

     uint8_t actual[4];
     for (const auto& v : TEST_VECTORS) {
         char case_id[64];
         size_t encoded_len = sizeof(actual);
         zx_status_t res;

         snprintf(case_id, sizeof(case_id), "case id [0x%04hx : 0x%04hx]", v.src[0], v.src[1]);
         ::memset(actual, 0xAB, sizeof(actual));

         res = utf16_to_utf8(v.src, fbl::count_of(v.src), actual, &encoded_len);
         ASSERT_EQ(ZX_OK, res, case_id);
         ASSERT_UTF8_EQ(v.expected, sizeof(v.expected),
                        actual, sizeof(actual),
                        encoded_len, case_id);
     }

     END_TEST;
 }

 static bool utf16to8_unpaired_surrogates(void) {
     BEGIN_TEST;

     static const struct {
         uint16_t src;
         uint8_t expected[3];
     } TEST_VECTORS[] = {
         // All unpaired surrogates are technically supposed to be illegal, but
         // apparently there are systems out there who use them any (Wikipedia
         // claims that Windows allows unpaired surrogates in file names encoded
         // using UTF-16)
         //
         // Unpaired surrogates are 16 bits wide, so they will require a 3-byte
         // UTF-8 encoding.
         { 0xD800, { 0xED, 0xA0, 0x80 } },
         { 0xD945, { 0xED, 0xA5, 0x85 } },
         { 0xDBFF, { 0xED, 0xAF, 0xBF } },
         { 0xDC00, { 0xED, 0xB0, 0x80 } },
         { 0xDD45, { 0xED, 0xB5, 0x85 } },
         { 0xDFFF, { 0xED, 0xBF, 0xBF } },
     };
     uint8_t replace[3] = { 0xEF, 0xBF, 0xBD };
     uint8_t actual[3];
     for (const auto& v : TEST_VECTORS) {
         char case_id[64];
         size_t encoded_len = sizeof(actual);
         zx_status_t res;

         // Attempt to encode the unpaired surrogate, but do not specify that we
         // want to preserve it.  We should end up with the encoded form of the
         // replacement character (U+FFFD) instead.
         snprintf(case_id, sizeof(case_id), "case id [0x%04hx, replace]", v.src);
         ::memset(actual, 0xAB, sizeof(actual));

         encoded_len = sizeof(actual);
         res = utf16_to_utf8(&v.src, 1, actual, &encoded_len);
         ASSERT_EQ(ZX_OK, res, case_id);
         ASSERT_UTF8_EQ(replace, sizeof(replace), actual, sizeof(actual),
                        encoded_len, case_id);

         // Do it again, but this time tell the converter to preserve the
         // unpaired surrogate instead.
         snprintf(case_id, sizeof(case_id), "case id [0x%04hx, preserve]", v.src);
         ::memset(actual, 0xAB, sizeof(actual));

         encoded_len = sizeof(actual);
         res = utf16_to_utf8(&v.src, 1, actual, &encoded_len,
                             UTF_CONVERT_FLAG_PRESERVE_UNPAIRED_SURROGATES);
         ASSERT_EQ(ZX_OK, res, case_id);
         ASSERT_UTF8_EQ(v.expected, sizeof(v.expected), actual, sizeof(actual),
                        encoded_len, case_id);
     }

     END_TEST;
 }

 // Converts a short ASCII string into destination buffers of three different
 // sizes.  In every case the test expects ZX_OK, expects the reported length to
 // be the full length the encoding needs, and expects nothing to be written
 // past the size that was offered to the converter.
 static bool utf16to8_dst_buffer_lengths(void) {
     BEGIN_TEST;

     const uint16_t src[] = { 'T', 'e', 's', 't' };
     const uint8_t expected[] = { 'T', 'e', 's', 't' };
     uint8_t actual[16];
     static_assert(sizeof(expected) <= sizeof(actual),
                   "'actual' buffer must be large enough to hold 'expected' result");

     // Perform a conversion, but test three cases.
     //
     // 1) The destination buffer size is exactly what is required.
     // 2) The destination buffer size is more than what is required.
     // 3) The destination buffer size is less than what is required.
     static const size_t DST_LENGTHS[] = { sizeof(expected), sizeof(actual), sizeof(expected) >> 1 };
     for (const auto& d : DST_LENGTHS) {
         char case_id[64];
         size_t encoded_len = d;
         zx_status_t res;

         snprintf(case_id, sizeof(case_id), "case id [needed %zu, provided %zu]",
                  sizeof(expected), d);
         ::memset(actual, 0xAB, sizeof(actual));

         // Never offer the converter more room than 'actual' really has.
         ASSERT_LE(encoded_len, sizeof(actual), case_id);
         res = utf16_to_utf8(src, fbl::count_of(src), actual, &encoded_len);

         ASSERT_EQ(ZX_OK, res, case_id);
         ASSERT_EQ(sizeof(expected), encoded_len, case_id);

         // Only the bytes that fit in the offered size can be compared.
         ASSERT_BYTES_EQ(expected, actual, d < encoded_len ? d : encoded_len, case_id);

         // Everything beyond the offered size must still hold the fill pattern.
         // The pattern is the expected value, so it goes first, matching the
         // (expected, actual) order used by every other byte comparison here.
         if (d < sizeof(actual)) {
             uint8_t pattern[sizeof(actual)];
             ::memset(pattern, 0xAB, sizeof(pattern));
             ASSERT_BYTES_EQ(pattern, actual + d, sizeof(actual) - d, case_id);
         }
     }

     END_TEST;
 }

 // Runs the same BOM-prefixed string through the converter in host byte order
 // and in swapped byte order, under every combination of the endianness flags
 // and UTF_CONVERT_FLAG_DISCARD_BOM, and compares the output with the bytes the
 // EXPECTED table lists for that source and flag combination.
 static bool utf16to8_endianness_and_bom(void) {
     BEGIN_TEST;

     static const struct {
         uint16_t src[5];
         bool host_order;
     } SOURCES[] = {
         { { 0xFEFF, 'T', 'e', 's', 't' }, true },
         { { __bswap16(0xFEFF),
             __bswap16('T'),
             __bswap16('e'),
             __bswap16('s'),
             __bswap16('t'),
             }, false }
     };

     // These arrays are static because the static EXPECTED table below stores
     // pointers to them.  Pointers to automatic arrays would be captured the
     // first time this function runs and would dangle on any later run.
     static const uint8_t bom_removed[] = { 'T', 'e', 's', 't' };
     static const uint8_t bom_removed_inverted[] = {
         0xE5, 0x90, 0x80, 0xE6, 0x94, 0x80, 0xE7,
         0x8C, 0x80, 0xE7, 0x90, 0x80 };
     static const uint8_t bom_encoded[] = { 0xEF, 0xBB, 0xBF, 'T', 'e', 's', 't' };
     static const uint8_t bom_encoded_inverted[] = {
         0xEF, 0xBF, 0xBE, 0xE5, 0x90, 0x80, 0xE6,
         0x94, 0x80, 0xE7, 0x8C, 0x80, 0xE7, 0x90,
         0x80 };

     // Sized for the longest expected output.
     uint8_t actual[fbl::count_of(bom_encoded_inverted)];

     // For each flag combination: the bytes expected from the host-order source
     // (host) and from the byte-swapped source (inv).
 #define EXPECT(e) { e, sizeof(e) }
     static const struct {
         uint32_t flags;
         struct {
             const uint8_t* exp;
             size_t len;
         } host;
         struct {
             const uint8_t* exp;
             size_t len;
         } inv;
     } EXPECTED[] {
         { 0,
           EXPECT(bom_encoded), EXPECT(bom_encoded) },
         { UTF_CONVERT_FLAG_DISCARD_BOM,
           EXPECT(bom_removed), EXPECT(bom_removed) },
         { HOST_ENDIAN_FLAG,
           EXPECT(bom_encoded), EXPECT(bom_encoded_inverted) },
         { HOST_ENDIAN_FLAG | UTF_CONVERT_FLAG_DISCARD_BOM,
           EXPECT(bom_removed), EXPECT(bom_removed_inverted) },
         { INVERT_ENDIAN_FLAG,
           EXPECT(bom_encoded_inverted), EXPECT(bom_encoded) },
         { INVERT_ENDIAN_FLAG | UTF_CONVERT_FLAG_DISCARD_BOM,
           EXPECT(bom_removed_inverted), EXPECT(bom_removed) },
     };
 #undef EXPECT

     for (const auto& s : SOURCES) {
         for (const auto& e : EXPECTED) {
             char case_id[64];
             zx_status_t res;
             size_t enc_len = sizeof(actual);

             ::memset(actual, 0xAB, sizeof(actual));
             snprintf(case_id, sizeof(case_id), "case id [%s BOM, %s endian]",
                      (e.flags & UTF_CONVERT_FLAG_DISCARD_BOM) ? "discard" : "encode",
                      (e.flags & HOST_ENDIAN_FLAG) ? "host" :
                      (e.flags & INVERT_ENDIAN_FLAG) ? "invert" : "detect");

             res = utf16_to_utf8(s.src, fbl::count_of(s.src), actual, &enc_len, e.flags);
             ASSERT_EQ(ZX_OK, res, case_id);

             // Pick the expectation that belongs to this source's byte order.
             if (s.host_order) {
                 ASSERT_UTF8_EQ(e.host.exp, e.host.len, actual, sizeof(actual), enc_len, case_id);
             } else {
                 ASSERT_UTF8_EQ(e.inv.exp, e.inv.len, actual, sizeof(actual), enc_len, case_id);
             }
         }
     }

     END_TEST;
 }

 // Registers every UTF-16 to UTF-8 conversion test defined above under the
 // utf_conversion_tests test case.
 BEGIN_TEST_CASE(utf_conversion_tests)
 RUN_TEST(utf16to8_bad_args);
 RUN_TEST(utf16to8_empty_source);
 RUN_TEST(utf16to8_simple_codepoints);
 RUN_TEST(utf16to8_paired_surrogates);
 RUN_TEST(utf16to8_unpaired_surrogates);
 RUN_TEST(utf16to8_dst_buffer_lengths);
 RUN_TEST(utf16to8_endianness_and_bom);
 END_TEST_CASE(utf_conversion_tests)
	// Copyright 2018 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include <endian.h>
	#include <stdio.h>
	#include <unittest/unittest.h>
	#include <utf_conversion/utf_conversion.h>

	#include <fbl/algorithm.h>

	// Endianness flags expressed relative to the machine running the tests:
	// HOST_ENDIAN_FLAG is the FORCE_*_ENDIAN flag that matches the host byte order
	// and INVERT_ENDIAN_FLAG is the one for the opposite byte order.
	#if (BYTE_ORDER != BIG_ENDIAN)
	static constexpr uint32_t HOST_ENDIAN_FLAG = UTF_CONVERT_FLAG_FORCE_LITTLE_ENDIAN;
	static constexpr uint32_t INVERT_ENDIAN_FLAG = UTF_CONVERT_FLAG_FORCE_BIG_ENDIAN;
	#else
	static constexpr uint32_t HOST_ENDIAN_FLAG = UTF_CONVERT_FLAG_FORCE_BIG_ENDIAN;
	static constexpr uint32_t INVERT_ENDIAN_FLAG = UTF_CONVERT_FLAG_FORCE_LITTLE_ENDIAN;
	#endif

	// Checks one conversion result in three steps, in this order: the destination
	// capacity (got_capacity) is at least want_len, the length reported by the
	// converter (reported_len) equals want_len, and the first want_len bytes of
	// got match want.  msg is forwarded to each underlying assertion.
	#define ASSERT_UTF8_EQ(want, want_len, got, got_capacity, reported_len, msg) \
	    do { \
	        ASSERT_GE(got_capacity, want_len, msg); \
	        ASSERT_EQ(want_len, reported_len, msg); \
	        ASSERT_BYTES_EQ(want, got, want_len, msg); \
	    } while(false)

	// Exercises argument validation in utf16_to_utf8.  A null destination with a
	// non-zero length, a null length pointer, and undefined flag bits are each
	// expected to fail with ZX_ERR_INVALID_ARGS and leave the caller's outputs
	// untouched; a null destination with a zero length is expected to succeed and
	// report the size the encoding would need.
	static bool utf16to8_bad_args(void) {
	    BEGIN_TEST;

	    // Initialized up front so that no call below is ever handed an
	    // indeterminate code unit.  The final check expects this value (0xAB) to
	    // need a two byte encoding.
	    uint16_t src = 0xAB;
	    uint8_t dst = 0xFE;
	    size_t dst_len;
	    zx_status_t res;

	    // Bad destination buffer with non-zero destination length
	    dst_len = 1;
	    res = utf16_to_utf8(&src, 1, nullptr, &dst_len);
	    ASSERT_EQ(ZX_ERR_INVALID_ARGS, res, "null dst should fail with INVALID_ARGS");
	    ASSERT_EQ(1, dst_len, "dst_len modified after conversion with invalid args");

	    // Bad dest len pointer
	    res = utf16_to_utf8(&src, 1, &dst, nullptr);
	    ASSERT_EQ(ZX_ERR_INVALID_ARGS, res, "null dst_len should fail with INVALID_ARGS");
	    ASSERT_EQ(0xFE, dst, "dst modified after conversion with invalid args");

	    // Bad (undefined) flags.  dst_len is set again here so this case does not
	    // depend on the value left behind by the earlier ones.
	    dst_len = 1;
	    res = utf16_to_utf8(&src, 1, &dst, &dst_len, 0x80000000);
	    ASSERT_EQ(ZX_ERR_INVALID_ARGS, res, "undefined flags should fail with INVALID_ARGS");
	    ASSERT_EQ(1, dst_len, "dst_len modified after conversion with invalid args");
	    ASSERT_EQ(0xFE, dst, "dst modified after conversion with invalid args");

	    // A null dest buffer is allowed if (and only if) the dst_len is zero.
	    // Practical use cases include using the converter to determine the length
	    // needed to hold a converted string.
	    dst_len = 0;
	    res = utf16_to_utf8(&src, 1, nullptr, &dst_len);
	    ASSERT_EQ(ZX_OK, res, "null dst with zero dst_len should succeed");
	    ASSERT_EQ(2, dst_len, "encoded size of 0xAB should be 2!");

	    END_TEST;
	}

	static bool utf16to8_empty_source(void) {
	BEGIN_TEST;

	uint16_t src;
	static const uint8_t expected[] = { 0xA1, 0xB2, 0xC3, 0xD4 };
	uint8_t actual[sizeof(expected)];
	size_t dst_len;
	zx_status_t res;

	// Check to make sure that attempting to encode a zero length source results
	// in a length of zero and no changes to the destination buffer.
	memcpy(actual, expected, sizeof(actual));
	dst_len = sizeof(actual);
	res = utf16_to_utf8(&src, 0,actual, &dst_len);
	ASSERT_EQ(ZX_OK, res, "zero length string conversion failed");
	ASSERT_EQ(0, dst_len, "dst_len should be zero after zero length string conversion");
	ASSERT_BYTES_EQ(expected, actual, sizeof(actual),
	"dst buffer modified after zero length string conversion");

	dst_len = sizeof(actual);
	res = utf16_to_utf8(nullptr, 1,actual, &dst_len);
	ASSERT_EQ(ZX_OK, res, "null source string conversion failed");
	ASSERT_EQ(0, dst_len, "dst_len should be zero after null source string conversion");
	ASSERT_BYTES_EQ(expected, actual, sizeof(actual),
	"dst buffer modified after null source string conversion");

	END_TEST;
	}

	static bool utf16to8_simple_codepoints(void) {
	BEGIN_TEST;

	static const struct {
	uint16_t src;
	uint8_t expected[3];
	size_t expected_len;
	} TEST_VECTORS[] = {
	// 1 byte UTF-8 codepoints (U+0000, U+007F)
	{ 0x0000, { 0x00 }, 1 },
	{ 0x0001, { 0x01 }, 1 },
	{ 0x007f, { 0x7f }, 1 },

	// 2 byte UTF-8 codepoints (U+0080, U+07FF)
	{ 0x0080, { 0xC2, 0x80 }, 2 },
	{ 0x0456, { 0xD1, 0x96 }, 2 },
	{ 0x07FF, { 0xDF, 0xBF }, 2 },

	// 3 byte UTF-8 codepoints (U+0800, U+07FF)
	// Note: we are skipping the (theoretically illegal) unpaired surrogate
	// range (U+D800, U+DFFF) here. There is a separate test for support of
	// unpaired surrogates.
	{ 0x0800, { 0xE0, 0xA0, 0x80 }, 3 },
	{ 0x4567, { 0xE4, 0x95, 0xA7 }, 3 },
	{ 0xD7FF, { 0xED, 0x9F, 0xBF }, 3 },
	{ 0xE000, { 0xEE, 0x80, 0x80 }, 3 },
	{ 0xE456, { 0xEE, 0x91, 0x96 }, 3 },
	{ 0xFFFF, { 0xEF, 0xBF, 0xBF }, 3 },
	};

	uint8_t actual[3];
	for (const auto& v : TEST_VECTORS) {
	char case_id[64];
	size_t encoded_len = sizeof(actual);
	zx_status_t res;

	snprintf(case_id, sizeof(case_id), "case id [0x%04hx]", v.src);
	::memset(actual, 0xAB, sizeof(actual));

	res = utf16_to_utf8(&v.src, 1, actual, &encoded_len);
	ASSERT_EQ(ZX_OK, res, case_id);
	ASSERT_LE(v.expected_len, sizeof(v.expected), case_id);
	ASSERT_UTF8_EQ(v.expected, v.expected_len,
	actual, sizeof(actual),
	encoded_len, case_id);
	}

	END_TEST;
	}

	static bool utf16to8_paired_surrogates(void) {
	BEGIN_TEST;

	// All paired surrogate encodings are going to be 4 byte UTF-8 codepoints (U+010000, U+10FFFF)
	static const struct {
	uint16_t src[2];
	uint8_t expected[4];
	} TEST_VECTORS[] = {
	{ { 0xD800, 0xDC00 }, { 0xF0, 0x90, 0x80, 0x80 } }, // U+10000
	{ { 0xD811, 0xDD67 }, { 0xF0, 0x94, 0x95, 0xA7 } }, // U+14567
	{ { 0xDA6F, 0xDCDE }, { 0xF2, 0xAB, 0xB3, 0x9E } }, // U+ABCDE
	{ { 0xDBBF, 0xDFFF }, { 0xF3, 0xBF, 0xBF, 0xBF } }, // U+FFFFF
	{ { 0xDBC0, 0xDC00 }, { 0xF4, 0x80, 0x80, 0x80 } }, // U+100000
	{ { 0xDBD1, 0xDD67 }, { 0xF4, 0x84, 0x95, 0xA7 } }, // U+104567
	{ { 0xDBFF, 0xDFFF }, { 0xF4, 0x8F, 0xBF, 0xBF } }, // U+10FFFF
	};

	uint8_t actual[4];
	for (const auto& v : TEST_VECTORS) {
	char case_id[64];
	size_t encoded_len = sizeof(actual);
	zx_status_t res;

	snprintf(case_id, sizeof(case_id), "case id [0x%04hx : 0x%04hx]", v.src[0], v.src[1]);
	::memset(actual, 0xAB, sizeof(actual));

	res = utf16_to_utf8(v.src, fbl::count_of(v.src), actual, &encoded_len);
	ASSERT_EQ(ZX_OK, res, case_id);
	ASSERT_UTF8_EQ(v.expected, sizeof(v.expected),
	actual, sizeof(actual),
	encoded_len, case_id);
	}

	END_TEST;
	}

	static bool utf16to8_unpaired_surrogates(void) {
	BEGIN_TEST;

	static const struct {
	uint16_t src;
	uint8_t expected[3];
	} TEST_VECTORS[] = {
	// All unpaired surrogates are technically supposed to be illegal, but
	// apparently there are systems out there who use them any (Wikipedia
	// claims that Windows allows unpaired surrogates in file names encoded
	// using UTF-16)
	//
	// Unpaired surrogates are 16 bits wide, so they will require a 3-byte
	// UTF-8 encoding.
	{ 0xD800, { 0xED, 0xA0, 0x80 } },
	{ 0xD945, { 0xED, 0xA5, 0x85 } },
	{ 0xDBFF, { 0xED, 0xAF, 0xBF } },
	{ 0xDC00, { 0xED, 0xB0, 0x80 } },
	{ 0xDD45, { 0xED, 0xB5, 0x85 } },
	{ 0xDFFF, { 0xED, 0xBF, 0xBF } },
	};
	uint8_t replace[3] = { 0xEF, 0xBF, 0xBD };
	uint8_t actual[3];
	for (const auto& v : TEST_VECTORS) {
	char case_id[64];
	size_t encoded_len = sizeof(actual);
	zx_status_t res;

	// Attempt to encode the unpaired surrogate, but do not specify that we
	// want to preserve it. We should end up with the encoded form of the
	// replacement character (U+FFFD) instead.
	snprintf(case_id, sizeof(case_id), "case id [0x%04hx, replace]", v.src);
	::memset(actual, 0xAB, sizeof(actual));

	encoded_len = sizeof(actual);
	res = utf16_to_utf8(&v.src, 1, actual, &encoded_len);
	ASSERT_EQ(ZX_OK, res, case_id);
	ASSERT_UTF8_EQ(replace, sizeof(replace), actual, sizeof(actual),
	encoded_len, case_id);

	// Do it again, but this time tell the converter to preserve the
	// unpaired surrogate instead.
	snprintf(case_id, sizeof(case_id), "case id [0x%04hx, preserve]", v.src);
	::memset(actual, 0xAB, sizeof(actual));

	encoded_len = sizeof(actual);
	res = utf16_to_utf8(&v.src, 1, actual, &encoded_len,
	UTF_CONVERT_FLAG_PRESERVE_UNPAIRED_SURROGATES);
	ASSERT_EQ(ZX_OK, res, case_id);
	ASSERT_UTF8_EQ(v.expected, sizeof(v.expected), actual, sizeof(actual),
	encoded_len, case_id);
	}

	END_TEST;
	}

	// Converts a short ASCII string into destination buffers of three different
	// sizes.  In every case the test expects ZX_OK, expects the reported length to
	// be the full length the encoding needs, and expects nothing to be written
	// past the size that was offered to the converter.
	static bool utf16to8_dst_buffer_lengths(void) {
	    BEGIN_TEST;

	    const uint16_t src[] = { 'T', 'e', 's', 't' };
	    const uint8_t expected[] = { 'T', 'e', 's', 't' };
	    uint8_t actual[16];
	    static_assert(sizeof(expected) <= sizeof(actual),
	                  "'actual' buffer must be large enough to hold 'expected' result");

	    // Perform a conversion, but test three cases.
	    //
	    // 1) The destination buffer size is exactly what is required.
	    // 2) The destination buffer size is more than what is required.
	    // 3) The destination buffer size is less than what is required.
	    static const size_t DST_LENGTHS[] = { sizeof(expected), sizeof(actual), sizeof(expected) >> 1 };
	    for (const auto& d : DST_LENGTHS) {
	        char case_id[64];
	        size_t encoded_len = d;
	        zx_status_t res;

	        snprintf(case_id, sizeof(case_id), "case id [needed %zu, provided %zu]",
	                 sizeof(expected), d);
	        ::memset(actual, 0xAB, sizeof(actual));

	        // Never offer the converter more room than 'actual' really has.
	        ASSERT_LE(encoded_len, sizeof(actual), case_id);
	        res = utf16_to_utf8(src, fbl::count_of(src), actual, &encoded_len);

	        ASSERT_EQ(ZX_OK, res, case_id);
	        ASSERT_EQ(sizeof(expected), encoded_len, case_id);

	        // Only the bytes that fit in the offered size can be compared.
	        ASSERT_BYTES_EQ(expected, actual, d < encoded_len ? d : encoded_len, case_id);

	        // Everything beyond the offered size must still hold the fill pattern.
	        // The pattern is the expected value, so it goes first, matching the
	        // (expected, actual) order used by every other byte comparison here.
	        if (d < sizeof(actual)) {
	            uint8_t pattern[sizeof(actual)];
	            ::memset(pattern, 0xAB, sizeof(pattern));
	            ASSERT_BYTES_EQ(pattern, actual + d, sizeof(actual) - d, case_id);
	        }
	    }

	    END_TEST;
	}

	// Runs the same BOM-prefixed string through the converter in host byte order
	// and in swapped byte order, under every combination of the endianness flags
	// and UTF_CONVERT_FLAG_DISCARD_BOM, and compares the output with the bytes the
	// EXPECTED table lists for that source and flag combination.
	static bool utf16to8_endianness_and_bom(void) {
	    BEGIN_TEST;

	    static const struct {
	        uint16_t src[5];
	        bool host_order;
	    } SOURCES[] = {
	        { { 0xFEFF, 'T', 'e', 's', 't' }, true },
	        { { __bswap16(0xFEFF),
	            __bswap16('T'),
	            __bswap16('e'),
	            __bswap16('s'),
	            __bswap16('t'),
	            }, false }
	    };

	    // These arrays are static because the static EXPECTED table below stores
	    // pointers to them.  Pointers to automatic arrays would be captured the
	    // first time this function runs and would dangle on any later run.
	    static const uint8_t bom_removed[] = { 'T', 'e', 's', 't' };
	    static const uint8_t bom_removed_inverted[] = {
	        0xE5, 0x90, 0x80, 0xE6, 0x94, 0x80, 0xE7,
	        0x8C, 0x80, 0xE7, 0x90, 0x80 };
	    static const uint8_t bom_encoded[] = { 0xEF, 0xBB, 0xBF, 'T', 'e', 's', 't' };
	    static const uint8_t bom_encoded_inverted[] = {
	        0xEF, 0xBF, 0xBE, 0xE5, 0x90, 0x80, 0xE6,
	        0x94, 0x80, 0xE7, 0x8C, 0x80, 0xE7, 0x90,
	        0x80 };

	    // Sized for the longest expected output.
	    uint8_t actual[fbl::count_of(bom_encoded_inverted)];

	    // For each flag combination: the bytes expected from the host-order source
	    // (host) and from the byte-swapped source (inv).
	#define EXPECT(e) { e, sizeof(e) }
	    static const struct {
	        uint32_t flags;
	        struct {
	            const uint8_t* exp;
	            size_t len;
	        } host;
	        struct {
	            const uint8_t* exp;
	            size_t len;
	        } inv;
	    } EXPECTED[] {
	        { 0,
	          EXPECT(bom_encoded), EXPECT(bom_encoded) },
	        { UTF_CONVERT_FLAG_DISCARD_BOM,
	          EXPECT(bom_removed), EXPECT(bom_removed) },
	        { HOST_ENDIAN_FLAG,
	          EXPECT(bom_encoded), EXPECT(bom_encoded_inverted) },
	        { HOST_ENDIAN_FLAG | UTF_CONVERT_FLAG_DISCARD_BOM,
	          EXPECT(bom_removed), EXPECT(bom_removed_inverted) },
	        { INVERT_ENDIAN_FLAG,
	          EXPECT(bom_encoded_inverted), EXPECT(bom_encoded) },
	        { INVERT_ENDIAN_FLAG | UTF_CONVERT_FLAG_DISCARD_BOM,
	          EXPECT(bom_removed_inverted), EXPECT(bom_removed) },
	    };
	#undef EXPECT

	    for (const auto& s : SOURCES) {
	        for (const auto& e : EXPECTED) {
	            char case_id[64];
	            zx_status_t res;
	            size_t enc_len = sizeof(actual);

	            ::memset(actual, 0xAB, sizeof(actual));
	            snprintf(case_id, sizeof(case_id), "case id [%s BOM, %s endian]",
	                     (e.flags & UTF_CONVERT_FLAG_DISCARD_BOM) ? "discard" : "encode",
	                     (e.flags & HOST_ENDIAN_FLAG) ? "host" :
	                     (e.flags & INVERT_ENDIAN_FLAG) ? "invert" : "detect");

	            res = utf16_to_utf8(s.src, fbl::count_of(s.src), actual, &enc_len, e.flags);
	            ASSERT_EQ(ZX_OK, res, case_id);

	            // Pick the expectation that belongs to this source's byte order.
	            if (s.host_order) {
	                ASSERT_UTF8_EQ(e.host.exp, e.host.len, actual, sizeof(actual), enc_len, case_id);
	            } else {
	                ASSERT_UTF8_EQ(e.inv.exp, e.inv.len, actual, sizeof(actual), enc_len, case_id);
	            }
	        }
	    }

	    END_TEST;
	}

	// Registers every UTF-16 to UTF-8 conversion test defined above under the
	// utf_conversion_tests test case.
	BEGIN_TEST_CASE(utf_conversion_tests)
	RUN_TEST(utf16to8_bad_args);
	RUN_TEST(utf16to8_empty_source);
	RUN_TEST(utf16to8_simple_codepoints);
	RUN_TEST(utf16to8_paired_surrogates);
	RUN_TEST(utf16to8_unpaired_surrogates);
	RUN_TEST(utf16to8_dst_buffer_lengths);
	RUN_TEST(utf16to8_endianness_and_bom);
	END_TEST_CASE(utf_conversion_tests)