| /*- |
| * Copyright (c) 2011 Michihiro NAKAJIMA |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| #include "test.h" |
| __FBSDID("$FreeBSD"); |
| |
| #include <locale.h> |
| |
/*
 * The sample tar file was made in LANG=KOI8-R and it contains two
 * files whose filenames are stored in different charsets.
 *  - the filename of the first file is stored in BINARY mode.
 *  - the filename of the second file is stored in UTF-8.
 *
 * Whether or not the hdrcharset option is specified, we will correctly
 * read the filename of the second file, which is stored in UTF-8 by
 * default.
 */
| |
| static void |
| test_read_format_tar_filename_KOI8R_CP866(const char *refname) |
| { |
| struct archive *a; |
| struct archive_entry *ae; |
| |
| /* |
| * Read filename in ru_RU.CP866 with "hdrcharset=KOI8-R" option. |
| * We should correctly read two filenames. |
| */ |
| if (NULL == setlocale(LC_ALL, "Russian_Russia.866") && |
| NULL == setlocale(LC_ALL, "ru_RU.CP866")) { |
| skipping("ru_RU.CP866 locale not available on this system."); |
| return; |
| } |
| |
| /* Test if the platform can convert from UTF-8. */ |
| assert((a = archive_read_new()) != NULL); |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a)); |
| if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) { |
| assertEqualInt(ARCHIVE_OK, archive_read_free(a)); |
| skipping("This system cannot convert character-set" |
| " from UTF-8 to CP866."); |
| return; |
| } |
| assertEqualInt(ARCHIVE_OK, archive_read_free(a)); |
| |
| assert((a = archive_read_new()) != NULL); |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); |
| if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { |
| skipping("This system cannot convert character-set" |
| " from KOI8-R to CP866."); |
| goto next_test; |
| } |
| assertEqualIntA(a, ARCHIVE_OK, |
| archive_read_open_filename(a, refname, 10240)); |
| |
| /* Verify regular first file. */ |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); |
| assertEqualString("\x8f\x90\x88\x82\x85\x92", |
| archive_entry_pathname(ae)); |
| assertEqualInt(6, archive_entry_size(ae)); |
| assertEqualInt(archive_entry_is_encrypted(ae), 0); |
| assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); |
| |
| /* Verify regular second file. */ |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); |
| assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2", |
| archive_entry_pathname(ae)); |
| assertEqualInt(6, archive_entry_size(ae)); |
| assertEqualInt(archive_entry_is_encrypted(ae), 0); |
| assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); |
| |
| |
| /* End of archive. */ |
| assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); |
| |
| /* Verify archive format. */ |
| assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); |
| assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, |
| archive_format(a)); |
| |
| /* Close the archive. */ |
| assertEqualInt(ARCHIVE_OK, archive_read_close(a)); |
| next_test: |
| assertEqualInt(ARCHIVE_OK, archive_read_free(a)); |
| |
| |
| /* |
| * Read filename in ru_RU.CP866 without "hdrcharset=KOI8-R" option. |
| * The filename we can properly read is only second file. |
| */ |
| |
| assert((a = archive_read_new()) != NULL); |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); |
| assertEqualIntA(a, ARCHIVE_OK, |
| archive_read_open_filename(a, refname, 10240)); |
| |
| /* |
| * Verify regular first file. |
| * The filename is not translated to CP866 because hdrcharset |
| * attribute is BINARY and there is not way to know its charset. |
| */ |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); |
| /* A filename is in KOI8-R. */ |
| assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", |
| archive_entry_pathname(ae)); |
| assertEqualInt(6, archive_entry_size(ae)); |
| assertEqualInt(archive_entry_is_encrypted(ae), 0); |
| assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); |
| |
| /* |
| * Verify regular second file. |
| * The filename is translated from UTF-8 to CP866 |
| */ |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); |
| assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2", |
| archive_entry_pathname(ae)); |
| assertEqualInt(6, archive_entry_size(ae)); |
| assertEqualInt(archive_entry_is_encrypted(ae), 0); |
| assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); |
| |
| |
| /* End of archive. */ |
| assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); |
| |
| /* Verify archive format. */ |
| assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); |
| assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, |
| archive_format(a)); |
| |
| /* Close the archive. */ |
| assertEqualInt(ARCHIVE_OK, archive_read_close(a)); |
| assertEqualInt(ARCHIVE_OK, archive_read_free(a)); |
| } |
| |
| static void |
| test_read_format_tar_filename_KOI8R_UTF8(const char *refname) |
| { |
| struct archive *a; |
| struct archive_entry *ae; |
| |
| /* |
| * Read filename in en_US.UTF-8 with "hdrcharset=KOI8-R" option. |
| * We should correctly read two filenames. |
| */ |
| if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { |
| skipping("en_US.UTF-8 locale not available on this system."); |
| return; |
| } |
| |
| assert((a = archive_read_new()) != NULL); |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); |
| if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { |
| assertEqualInt(ARCHIVE_OK, archive_read_free(a)); |
| skipping("This system cannot convert character-set" |
| " from KOI8-R to UTF-8."); |
| return; |
| } |
| assertEqualIntA(a, ARCHIVE_OK, |
| archive_read_open_filename(a, refname, 10240)); |
| |
| /* Verify regular file. */ |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); |
| assertEqualString("\xd0\x9f\xd0\xa0\xd0\x98\xd0\x92\xd0\x95\xd0\xa2", |
| archive_entry_pathname(ae)); |
| assertEqualInt(6, archive_entry_size(ae)); |
| assertEqualInt(archive_entry_is_encrypted(ae), 0); |
| assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); |
| |
| /* Verify regular file. */ |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); |
| assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82", |
| archive_entry_pathname(ae)); |
| assertEqualInt(6, archive_entry_size(ae)); |
| assertEqualInt(archive_entry_is_encrypted(ae), 0); |
| assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); |
| |
| /* Verify encryption status */ |
| assertEqualInt(archive_entry_is_encrypted(ae), 0); |
| assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); |
| |
| /* End of archive. */ |
| assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); |
| |
| /* Verify archive format. */ |
| assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); |
| assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, |
| archive_format(a)); |
| |
| /* Verify encryption status */ |
| assertEqualInt(archive_entry_is_encrypted(ae), 0); |
| assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); |
| |
| /* Close the archive. */ |
| assertEqualInt(ARCHIVE_OK, archive_read_close(a)); |
| assertEqualInt(ARCHIVE_OK, archive_read_free(a)); |
| |
| /* |
| * Read filename in en_US.UTF-8 without "hdrcharset=KOI8-R" option. |
| * The filename we can properly read is only second file. |
| */ |
| |
| assert((a = archive_read_new()) != NULL); |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); |
| assertEqualIntA(a, ARCHIVE_OK, |
| archive_read_open_filename(a, refname, 10240)); |
| |
| /* |
| * Verify regular first file. |
| * The filename is not translated to UTF-8 because hdrcharset |
| * attribute is BINARY and there is not way to know its charset. |
| */ |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); |
| /* A filename is in KOI8-R. */ |
| assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", |
| archive_entry_pathname(ae)); |
| assertEqualInt(6, archive_entry_size(ae)); |
| |
| /* Verify encryption status */ |
| assertEqualInt(archive_entry_is_encrypted(ae), 0); |
| assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); |
| |
| /* |
| * Verify regular second file. |
| */ |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); |
| assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82", |
| archive_entry_pathname(ae)); |
| assertEqualInt(6, archive_entry_size(ae)); |
| |
| |
| /* End of archive. */ |
| assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); |
| |
| /* Verify archive format. */ |
| assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); |
| assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, |
| archive_format(a)); |
| |
| /* Close the archive. */ |
| assertEqualInt(ARCHIVE_OK, archive_read_close(a)); |
| assertEqualInt(ARCHIVE_OK, archive_read_free(a)); |
| } |
| |
| static void |
| test_read_format_tar_filename_KOI8R_CP1251(const char *refname) |
| { |
| struct archive *a; |
| struct archive_entry *ae; |
| |
| /* |
| * Read filename in CP1251 with "hdrcharset=KOI8-R" option. |
| * We should correctly read two filenames. |
| */ |
| if (NULL == setlocale(LC_ALL, "Russian_Russia") && |
| NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { |
| skipping("CP1251 locale not available on this system."); |
| return; |
| } |
| |
| /* Test if the platform can convert from UTF-8. */ |
| assert((a = archive_read_new()) != NULL); |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a)); |
| if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) { |
| assertEqualInt(ARCHIVE_OK, archive_read_free(a)); |
| skipping("This system cannot convert character-set" |
| " from UTF-8 to CP1251."); |
| return; |
| } |
| assertEqualInt(ARCHIVE_OK, archive_read_free(a)); |
| |
| assert((a = archive_read_new()) != NULL); |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); |
| if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { |
| skipping("This system cannot convert character-set" |
| " from KOI8-R to CP1251."); |
| goto next_test; |
| } |
| assertEqualIntA(a, ARCHIVE_OK, |
| archive_read_open_filename(a, refname, 10240)); |
| |
| /* Verify regular first file. */ |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); |
| assertEqualString("\xcf\xd0\xc8\xc2\xc5\xd2", |
| archive_entry_pathname(ae)); |
| assertEqualInt(6, archive_entry_size(ae)); |
| assertEqualInt(archive_entry_is_encrypted(ae), 0); |
| assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); |
| |
| /* Verify regular second file. */ |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); |
| assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2", |
| archive_entry_pathname(ae)); |
| assertEqualInt(6, archive_entry_size(ae)); |
| assertEqualInt(archive_entry_is_encrypted(ae), 0); |
| assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); |
| |
| |
| /* End of archive. */ |
| assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); |
| |
| /* Verify archive format. */ |
| assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); |
| assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, |
| archive_format(a)); |
| |
| /* Close the archive. */ |
| assertEqualInt(ARCHIVE_OK, archive_read_close(a)); |
| next_test: |
| assertEqualInt(ARCHIVE_OK, archive_read_free(a)); |
| |
| /* |
| * Read filename in CP1251 without "hdrcharset=KOI8-R" option. |
| * The filename we can properly read is only second file. |
| */ |
| |
| assert((a = archive_read_new()) != NULL); |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); |
| assertEqualIntA(a, ARCHIVE_OK, |
| archive_read_open_filename(a, refname, 10240)); |
| |
| /* |
| * Verify regular first file. |
| * The filename is not translated to CP1251 because hdrcharset |
| * attribute is BINARY and there is not way to know its charset. |
| */ |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); |
| /* A filename is in KOI8-R. */ |
| assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", |
| archive_entry_pathname(ae)); |
| assertEqualInt(6, archive_entry_size(ae)); |
| assertEqualInt(archive_entry_is_encrypted(ae), 0); |
| assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); |
| |
| /* |
| * Verify regular second file. |
| */ |
| assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); |
| assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2", |
| archive_entry_pathname(ae)); |
| assertEqualInt(6, archive_entry_size(ae)); |
| assertEqualInt(archive_entry_is_encrypted(ae), 0); |
| assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); |
| |
| |
| /* End of archive. */ |
| assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); |
| |
| /* Verify archive format. */ |
| assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); |
| assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, |
| archive_format(a)); |
| |
| /* Close the archive. */ |
| assertEqualInt(ARCHIVE_OK, archive_read_close(a)); |
| assertEqualInt(ARCHIVE_OK, archive_read_free(a)); |
| } |
| |
| |
/*
 * Test entry point: hand the KOI8-R reference archive name to
 * extract_reference_file(), then run the CP866, UTF-8 and CP1251
 * filename checks against it, in that order.
 */
DEFINE_TEST(test_read_format_tar_filename)
{
	static const char sample[] =
	    "test_read_format_tar_filename_koi8r.tar.Z";

	extract_reference_file(sample);

	test_read_format_tar_filename_KOI8R_CP866(sample);
	test_read_format_tar_filename_KOI8R_UTF8(sample);
	test_read_format_tar_filename_KOI8R_CP1251(sample);
}