| /* |
| CFLocaleIdentifier.c |
| Copyright (c) 2002-2016, Apple Inc. and the Swift project authors |
| |
| Portions Copyright (c) 2014-2016 Apple Inc. and the Swift project authors |
| Licensed under Apache License v2.0 with Runtime Library Exception |
| See http://swift.org/LICENSE.txt for license information |
| See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| Responsibility: David Smith |
| |
| CFLocaleIdentifier.c defines |
| - enum value kLocaleIdentifierCStringMax |
| - structs KeyStringToResultString, SpecialCaseUpdates |
| and provides the following data for the functions |
| CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, |
| CFLocaleCreateCanonicalLocaleIdentifierFromString |
| CFLocaleCreateCanonicalLanguageIdentifierFromString |
| |
| 1. static const char * regionCodeToLocaleString[]; enum kNumRegionCodeToLocaleString; |
| map RegionCode 0..kNumRegionCodeToLocaleString-1 to canonical locale string |
| |
| 2. static const char * langCodeToLocaleString[]; enum kNumLangCodeToLocaleString; |
| map LangCode 0..kNumLangCodeToLocaleString-1 to canonical locale string |
| |
| 3. static const KeyStringToResultString oldAppleLocaleToCanonical[]; enum kNumOldAppleLocaleToCanonical; |
| map old Apple string oldAppleLocaleToCanonical[n].key |
| to canonical locale string oldAppleLocaleToCanonical[n].result |
| for n = 0..kNumOldAppleLocaleToCanonical-1 |
| |
| 4. static const KeyStringToResultString localeStringPrefixToCanonical[]; enum kNumLocaleStringPrefixToCanonical; |
| map non-canonical language prefix (3-letter, obsolete) localeStringPrefixToCanonical[].key |
| to updated replacement localeStringPrefixToCanonical[].result |
| for n = 0..kNumLocaleStringPrefixToCanonical-1 |
| |
| 5. static const SpecialCaseUpdates specialCases[]; |
| various special cases for updating region codes, or for updating language codes based on region codes |
| |
| 6. static const KeyStringToResultString localeStringRegionToDefaults[]; enum kNumLocaleStringRegionToDefaults; |
| map locale string region tag localeStringRegionToDefaults[n].key |
| to default substrings to delete localeStringRegionToDefaults[n].result |
| for n = 0..kNumLocaleStringRegionToDefaults-1 |
| |
| 7. static const KeyStringToResultString localeStringPrefixToDefaults[]; enum kNumLocaleStringPrefixToDefaults; |
| map locale string initial part localeStringPrefixToDefaults[n].key |
| to default substrings to delete localeStringPrefixToDefaults[n].result |
| for n = 0..kNumLocaleStringPrefixToDefaults-1 |
| |
| 8. static const KeyStringToResultString appleLocaleToLanguageString[]; enum kNumAppleLocaleToLanguageString; |
| map Apple locale string appleLocaleToLanguageString[].key |
| to equivalent language string appleLocaleToLanguageString[].result |
| for n = 0..kNumAppleLocaleToLanguageString-1 |
| |
| */ |
| |
| #include <CoreFoundation/CFString.h> |
| #include <CoreFoundation/CFCalendar.h> |
| #include <ctype.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <stdio.h> |
| #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
| #include <unicode/uloc.h> |
| #else |
| #define ULOC_KEYWORD_SEPARATOR '@' |
| #define ULOC_FULLNAME_CAPACITY 56 |
| #define ULOC_KEYWORD_AND_VALUES_CAPACITY 100 |
| #endif |
| #include "CFInternal.h" |
| #include "CFLocaleInternal.h" |
| |
| // Max byte length of locale identifier (ASCII) as C string, including terminating null byte |
| enum { |
| kLocaleIdentifierCStringMax = ULOC_FULLNAME_CAPACITY + ULOC_KEYWORD_AND_VALUES_CAPACITY // currently 56 + 100 |
| }; |
| |
| // KeyStringToResultString struct used in data tables for CFLocaleCreateCanonicalLocaleIdentifierFromString |
| struct KeyStringToResultString { |
| const char * key; |
| const char * result; |
| }; |
| typedef struct KeyStringToResultString KeyStringToResultString; |
| |
| // SpecialCaseUpdates struct used in data tables for CFLocaleCreateCanonicalLocaleIdentifierFromString |
| struct SpecialCaseUpdates { |
| const char * lang; |
| const char * reg1; |
| const char * update1; |
| const char * reg2; |
| const char * update2; |
| }; |
| typedef struct SpecialCaseUpdates SpecialCaseUpdates; |
| |
| |
| static const char * const regionCodeToLocaleString[] = { |
| // map RegionCode (array index) to canonical locale string |
| // |
| // canon. string region code; language code; [comment] [ # __CFBundleLocaleAbbreviationsArray |
| // -------- ------------ ------------------ ------------ -------- string, if different ] |
| "en_US", // 0 verUS; 0 langEnglish; |
| "fr_FR", // 1 verFrance; 1 langFrench; |
| "en_GB", // 2 verBritain; 0 langEnglish; |
| "de_DE", // 3 verGermany; 2 langGerman; |
| "it_IT", // 4 verItaly; 3 langItalian; |
| "nl_NL", // 5 verNetherlands; 4 langDutch; |
| "nl_BE", // 6 verFlemish; 34 langFlemish (redundant, =Dutch); |
| "sv_SE", // 7 verSweden; 5 langSwedish; |
| "es_ES", // 8 verSpain; 6 langSpanish; |
| "da_DK", // 9 verDenmark; 7 langDanish; |
| "pt_PT", // 10 verPortugal; 8 langPortuguese; |
| "fr_CA", // 11 verFrCanada; 1 langFrench; |
| "nb_NO", // 12 verNorway; 9 langNorwegian (Bokmal); # "no_NO" |
| "he_IL", // 13 verIsrael; 10 langHebrew; |
| "ja_JP", // 14 verJapan; 11 langJapanese; |
| "en_AU", // 15 verAustralia; 0 langEnglish; |
| "ar", // 16 verArabic; 12 langArabic; |
| "fi_FI", // 17 verFinland; 13 langFinnish; |
| "fr_CH", // 18 verFrSwiss; 1 langFrench; |
| "de_CH", // 19 verGrSwiss; 2 langGerman; |
| "el_GR", // 20 verGreece; 14 langGreek (modern)-Grek-mono; |
| "is_IS", // 21 verIceland; 15 langIcelandic; |
| "mt_MT", // 22 verMalta; 16 langMaltese; |
| "el_CY", // 23 verCyprus; 14 langGreek?; el or tr? guess el # "" |
| "tr_TR", // 24 verTurkey; 17 langTurkish; |
| "hr_HR", // 25 verYugoCroatian; 18 langCroatian; * one-way mapping -> verCroatia |
| "nl_NL", // 26 KCHR, Netherlands; 4 langDutch; * one-way mapping |
| "nl_BE", // 27 KCHR, verFlemish; 34 langFlemish; * one-way mapping |
| "_CA", // 28 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" |
| "_CA", // 29 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" |
| "pt_PT", // 30 KCHR, Portugal; 8 langPortuguese; * one-way mapping |
| "nb_NO", // 31 KCHR, Norway; 9 langNorwegian (Bokmal); * one-way mapping # "no_NO" |
| "da_DK", // 32 KCHR, Denmark; 7 langDanish; * one-way mapping |
| "hi_IN", // 33 verIndiaHindi; 21 langHindi; |
| "ur_PK", // 34 verPakistanUrdu; 20 langUrdu; |
| "tr_TR", // 35 verTurkishModified; 17 langTurkish; * one-way mapping |
| "it_CH", // 36 verItalianSwiss; 3 langItalian; |
| "en_001", // 37 verInternational; 0 langEnglish; ASCII only # "en" |
| NULL, // 38 *unassigned; -1 none; * one-way mapping # "" |
| "ro_RO", // 39 verRomania; 37 langRomanian; |
| "grc", // 40 verGreekAncient; 148 langGreekAncient -Grek-poly; # "el_GR" |
| "lt_LT", // 41 verLithuania; 24 langLithuanian; |
| "pl_PL", // 42 verPoland; 25 langPolish; |
| "hu_HU", // 43 verHungary; 26 langHungarian; |
| "et_EE", // 44 verEstonia; 27 langEstonian; |
| "lv_LV", // 45 verLatvia; 28 langLatvian; |
| "se", // 46 verSami; 29 langSami; |
| "fo_FO", // 47 verFaroeIsl; 30 langFaroese; |
| "fa_IR", // 48 verIran; 31 langFarsi/Persian; |
| "ru_RU", // 49 verRussia; 32 langRussian; |
| "ga_IE", // 50 verIreland; 35 langIrishGaelic (no dots); |
| "ko_KR", // 51 verKorea; 23 langKorean; |
| "zh_CN", // 52 verChina; 33 langSimpChinese; |
| "zh_TW", // 53 verTaiwan; 19 langTradChinese; |
| "th_TH", // 54 verThailand; 22 langThai; |
| "und", // 55 verScriptGeneric; -1 none; # "" // <1.9> |
| "cs_CZ", // 56 verCzech; 38 langCzech; |
| "sk_SK", // 57 verSlovak; 39 langSlovak; |
| "und", // 58 verEastAsiaGeneric; -1 none; * one-way mapping # "" // <1.9> |
| "hu_HU", // 59 verMagyar; 26 langHungarian; * one-way mapping -> verHungary |
| "bn", // 60 verBengali; 67 langBengali; _IN or _BD? guess generic |
| "be_BY", // 61 verBelarus; 46 langBelorussian; |
| "uk_UA", // 62 verUkraine; 45 langUkrainian; |
| NULL, // 63 *unused; -1 none; * one-way mapping # "" |
| "el_GR", // 64 verGreeceAlt; 14 langGreek (modern)-Grek-mono; * one-way mapping |
| "sr_RS", // 65 verSerbian; 42 langSerbian -Cyrl; // <1.18> |
| "sl_SI", // 66 verSlovenian; 40 langSlovenian; |
| "mk_MK", // 67 verMacedonian; 43 langMacedonian; |
| "hr_HR", // 68 verCroatia; 18 langCroatian; |
| NULL, // 69 *unused; -1 none; * one-way mapping # "" |
| "de-1996", // 70 verGermanReformed; 2 langGerman; 1996 orthogr. # "de_DE" |
| "pt_BR", // 71 verBrazil; 8 langPortuguese; |
| "bg_BG", // 72 verBulgaria; 44 langBulgarian; |
| "ca_ES", // 73 verCatalonia; 130 langCatalan; |
| "mul", // 74 verMultilingual; -1 none; # "" |
| "gd", // 75 verScottishGaelic; 144 langScottishGaelic; |
| "gv", // 76 verManxGaelic; 145 langManxGaelic; |
| "br", // 77 verBreton; 142 langBreton; |
| "iu_CA", // 78 verNunavut; 143 langInuktitut -Cans; |
| "cy", // 79 verWelsh; 128 langWelsh; |
| "_CA", // 80 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" |
| "ga-Latg_IE", // 81 verIrishGaelicScrip; 146 langIrishGaelicScript -dots; # "ga_IE" // <xx> |
| "en_CA", // 82 verEngCanada; 0 langEnglish; |
| "dz_BT", // 83 verBhutan; 137 langDzongkha; |
| "hy_AM", // 84 verArmenian; 51 langArmenian; |
| "ka_GE", // 85 verGeorgian; 52 langGeorgian; |
| "es_419", // 86 verSpLatinAmerica; 6 langSpanish; # "es" |
| "es_ES", // 87 KCHR, Spain; 6 langSpanish; * one-way mapping |
| "to_TO", // 88 verTonga; 147 langTongan; |
| "pl_PL", // 89 KCHR, Poland; 25 langPolish; * one-way mapping |
| "ca_ES", // 90 KCHR, Catalonia; 130 langCatalan; * one-way mapping |
| "fr_001", // 91 verFrenchUniversal; 1 langFrench; |
| "de_AT", // 92 verAustria; 2 langGerman; |
| "es_419", // 93 > verSpLatinAmerica; 6 langSpanish; * one-way mapping # "es" |
| "gu_IN", // 94 verGujarati; 69 langGujarati; |
| "pa", // 95 verPunjabi; 70 langPunjabi; _IN or _PK? guess generic |
| "ur_IN", // 96 verIndiaUrdu; 20 langUrdu; |
| "vi_VN", // 97 verVietnam; 80 langVietnamese; |
| "fr_BE", // 98 verFrBelgium; 1 langFrench; |
| "uz_UZ", // 99 verUzbek; 47 langUzbek; |
| "en_SG", // 100 verSingapore; 0 langEnglish?; en, zh, or ms? guess en # "" |
| "nn_NO", // 101 verNynorsk; 151 langNynorsk; # "" |
| "af_ZA", // 102 verAfrikaans; 141 langAfrikaans; |
| "eo", // 103 verEsperanto; 94 langEsperanto; |
| "mr_IN", // 104 verMarathi; 66 langMarathi; |
| "bo", // 105 verTibetan; 63 langTibetan; |
| "ne_NP", // 106 verNepal; 64 langNepali; |
| "kl", // 107 verGreenland; 149 langGreenlandic; |
| "en_IE", // 108 verIrelandEnglish; 0 langEnglish; # (no entry) |
| }; |
| enum { |
| kNumRegionCodeToLocaleString = sizeof(regionCodeToLocaleString)/sizeof(char *) |
| }; |
| |
| static const char * const langCodeToLocaleString[] = { |
| // map LangCode (array index) to canonical locale string |
| // |
| // canon. string language code; [ comment] [ # __CFBundleLanguageAbbreviationsArray |
| // -------- -------------- ---------- -------- string, if different ] |
| "en", // 0 langEnglish; |
| "fr", // 1 langFrench; |
| "de", // 2 langGerman; |
| "it", // 3 langItalian; |
| "nl", // 4 langDutch; |
| "sv", // 5 langSwedish; |
| "es", // 6 langSpanish; |
| "da", // 7 langDanish; |
| "pt", // 8 langPortuguese; |
| "nb", // 9 langNorwegian (Bokmal); # "no" |
| "he", // 10 langHebrew -Hebr; |
| "ja", // 11 langJapanese -Jpan; |
| "ar", // 12 langArabic -Arab; |
| "fi", // 13 langFinnish; |
| "el", // 14 langGreek (modern)-Grek-mono; |
| "is", // 15 langIcelandic; |
| "mt", // 16 langMaltese -Latn; |
| "tr", // 17 langTurkish -Latn; |
| "hr", // 18 langCroatian; |
| "zh-Hant", // 19 langTradChinese; # "zh" |
| "ur", // 20 langUrdu -Arab; |
| "hi", // 21 langHindi -Deva; |
| "th", // 22 langThai -Thai; |
| "ko", // 23 langKorean -Hang; |
| "lt", // 24 langLithuanian; |
| "pl", // 25 langPolish; |
| "hu", // 26 langHungarian; |
| "et", // 27 langEstonian; |
| "lv", // 28 langLatvian; |
| "se", // 29 langSami; |
| "fo", // 30 langFaroese; |
| "fa", // 31 langFarsi/Persian -Arab; |
| "ru", // 32 langRussian -Cyrl; |
| "zh-Hans", // 33 langSimpChinese; # "zh" |
| "nl-BE", // 34 langFlemish (redundant, =Dutch); # "nl" |
| "ga", // 35 langIrishGaelic (no dots); |
| "sq", // 36 langAlbanian; no region codes |
| "ro", // 37 langRomanian; |
| "cs", // 38 langCzech; |
| "sk", // 39 langSlovak; |
| "sl", // 40 langSlovenian; |
| "yi", // 41 langYiddish -Hebr; no region codes |
| "sr", // 42 langSerbian -Cyrl; |
| "mk", // 43 langMacedonian -Cyrl; |
| "bg", // 44 langBulgarian -Cyrl; |
| "uk", // 45 langUkrainian -Cyrl; |
| "be", // 46 langBelorussian -Cyrl; |
| "uz", // 47 langUzbek -Cyrl; also -Latn, -Arab |
| "kk", // 48 langKazakh -Cyrl; no region codes; also -Latn, -Arab |
| "az-Cyrl", // 49 langAzerbaijani -Cyrl; no region codes # "az" |
| "az-Arab", // 50 langAzerbaijanAr -Arab; no region codes # "az" |
| "hy", // 51 langArmenian -Armn; |
| "ka", // 52 langGeorgian -Geor; |
| "mo", // 53 langMoldavian -Cyrl; no region codes |
| "ky", // 54 langKirghiz -Cyrl; no region codes; also -Latn, -Arab |
| "tg", // 55 langTajiki -Cyrl; no region codes; also -Latn, -Arab |
| "tk-Cyrl", // 56 langTurkmen -Cyrl; no region codes; also -Latn, -Arab |
| "mn-Mong", // 57 langMongolian -Mong; no region codes # "mn" |
| "mn", // 58 langMongolianCyr -Cyrl; no region codes # "mn" |
| "ps", // 59 langPashto -Arab; no region codes |
| "ku", // 60 langKurdish -Arab; no region codes |
| "ks", // 61 langKashmiri -Arab; no region codes |
| "sd", // 62 langSindhi -Arab; no region codes |
| "bo", // 63 langTibetan -Tibt; |
| "ne", // 64 langNepali -Deva; |
| "sa", // 65 langSanskrit -Deva; no region codes |
| "mr", // 66 langMarathi -Deva; |
| "bn", // 67 langBengali -Beng; |
| "as", // 68 langAssamese -Beng; no region codes |
| "gu", // 69 langGujarati -Gujr; |
| "pa", // 70 langPunjabi -Guru; |
| "or", // 71 langOriya -Orya; no region codes |
| "ml", // 72 langMalayalam -Mlym; no region codes |
| "kn", // 73 langKannada -Knda; no region codes |
| "ta", // 74 langTamil -Taml; no region codes |
| "te", // 75 langTelugu -Telu; no region codes |
| "si", // 76 langSinhalese -Sinh; no region codes |
| "my", // 77 langBurmese -Mymr; no region codes |
| "km", // 78 langKhmer -Khmr; no region codes |
| "lo", // 79 langLao -Laoo; no region codes |
| "vi", // 80 langVietnamese -Latn; |
| "id", // 81 langIndonesian -Latn; no region codes |
| "fil", // 82 langTagalog -Latn; no region codes |
| "ms", // 83 langMalayRoman -Latn; no region codes # "ms" |
| "ms-Arab", // 84 langMalayArabic -Arab; no region codes # "ms" |
| "am", // 85 langAmharic -Ethi; no region codes |
| "ti", // 86 langTigrinya -Ethi; no region codes |
| "om", // 87 langOromo -Ethi; no region codes |
| "so", // 88 langSomali -Latn; no region codes |
| "sw", // 89 langSwahili -Latn; no region codes |
| "rw", // 90 langKinyarwanda -Latn; no region codes |
| "rn", // 91 langRundi -Latn; no region codes |
| "ny", // 92 langNyanja/Chewa -Latn; no region codes # "" |
| "mg", // 93 langMalagasy -Latn; no region codes |
| "eo", // 94 langEsperanto -Latn; |
| NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 95 to 105 (gap) |
| NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 106 to 116 (gap) |
| NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 107 to 117 (gap) |
| "cy", // 128 langWelsh -Latn; |
| "eu", // 129 langBasque -Latn; no region codes |
| "ca", // 130 langCatalan -Latn; |
| "la", // 131 langLatin -Latn; no region codes |
| "qu", // 132 langQuechua -Latn; no region codes |
| "gn", // 133 langGuarani -Latn; no region codes |
| "ay", // 134 langAymara -Latn; no region codes |
| "tt-Cyrl", // 135 langTatar -Cyrl; no region codes |
| "ug", // 136 langUighur -Arab; no region codes |
| "dz", // 137 langDzongkha -Tibt; |
| "jv", // 138 langJavaneseRom -Latn; no region codes |
| "su", // 139 langSundaneseRom -Latn; no region codes |
| "gl", // 140 langGalician -Latn; no region codes |
| "af", // 141 langAfrikaans -Latn; |
| "br", // 142 langBreton -Latn; |
| "iu", // 143 langInuktitut -Cans; |
| "gd", // 144 langScottishGaelic; |
| "gv", // 145 langManxGaelic -Latn; |
| "ga-Latg", // 146 langIrishGaelicScript -Latn-dots; # "ga" // <xx> |
| "to", // 147 langTongan -Latn; |
| "grc", // 148 langGreekAncient -Grek-poly; # "el" |
| "kl", // 149 langGreenlandic -Latn; |
| "az", // 150 langAzerbaijanRoman -Latn; no region codes # "az" |
| "nn", // 151 langNynorsk -Latn; # (no entry) |
| }; |
| enum { |
| kNumLangCodeToLocaleString = sizeof(langCodeToLocaleString)/sizeof(char *) |
| }; |
| |
| static const KeyStringToResultString oldAppleLocaleToCanonical[] = { |
| // Map obsolete/old-style Apple strings to canonical |
| // Must be sorted according to how strcmp compares the strings in the first column |
| // |
| // non-canonical canonical [ comment ] # source/reason for non-canonical string |
| // string string |
| // ------------- --------- |
| { "Afrikaans", "af" }, // # __CFBundleLanguageNamesArray |
| { "Albanian", "sq" }, // # __CFBundleLanguageNamesArray |
| { "Amharic", "am" }, // # __CFBundleLanguageNamesArray |
| { "Arabic", "ar" }, // # __CFBundleLanguageNamesArray |
| { "Armenian", "hy" }, // # __CFBundleLanguageNamesArray |
| { "Assamese", "as" }, // # __CFBundleLanguageNamesArray |
| { "Aymara", "ay" }, // # __CFBundleLanguageNamesArray |
| { "Azerbaijani", "az" }, // -Arab,-Cyrl,-Latn? # __CFBundleLanguageNamesArray (had 3 entries "Azerbaijani" for "az-Arab", "az-Cyrl", "az-Latn") |
| { "Basque", "eu" }, // # __CFBundleLanguageNamesArray |
| { "Belarusian", "be" }, // # handle other names |
| { "Belorussian", "be" }, // # handle other names |
| { "Bengali", "bn" }, // # __CFBundleLanguageNamesArray |
| { "Brazilian Portugese", "pt-BR" }, // # from Installer.app Info.plist IFLanguages key, misspelled |
| { "Brazilian Portuguese", "pt-BR" }, // # correct spelling for above |
| { "Breton", "br" }, // # __CFBundleLanguageNamesArray |
| { "Bulgarian", "bg" }, // # __CFBundleLanguageNamesArray |
| { "Burmese", "my" }, // # __CFBundleLanguageNamesArray |
| { "Byelorussian", "be" }, // # __CFBundleLanguageNamesArray |
| { "Catalan", "ca" }, // # __CFBundleLanguageNamesArray |
| { "Chewa", "ny" }, // # handle other names |
| { "Chichewa", "ny" }, // # handle other names |
| { "Chinese", "zh" }, // -Hans,-Hant? # __CFBundleLanguageNamesArray (had 2 entries "Chinese" for "zh-Hant", "zh-Hans") |
| { "Chinese, Simplified", "zh-Hans" }, // # from Installer.app Info.plist IFLanguages key |
| { "Chinese, Traditional", "zh-Hant" }, // # correct spelling for below |
| { "Chinese, Tradtional", "zh-Hant" }, // # from Installer.app Info.plist IFLanguages key, misspelled |
| { "Croatian", "hr" }, // # __CFBundleLanguageNamesArray |
| { "Czech", "cs" }, // # __CFBundleLanguageNamesArray |
| { "Danish", "da" }, // # __CFBundleLanguageNamesArray |
| { "Dutch", "nl" }, // # __CFBundleLanguageNamesArray (had 2 entries "Dutch" for "nl", "nl-BE") |
| { "Dzongkha", "dz" }, // # __CFBundleLanguageNamesArray |
| { "English", "en" }, // # __CFBundleLanguageNamesArray |
| { "Esperanto", "eo" }, // # __CFBundleLanguageNamesArray |
| { "Estonian", "et" }, // # __CFBundleLanguageNamesArray |
| { "Faroese", "fo" }, // # __CFBundleLanguageNamesArray |
| { "Farsi", "fa" }, // # __CFBundleLanguageNamesArray |
| { "Finnish", "fi" }, // # __CFBundleLanguageNamesArray |
| { "Flemish", "nl-BE" }, // # handle other names |
| { "French", "fr" }, // # __CFBundleLanguageNamesArray |
| { "Galician", "gl" }, // # __CFBundleLanguageNamesArray |
| { "Gallegan", "gl" }, // # handle other names |
| { "Georgian", "ka" }, // # __CFBundleLanguageNamesArray |
| { "German", "de" }, // # __CFBundleLanguageNamesArray |
| { "Greek", "el" }, // # __CFBundleLanguageNamesArray (had 2 entries "Greek" for "el", "grc") |
| { "Greenlandic", "kl" }, // # __CFBundleLanguageNamesArray |
| { "Guarani", "gn" }, // # __CFBundleLanguageNamesArray |
| { "Gujarati", "gu" }, // # __CFBundleLanguageNamesArray |
| { "Hawaiian", "haw" }, // # handle new languages |
| { "Hebrew", "he" }, // # __CFBundleLanguageNamesArray |
| { "Hindi", "hi" }, // # __CFBundleLanguageNamesArray |
| { "Hungarian", "hu" }, // # __CFBundleLanguageNamesArray |
| { "Icelandic", "is" }, // # __CFBundleLanguageNamesArray |
| { "Indonesian", "id" }, // # __CFBundleLanguageNamesArray |
| { "Inuktitut", "iu" }, // # __CFBundleLanguageNamesArray |
| { "Irish", "ga" }, // # __CFBundleLanguageNamesArray (had 2 entries "Irish" for "ga", "ga-dots") |
| { "Italian", "it" }, // # __CFBundleLanguageNamesArray |
| { "Japanese", "ja" }, // # __CFBundleLanguageNamesArray |
| { "Javanese", "jv" }, // # __CFBundleLanguageNamesArray |
| { "Kalaallisut", "kl" }, // # handle other names |
| { "Kannada", "kn" }, // # __CFBundleLanguageNamesArray |
| { "Kashmiri", "ks" }, // # __CFBundleLanguageNamesArray |
| { "Kazakh", "kk" }, // # __CFBundleLanguageNamesArray |
| { "Khmer", "km" }, // # __CFBundleLanguageNamesArray |
| { "Kinyarwanda", "rw" }, // # __CFBundleLanguageNamesArray |
| { "Kirghiz", "ky" }, // # __CFBundleLanguageNamesArray |
| { "Korean", "ko" }, // # __CFBundleLanguageNamesArray |
| { "Kurdish", "ku" }, // # __CFBundleLanguageNamesArray |
| { "Lao", "lo" }, // # __CFBundleLanguageNamesArray |
| { "Latin", "la" }, // # __CFBundleLanguageNamesArray |
| { "Latvian", "lv" }, // # __CFBundleLanguageNamesArray |
| { "Lithuanian", "lt" }, // # __CFBundleLanguageNamesArray |
| { "Macedonian", "mk" }, // # __CFBundleLanguageNamesArray |
| { "Malagasy", "mg" }, // # __CFBundleLanguageNamesArray |
| { "Malay", "ms" }, // -Latn,-Arab? # __CFBundleLanguageNamesArray (had 2 entries "Malay" for "ms-Latn", "ms-Arab") |
| { "Malayalam", "ml" }, // # __CFBundleLanguageNamesArray |
| { "Maltese", "mt" }, // # __CFBundleLanguageNamesArray |
| { "Manx", "gv" }, // # __CFBundleLanguageNamesArray |
| { "Marathi", "mr" }, // # __CFBundleLanguageNamesArray |
| { "Moldavian", "mo" }, // # __CFBundleLanguageNamesArray |
| { "Mongolian", "mn" }, // -Mong,-Cyrl? # __CFBundleLanguageNamesArray (had 2 entries "Mongolian" for "mn-Mong", "mn-Cyrl") |
| { "Nepali", "ne" }, // # __CFBundleLanguageNamesArray |
| { "Norwegian", "nb" }, // # __CFBundleLanguageNamesArray (had "Norwegian" mapping to "no") |
| { "Nyanja", "ny" }, // # __CFBundleLanguageNamesArray |
| { "Nynorsk", "nn" }, // # handle other names (no entry in __CFBundleLanguageNamesArray) |
| { "Oriya", "or" }, // # __CFBundleLanguageNamesArray |
| { "Oromo", "om" }, // # __CFBundleLanguageNamesArray |
| { "Panjabi", "pa" }, // # handle other names |
| { "Pashto", "ps" }, // # __CFBundleLanguageNamesArray |
| { "Persian", "fa" }, // # handle other names |
| { "Polish", "pl" }, // # __CFBundleLanguageNamesArray |
| { "Portuguese", "pt" }, // # __CFBundleLanguageNamesArray |
| { "Portuguese, Brazilian", "pt-BR" }, // # handle other names |
| { "Punjabi", "pa" }, // # __CFBundleLanguageNamesArray |
| { "Pushto", "ps" }, // # handle other names |
| { "Quechua", "qu" }, // # __CFBundleLanguageNamesArray |
| { "Romanian", "ro" }, // # __CFBundleLanguageNamesArray |
| { "Ruanda", "rw" }, // # handle other names |
| { "Rundi", "rn" }, // # __CFBundleLanguageNamesArray |
| { "Russian", "ru" }, // # __CFBundleLanguageNamesArray |
| { "Sami", "se" }, // # __CFBundleLanguageNamesArray |
| { "Sanskrit", "sa" }, // # __CFBundleLanguageNamesArray |
| { "Scottish", "gd" }, // # __CFBundleLanguageNamesArray |
| { "Serbian", "sr" }, // # __CFBundleLanguageNamesArray |
| { "Simplified Chinese", "zh-Hans" }, // # handle other names |
| { "Sindhi", "sd" }, // # __CFBundleLanguageNamesArray |
| { "Sinhalese", "si" }, // # __CFBundleLanguageNamesArray |
| { "Slovak", "sk" }, // # __CFBundleLanguageNamesArray |
| { "Slovenian", "sl" }, // # __CFBundleLanguageNamesArray |
| { "Somali", "so" }, // # __CFBundleLanguageNamesArray |
| { "Spanish", "es" }, // # __CFBundleLanguageNamesArray |
| { "Sundanese", "su" }, // # __CFBundleLanguageNamesArray |
| { "Swahili", "sw" }, // # __CFBundleLanguageNamesArray |
| { "Swedish", "sv" }, // # __CFBundleLanguageNamesArray |
| { "Tagalog", "fil" }, // # __CFBundleLanguageNamesArray |
| { "Tajik", "tg" }, // # handle other names |
| { "Tajiki", "tg" }, // # __CFBundleLanguageNamesArray |
| { "Tamil", "ta" }, // # __CFBundleLanguageNamesArray |
| { "Tatar", "tt" }, // # __CFBundleLanguageNamesArray |
| { "Telugu", "te" }, // # __CFBundleLanguageNamesArray |
| { "Thai", "th" }, // # __CFBundleLanguageNamesArray |
| { "Tibetan", "bo" }, // # __CFBundleLanguageNamesArray |
| { "Tigrinya", "ti" }, // # __CFBundleLanguageNamesArray |
| { "Tongan", "to" }, // # __CFBundleLanguageNamesArray |
| { "Traditional Chinese", "zh-Hant" }, // # handle other names |
| { "Turkish", "tr" }, // # __CFBundleLanguageNamesArray |
| { "Turkmen", "tk" }, // # __CFBundleLanguageNamesArray |
| { "Uighur", "ug" }, // # __CFBundleLanguageNamesArray |
| { "Ukrainian", "uk" }, // # __CFBundleLanguageNamesArray |
| { "Urdu", "ur" }, // # __CFBundleLanguageNamesArray |
| { "Uzbek", "uz" }, // # __CFBundleLanguageNamesArray |
| { "Vietnamese", "vi" }, // # __CFBundleLanguageNamesArray |
| { "Welsh", "cy" }, // # __CFBundleLanguageNamesArray |
| { "Yiddish", "yi" }, // # __CFBundleLanguageNamesArray |
| { "ar_??", "ar" }, // # from old MapScriptInfoAndISOCodes |
| { "az.Ar", "az-Arab" }, // # from old LocaleRefGetPartString |
| { "az.Cy", "az-Cyrl" }, // # from old LocaleRefGetPartString |
| { "az.La", "az" }, // # from old LocaleRefGetPartString |
| { "be_??", "be_BY" }, // # from old MapScriptInfoAndISOCodes |
| { "bn_??", "bn" }, // # from old LocaleRefGetPartString |
| { "bo_??", "bo" }, // # from old MapScriptInfoAndISOCodes |
| { "br_??", "br" }, // # from old MapScriptInfoAndISOCodes |
| { "cy_??", "cy" }, // # from old MapScriptInfoAndISOCodes |
| { "de-96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9> |
| { "de_96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9> |
| { "de_??", "de-1996" }, // # from old MapScriptInfoAndISOCodes |
| { "el.El-P", "grc" }, // # from old LocaleRefGetPartString |
| { "en-ascii", "en_001" }, // # from earlier version of tables in this file! |
| { "en_??", "en_001" }, // # from old MapScriptInfoAndISOCodes |
| { "eo_??", "eo" }, // # from old MapScriptInfoAndISOCodes |
| { "es_??", "es_419" }, // # from old MapScriptInfoAndISOCodes |
| { "es_XL", "es_419" }, // # from earlier version of tables in this file! |
| { "fr_??", "fr_001" }, // # from old MapScriptInfoAndISOCodes |
| { "ga-dots", "ga-Latg" }, // # from earlier version of tables in this file! // <1.8> |
| { "ga-dots_IE", "ga-Latg_IE" }, // # from earlier version of tables in this file! // <1.8> |
| { "ga.Lg", "ga-Latg" }, // # from old LocaleRefGetPartString // <1.8> |
| { "ga.Lg_IE", "ga-Latg_IE" }, // # from old LocaleRefGetPartString // <1.8> |
| { "gd_??", "gd" }, // # from old MapScriptInfoAndISOCodes |
| { "gv_??", "gv" }, // # from old MapScriptInfoAndISOCodes |
| { "jv.La", "jv" }, // # logical extension // <1.9> |
| { "jw.La", "jv" }, // # from old LocaleRefGetPartString |
| { "kk.Cy", "kk" }, // # from old LocaleRefGetPartString |
| { "kl.La", "kl" }, // # from old LocaleRefGetPartString |
| { "kl.La_GL", "kl_GL" }, // # from old LocaleRefGetPartString // <1.9> |
| { "lp_??", "se" }, // # from old MapScriptInfoAndISOCodes |
| { "mk_??", "mk_MK" }, // # from old MapScriptInfoAndISOCodes |
| { "mn.Cy", "mn" }, // # from old LocaleRefGetPartString |
| { "mn.Mn", "mn-Mong" }, // # from old LocaleRefGetPartString |
| { "ms.Ar", "ms-Arab" }, // # from old LocaleRefGetPartString |
| { "ms.La", "ms" }, // # from old LocaleRefGetPartString |
| { "nl-be", "nl-BE" }, // # from old LocaleRefGetPartString |
| { "nl-be_BE", "nl_BE" }, // # from old LocaleRefGetPartString |
| { "no-NO", "nb-NO" }, // # not handled by localeStringPrefixToCanonical |
| { "no-NO_NO", "nb-NO_NO" }, // # not handled by localeStringPrefixToCanonical |
| // { "no-bok_NO", "nb_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical |
| // { "no-nyn_NO", "nn_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical |
| // { "nya", "ny" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical |
| { "pa_??", "pa" }, // # from old LocaleRefGetPartString |
| { "sa.Dv", "sa" }, // # from old LocaleRefGetPartString |
| { "sl_??", "sl_SI" }, // # from old MapScriptInfoAndISOCodes |
| { "sr_??", "sr_RS" }, // # from old MapScriptInfoAndISOCodes // <1.18> |
| { "su.La", "su" }, // # from old LocaleRefGetPartString |
| { "yi.He", "yi" }, // # from old LocaleRefGetPartString |
| { "zh-simp", "zh-Hans" }, // # from earlier version of tables in this file! |
| { "zh-trad", "zh-Hant" }, // # from earlier version of tables in this file! |
| { "zh.Ha-S", "zh-Hans" }, // # from old LocaleRefGetPartString |
| { "zh.Ha-S_CN", "zh_CN" }, // # from old LocaleRefGetPartString |
| { "zh.Ha-T", "zh-Hant" }, // # from old LocaleRefGetPartString |
| { "zh.Ha-T_TW", "zh_TW" }, // # from old LocaleRefGetPartString |
| }; |
| enum { |
| kNumOldAppleLocaleToCanonical = sizeof(oldAppleLocaleToCanonical)/sizeof(KeyStringToResultString) |
| }; |
| |
| static const KeyStringToResultString localeStringPrefixToCanonical[] = { |
| // Map 3-letter & obsolete ISO 639 codes, plus obsolete RFC 3066 codes, to 2-letter ISO 639 code. |
| // (special cases for 'sh' handled separately) |
| // First column must be all lowercase; must be sorted according to how strcmp compares the strings in the first column. |
| // |
| // non-canonical canonical [ comment ] # source/reason for non-canonical string |
| // prefix prefix |
| // ------------- --------- |
| |
| { "aar", "aa" }, // Afar |
| // { "aa_SAAHO", "ssy" }, // Saho # deprecated/grandfathered, handled as a special case |
| { "abk", "ab" }, // Abkhazian |
| { "afr", "af" }, // Afrikaans |
| { "aju", "jrb" }, // Moroccan Judeo-Arabic -> Judeo-Arabic (macrolang.) |
| { "aka", "ak" }, // Akan |
| { "alb", "sq" }, // Albanian |
| { "als", "sq" }, // Tosk Albanian -> Albanian (macrolang.) |
| { "amh", "am" }, // Amharic |
| { "ara", "ar" }, // Arabic |
| { "arb", "ar" }, // Std Arabic -> Arabic (macrolang.) |
| { "arg", "an" }, // Aragonese |
| { "arm", "hy" }, // Armenian |
| { "art-lojban", "jbo" }, // Lojban # deprecated/grandfathered |
| { "asm", "as" }, // Assamese |
| { "ava", "av" }, // Avaric |
| { "ave", "ae" }, // Avestan |
| { "aym", "ay" }, // Aymara |
| { "ayr", "ay" }, // Central Aymara -> Aymara (macrolang.) |
| { "aze", "az" }, // Azerbaijani |
| { "azj", "az" }, // N.Azerbaijani -> Azerbaijani (macrolang.) |
| { "bak", "ba" }, // Bashkir |
| { "bam", "bm" }, // Bambara |
| { "baq", "eu" }, // Basque |
| { "bcc", "bal" }, // Balochi, Southern -> Baluchi (macrolang.) |
| { "bcl", "bik" }, // Bicolano, Central -> Bikol (macrolang.) |
| { "bel", "be" }, // Belarusian |
| { "ben", "bn" }, // Bengali |
| { "bih", "bh" }, // Bihari |
| { "bis", "bi" }, // Bislama |
| { "bod", "bo" }, // Tibetan |
| { "bos", "bs" }, // Bosnian |
| { "bre", "br" }, // Breton |
| { "bul", "bg" }, // Bulgarian |
| { "bur", "my" }, // Burmese |
| { "bxk", "luy" }, // Lubukusu -> Luyia (macrolang.) |
| { "bxr", "bua" }, // Buriat, Russia -> Buriat (macrolang.) |
| { "cat", "ca" }, // Catalan |
| { "ces", "cs" }, // Czech |
| { "cha", "ch" }, // Chamorro |
| { "che", "ce" }, // Chechen |
| { "chi", "zh" }, // Chinese |
| { "chu", "cu" }, // Church Slavic, Church Slavonic, Old Bulgarian, Old Church Slavonic, Old Slavonic |
| { "chv", "cv" }, // Chuvash |
| { "cld", "syr" }, // Chaldean Neo-Aramaic -> Syriac (macrolang.) |
| { "cmn", "zh" }, // Mandarin -> Chinese (macrolang.) |
| { "cor", "kw" }, // Cornish |
| { "cos", "co" }, // Corsican |
| { "cre", "cr" }, // Cree |
| { "cwd", "cr" }, // Cree, Woods -> Cree (macrolang.) |
| { "cym", "cy" }, // Welsh |
| { "cze", "cs" }, // Czech |
| { "dan", "da" }, // Danish |
| { "deu", "de" }, // German |
| { "dgo", "doi" }, // Dogri -> Dogri (macrolang.) |
| { "dhd", "mwr" }, // Dhundari -> Marwari (macrolang.) |
| { "dik", "din" }, // Southwestern Dinka -> Dinka (macrolang.) |
| { "diq", "zza" }, // Dimli -> Zaza (macrolang.) |
| { "div", "dv" }, // Dhivehi, Divehi, Maldivian |
| { "dut", "nl" }, // Dutch |
| { "dzo", "dz" }, // Dzongkha |
| { "ekk", "et" }, // Std Estonian -> Estonian (macrolang.) |
| { "ell", "el" }, // Greek, Modern (1453-) |
| { "emk", "man" }, // Maninkakan, Eastern -> Mandingo (macrolang.) |
| { "eng", "en" }, // English |
| { "epo", "eo" }, // Esperanto |
| { "esk", "ik" }, // Northwest Alaska Inupiatun -> Inupiaq (macrolang.) |
| { "est", "et" }, // Estonian |
| { "eus", "eu" }, // Basque |
| { "ewe", "ee" }, // Ewe |
| { "fao", "fo" }, // Faroese |
| { "fas", "fa" }, // Persian |
| { "fat", "ak" }, // Fanti -> Akan (macrolang.) |
| { "fij", "fj" }, // Fijian |
| { "fin", "fi" }, // Finnish |
| { "fra", "fr" }, // French |
| { "fre", "fr" }, // French |
| { "fry", "fy" }, // Western Frisian |
| { "fuc", "ff" }, // Pular -> Fulah (macrolang.) |
| { "ful", "ff" }, // Fulah |
| { "gaz", "om" }, // W.Central Oromo -> Oromo (macrolang.) |
| { "gbo", "grb" }, // Northern Grebo -> Grebo (macrolang.) |
| { "geo", "ka" }, // Georgian |
| { "ger", "de" }, // German |
| { "gla", "gd" }, // Gaelic,Scottish |
| { "gle", "ga" }, // Irish |
| { "glg", "gl" }, // Gallegan |
| { "glv", "gv" }, // Manx |
| { "gno", "gon" }, // Northern Gondi -> Gondi (macrolang.) |
| { "gre", "el" }, // Greek, Modern (1453-) |
| { "grn", "gn" }, // Guarani |
| { "gug", "gn" }, // Paraguayan Guarani -> Guarani (macrolang.) |
| { "guj", "gu" }, // Gujarati |
| { "gya", "gba" }, // Northwest Gbaya -> Gbaya (Cent. Afr. Rep.) (macrolang.) |
| { "hat", "ht" }, // Haitian, Haitian Creole |
| { "hau", "ha" }, // Hausa |
| { "hbs", "sr_Latn" }, // Serbo-Croatian |
| { "hdn", "hai" }, // Northern Haida -> Haida (macrolang.) |
| { "hea", "hmn" }, // Northern Qiandong Miao -> Hmong (macrolang.) |
| { "heb", "he" }, // Hebrew |
| { "her", "hz" }, // Herero |
| { "him", "srx" }, // Himachali -> Sirmauri (= Pahari, Himachali) (macrolang.) |
| { "hin", "hi" }, // Hindi |
| { "hmo", "ho" }, // Hiri Motu |
| { "hrv", "hr" }, // Croatian |
| { "hun", "hu" }, // Hungarian |
| { "hye", "hy" }, // Armenian |
| { "i-ami", "ami" }, // Amis # deprecated/grandfathered |
| { "i-bnn", "bnn" }, // Bunun # deprecated/grandfathered |
| { "i-hak", "hak" }, // Hakka # deprecated RFC 3066 |
| { "i-klingon", "tlh" }, // Klingon # deprecated/grandfathered |
| { "i-lux", "lb" }, // Luxembourgish # deprecated RFC 3066 |
| { "i-navajo", "nv" }, // Navajo # deprecated RFC 3066 |
| { "i-pwn", "pwn" }, // Paiwan # deprecated/grandfathered |
| { "i-tao", "tao" }, // Tao # deprecated/grandfathered |
| { "i-tay", "tay" }, // Tayal # deprecated/grandfathered |
| { "i-tsu", "tsu" }, // Tsou # deprecated/grandfathered |
| { "ibo", "ig" }, // Igbo |
| { "ice", "is" }, // Icelandic |
| { "ido", "io" }, // Ido |
| { "iii", "ii" }, // Sichuan Yi, Nuosu |
| { "ike", "iu" }, // E.Canada Inuktitut -> Inuktitut (macrolang.) |
| { "iku", "iu" }, // Inuktitut |
| { "ile", "ie" }, // Interlingue |
| { "in", "id" }, // Indonesian # deprecated 639 code in -> id (1989) |
| { "ina", "ia" }, // Interlingua |
| { "ind", "id" }, // Indonesian |
| { "ipk", "ik" }, // Inupiaq |
| { "isl", "is" }, // Icelandic |
| { "ita", "it" }, // Italian |
| { "iw", "he" }, // Hebrew # deprecated 639 code iw -> he (1989) |
| { "jav", "jv" }, // Javanese |
| { "jaw", "jv" }, // Javanese # deprecated 639 code jaw -> jv (2001) |
| { "ji", "yi" }, // Yiddish # deprecated 639 code ji -> yi (1989) |
| { "jpn", "ja" }, // Japanese |
| { "jw", "jv" }, // Javanese # deprecated |
| { "kal", "kl" }, // Kalaallisut |
| { "kan", "kn" }, // Kannada |
| { "kas", "ks" }, // Kashmiri |
| { "kat", "ka" }, // Georgian |
| { "kau", "kr" }, // Kanuri |
| { "kaz", "kk" }, // Kazakh |
| { "khk", "mn" }, // Halh Mongolian [mainly Cyrl] -> Mongolian (macrolang.) |
| { "khm", "km" }, // Khmer |
| { "kik", "ki" }, // Kikuyu, Gikuyu |
| { "kin", "rw" }, // Kinyarwanda |
| { "kir", "ky" }, // Kirghiz |
| { "kmr", "ku" }, // Northern Kurdish -> Kurdish (macrolang.) |
| { "knc", "kr" }, // Central Kanuri -> Kanuri (macrolang.) |
| { "kng", "kg" }, // Koongo -> Kongo (macrolang.) |
| { "knn", "kok" }, // Konkani (individ.lang) -> Konkani (macrolang.) |
| { "kom", "kv" }, // Komi |
| { "kon", "kg" }, // Kongo |
| { "kor", "ko" }, // Korean |
| { "kpv", "kv" }, // Komi-Zyrian -> Komi (macrolang.) |
| { "kua", "kj" }, // Kuanyama, Kwanyama |
| { "kur", "ku" }, // Kurdish |
| { "lao", "lo" }, // Lao |
| { "lat", "la" }, // Latin |
| { "lav", "lv" }, // Latvian |
| { "lbk", "bnc" }, // Central Bontok -> Bontok (macrolang.) |
| { "lim", "li" }, // Limburgan, Limburger, Limburgish |
| { "lin", "ln" }, // Lingala |
| { "lit", "lt" }, // Lithuanian |
| { "ltz", "lb" }, // Letzeburgesch |
| { "lub", "lu" }, // Luba-Katanga |
| { "lug", "lg" }, // Ganda |
| { "lvs", "lv" }, // Std Latvian -> Latvian (macrolang.) |
| { "mac", "mk" }, // Macedonian |
| { "mal", "ml" }, // Malayalam |
| { "mar", "mr" }, // Marathi |
| { "may", "ms" }, // Malay |
| { "mhr", "chm" }, // Mari, Eastern -> Mari (Russia) (macrolang.) |
| { "mkd", "mk" }, // Macedonian |
| { "mlg", "mg" }, // Malagasy |
| { "mlt", "mt" }, // Maltese |
| { "mol", "mo" }, // Moldavian |
| { "mon", "mn" }, // Mongolian |
| { "msa", "ms" }, // Malay |
| { "mup", "raj" }, // Malvi -> Rajasthani (macrolang.) |
| { "mya", "my" }, // Burmese |
| { "nau", "na" }, // Nauru |
| { "nav", "nv" }, // Navajo, Navaho |
| { "nbl", "nr" }, // South Ndebele |
| { "nde", "nd" }, // North Ndebele |
| { "ndo", "ng" }, // Ndonga |
| { "nep", "ne" }, // Nepali |
| { "nld", "nl" }, // Dutch |
| { "nno", "nn" }, // Norwegian Nynorsk |
| { "no", "nb" }, // Norwegian generic # ambiguous 639 code no -> nb |
| { "no-bok", "nb" }, // Norwegian Bokmal # deprecated RFC 3066 tag - used in old LocaleRefGetPartString |
| { "no-nyn", "nn" }, // Norwegian Nynorsk # deprecated RFC 3066 tag - used in old LocaleRefGetPartString |
| { "nob", "nb" }, // Norwegian Bokmal |
| { "nor", "nb" }, // Norwegian generic # ambiguous 639 code nor -> nb |
| // { "no_BOKMAL", "nb" }, // Norwegian Bokmal # deprecated/grandfathered, handled as a special case |
| // { "no_NYNORSK", "nn" }, // Norwegian Nynorsk # deprecated/grandfathered, handled as a special case |
| { "nya", "ny" }, // Nyanja/Chewa/Chichewa # 3-letter code used in old LocaleRefGetPartString |
| { "oci", "oc" }, // Occitan/Provencal |
| { "ojg", "oj" }, // Ojibwa, Eastern -> Ojibwa (macrolang.) |
| { "oji", "oj" }, // Ojibwa |
| { "ori", "or" }, // Oriya |
| { "orm", "om" }, // Oromo,Galla |
| { "oss", "os" }, // Ossetian, Ossetic |
| { "pan", "pa" }, // Panjabi |
| { "pbu", "ps" }, // N.Pashto, -> Pushto (macrolang.) |
| { "per", "fa" }, // Persian |
| { "pes", "fa" }, // W.Farsi -> Persian (macrolang.) |
| { "pli", "pi" }, // Pali |
| { "plt", "mg" }, // Plateau Malagasy -> Malagasy (macrolang.) |
| { "pnb", "lah" }, // W.Panjabi -> Lahnda (macrolang.) |
| { "pol", "pl" }, // Polish |
| { "por", "pt" }, // Portuguese |
| { "pus", "ps" }, // Pushto |
| { "que", "qu" }, // Quechua |
| { "qxp", "qu" }, // Puno Quechua -> Quechua (macrolang.) |
| { "rmy", "rom" }, // Vlax Romani -> Romany (macrolang.) |
| { "roh", "rm" }, // Raeto-Romance |
| { "ron", "ro" }, // Romanian |
| { "rum", "ro" }, // Romanian |
| { "run", "rn" }, // Rundi |
| { "rus", "ru" }, // Russian |
| { "sag", "sg" }, // Sango |
| { "san", "sa" }, // Sanskrit |
| { "scc", "sr" }, // Serbian |
| { "scr", "hr" }, // Croatian |
| { "sgn-be-fr", "sfb" }, // Belgian-French Sign Lang. # deprecated/grandfathered |
| { "sgn-be-nl", "vgt" }, // Belgian-Flemish Sign Lang. # deprecated/grandfathered |
| { "sgn-ch-de", "sgg" }, // Swiss German Sign Lang. # deprecated/grandfathered |
| { "sin", "si" }, // Sinhalese |
| { "slk", "sk" }, // Slovak |
| { "slo", "sk" }, // Slovak |
| { "slv", "sl" }, // Slovenian |
| { "sme", "se" }, // Sami,Northern |
| { "smo", "sm" }, // Samoan |
| { "sna", "sn" }, // Shona |
| { "snd", "sd" }, // Sindhi |
| { "som", "so" }, // Somali |
| { "sot", "st" }, // Southern Sotho |
| { "spa", "es" }, // Spanish |
| { "spy", "kln" }, // Sabaot -> Kalenjin (macrolang.) |
| { "sqi", "sq" }, // Albanian |
| { "src", "sc" }, // Sardinian, Logudorese -> Sardinian (macrolang.) |
| { "srd", "sc" }, // Sardinian |
| { "srp", "sr" }, // Serbian |
| { "ssw", "ss" }, // Swati |
| { "sun", "su" }, // Sundanese |
| { "swa", "sw" }, // Swahili |
| { "swe", "sv" }, // Swedish |
| { "swh", "sw" }, // Swahili (individ.lang) -> Swahili (macrolang.) |
| { "tah", "ty" }, // Tahitian |
| { "tam", "ta" }, // Tamil |
| { "tat", "tt" }, // Tatar |
| { "tel", "te" }, // Telugu |
| { "tgk", "tg" }, // Tajik |
| { "tgl", "fil" }, // Tagalog |
| { "tha", "th" }, // Thai |
| { "tib", "bo" }, // Tibetan |
| { "tir", "ti" }, // Tigrinya |
| { "tl", "fil" }, // Tagalog # legacy |
| { "ton", "to" }, // Tongan |
| { "tsn", "tn" }, // Tswana |
| { "tso", "ts" }, // Tsonga |
| { "ttq", "tmh" }, // Tamajaq, Tawallammat -> Tamashek (macrolang.) |
| { "tuk", "tk" }, // Turkmen |
| { "tur", "tr" }, // Turkish |
| { "tw", "ak" }, // Twi -> Akan (macrolang.) |
| { "twi", "ak" }, // Twi |
| { "uig", "ug" }, // Uighur |
| { "ukr", "uk" }, // Ukrainian |
| { "umu", "del" }, // Munsee -> Delaware (macrolang.) |
| { "urd", "ur" }, // Urdu |
| { "uzb", "uz" }, // Uzbek |
| { "uzn", "uz" }, // N. Uzbek -> Uzbek (macrolang.) |
| { "ven", "ve" }, // Venda |
| { "vie", "vi" }, // Vietnamese |
| { "vol", "vo" }, // Volapük |
| { "wel", "cy" }, // Welsh |
| { "wln", "wa" }, // Walloon |
| { "wol", "wo" }, // Wolof |
| { "xho", "xh" }, // Xhosa |
| { "xpe", "kpe" }, // Kpelle, Liberia -> Kpelle (macrolang.) |
| { "xsl", "den" }, // Slavey, South -> Slave (Athapascan) (macrolang.) |
| { "ydd", "yi" }, // Yiddish,E. -> Yiddish (macrolang.) |
| { "yid", "yi" }, // Yiddish |
| { "yor", "yo" }, // Yoruba |
| { "zai", "zap" }, // Zapotec, Isthmus -> Zapotec (macrolang.) |
| { "zh-cdo", "cdo" }, // Chinese, Min Dong # extlang |
| { "zh-cjy", "cjy" }, // Chinese, Jinyu # extlang |
| { "zh-cmn", "zh" }, // Chinese, Mandarin # extlang |
| { "zh-cpx", "cpx" }, // Chinese, Pu-Xian # extlang |
| { "zh-czh", "czh" }, // Chinese, Huizhou # extlang |
| { "zh-czo", "czo" }, // Chinese, Min Zhong # extlang |
| { "zh-gan", "gan" }, // Chinese, Gan # extlang |
| { "zh-guoyu", "zh" }, // Mandarin/Std Chinese # deprecated |
| { "zh-hak", "hak" }, // Chinese, Hakka # extlang |
| { "zh-hakka", "hak" }, // Hakka # deprecated |
| { "zh-hsn", "hsn" }, // Chinese, Xiang # extlang |
| { "zh-min-nan", "nan" }, // Minnan,Hokkien,Taiwanese,So. Fujian # deprecated |
| { "zh-mnp", "mnp" }, // Chinese, Min Bei # extlang |
| { "zh-nan", "nan" }, // Chinese, Min Nan # extlang |
| { "zh-wuu", "wuu" }, // Chinese, Wu # extlang |
| { "zh-xiang", "hsn" }, // Xiang/Hunanese # deprecated |
| { "zh-yue", "yue" }, // Chinese, Yue # extlang |
| { "zha", "za" }, // Zhuang, Chuang |
| { "zho", "zh" }, // Chinese |
| { "zsm", "ms" }, // Std Malay -> Malay (macrolang.) |
| { "zul", "zu" }, // Zulu |
| { "zyb", "za" }, // Yongbei Zhuang -> Zhuang (macrolang.) |
| }; |
| enum { |
| kNumLocaleStringPrefixToCanonical = sizeof(localeStringPrefixToCanonical)/sizeof(KeyStringToResultString) |
| }; |
| |
| |
| static const SpecialCaseUpdates specialCases[] = { |
| // Data for special cases |
| // a) The 3166 code CS was used for Czechoslovakia until 1993, when that country split and the code was |
| // replaced by CZ and SK. Then in 2003-07, the code YU (formerly designating all of Yugoslavia, then after |
| // the 1990s breakup just designating what is now Serbia and Montenegro) was changed to CS! Then after |
| // Serbia and Montenegro split, the code CS was replaced in 2006-09 with separate codes RS and ME. If we |
| // see CS but a language of cs or sk, we change CS to CZ or SK. Otherwise, we change CS (and old YU) to RS. |
| // b) The 639 code sh for Serbo-Croatian was also replaced in the 1990s by separate codes hr and sr, and |
| // deprecated in 2000. We guess which one to map it to as follows: If there is a region tag of HR we use |
| // hr; if there is a region tag of (now) RS we use sr; else we do not change it (not enough info). |
| // c) There are other codes that have been updated without these issues (eg. TP to TL), plus among the |
| // "exceptionally reserved" codes some are just alternates for standard codes (eg. UK for GB). |
| { NULL, "-UK", "GB", NULL, NULL }, // always change UK to GB (UK is "exceptionally reserved" to mean GB) |
| { NULL, "-TP", "TL", NULL, NULL }, // always change TP to TL (East Timor, code changed 2002-05) |
| { "cs", "-CS", "CZ", NULL, NULL }, // if language is cs, change CS (pre-1993 Czechoslovakia) to CZ (Czech Republic) |
| { "sk", "-CS", "SK", NULL, NULL }, // if language is sk, change CS (pre-1993 Czechoslovakia) to SK (Slovakia) |
| { NULL, "-CS", "RS", NULL, NULL }, // otherwise map CS (assume Serbia+Montenegro) to RS (Serbia) |
| { NULL, "-YU", "RS", NULL, NULL }, // also map old YU (assume Serbia+Montenegro) to RS (Serbia) |
| { "sh", "-HR", "hr", "-RS", "sr" }, // then if language is old 'sh' (SerboCroatian), change it to 'hr' (Croatian) |
| // if we find HR (Croatia) or to 'sr' (Serbian) if we find RS (Serbia). |
| // Note: Do this after changing YU/CS toRS as above. |
| { NULL, NULL, NULL, NULL, NULL } // terminator |
| }; |
| |
| |
| static const KeyStringToResultString localeStringRegionToDefaults[] = { |
| // For some region-code suffixes, there are default substrings to strip off for canonical string. |
| // Must be sorted according to how strcmp compares the strings in the first column |
| // |
| // region default writing |
| // suffix system tags, strip comment |
| // -------- ------------- --------- |
| { "_CN", "-Hans" }, // mainland China, default is simplified |
| { "_HK", "-Hant" }, // Hong Kong, default is traditional |
| { "_MO", "-Hant" }, // Macao, default is traditional |
| { "_SG", "-Hans" }, // Singapore, default is simplified |
| { "_TW", "-Hant" }, // Taiwan, default is traditional |
| }; |
| enum { |
| kNumLocaleStringRegionToDefaults = sizeof(localeStringRegionToDefaults)/sizeof(KeyStringToResultString) |
| }; |
| |
| static const KeyStringToResultString localeStringPrefixToDefaults[] = { |
| // For some initial portions of language tag, there are default substrings to strip off for canonical string. |
| // Must be sorted according to how strcmp compares the strings in the first column |
| // |
| // language default writing |
| // tag prefix system tags, strip comment |
| // -------- ------------- --------- |
| { "ab-", "-Cyrl" }, // Abkhazian |
| { "af-", "-Latn" }, // Afrikaans |
| { "agq-", "-Latn" }, // Aghem |
| { "ak-", "-Latn" }, // Akan |
| { "am-", "-Ethi" }, // Amharic |
| { "ar-", "-Arab" }, // Arabic |
| { "as-", "-Beng" }, // Assamese |
| { "asa-", "-Latn" }, // Asu |
| { "ay-", "-Latn" }, // Aymara |
| { "az-", "-Latn" }, // Azerbaijani |
| { "bas-", "-Latn" }, // Basaa |
| { "be-", "-Cyrl" }, // Belarusian |
| { "bem-", "-Latn" }, // Bemba |
| { "bez-", "-Latn" }, // Bena |
| { "bg-", "-Cyrl" }, // Bulgarian |
| { "bm-", "-Latn" }, // Bambara |
| { "bn-", "-Beng" }, // Bengali |
| { "bo-", "-Tibt" }, // Tibetan (? not Suppress-Script) |
| { "br-", "-Latn" }, // Breton (? not Suppress-Script) |
| { "brx-", "-Deva" }, // Bodo |
| { "bs-", "-Latn" }, // Bosnian |
| { "ca-", "-Latn" }, // Catalan |
| { "cgg-", "-Latn" }, // Chiga |
| { "chr-", "-Cher" }, // Cherokee |
| { "cs-", "-Latn" }, // Czech |
| { "cy-", "-Latn" }, // Welsh |
| { "da-", "-Latn" }, // Danish |
| { "dav-", "-Latn" }, // Taita |
| { "de-", "-Latn -1901" }, // German, traditional orthography |
| { "dje-", "-Latn" }, // Zarma |
| { "dua-", "-Latn" }, // Duala |
| { "dv-", "-Thaa" }, // Divehi/Maldivian |
| { "dyo-", "-Latn" }, // Jola-Fonyi |
| { "dz-", "-Tibt" }, // Dzongkha |
| { "ebu-", "-Latn" }, // Embu |
| { "ee-", "-Latn" }, // Ewe |
| { "el-", "-Grek" }, // Greek (modern, monotonic) |
| { "en-", "-Latn" }, // English |
| { "eo-", "-Latn" }, // Esperanto |
| { "es-", "-Latn" }, // Spanish |
| { "et-", "-Latn" }, // Estonian |
| { "eu-", "-Latn" }, // Basque |
| { "ewo-", "-Latn" }, // Ewondo |
| { "fa-", "-Arab" }, // Farsi |
| { "ff-", "-Latn" }, // Fulah |
| { "fi-", "-Latn" }, // Finnish |
| { "fil-", "-Latn" }, // Tagalog |
| { "fo-", "-Latn" }, // Faroese |
| { "fr-", "-Latn" }, // French |
| { "ga-", "-Latn" }, // Irish |
| { "gd-", "-Latn" }, // Scottish Gaelic (? not Suppress-Script) |
| { "gl-", "-Latn" }, // Galician |
| { "gn-", "-Latn" }, // Guarani |
| { "gsw-", "-Latn" }, // Swiss German |
| { "gu-", "-Gujr" }, // Gujarati |
| { "guz-", "-Latn" }, // Gusii |
| { "gv-", "-Latn" }, // Manx |
| { "ha-", "-Latn" }, // Hausa |
| { "haw-", "-Latn" }, // Hawaiian (? not Suppress-Script) |
| { "he-", "-Hebr" }, // Hebrew |
| { "hi-", "-Deva" }, // Hindi |
| { "hr-", "-Latn" }, // Croatian |
| { "hu-", "-Latn" }, // Hungarian |
| { "hy-", "-Armn" }, // Armenian |
| { "id-", "-Latn" }, // Indonesian |
| { "ig-", "-Latn" }, // Igbo |
| { "ii-", "-Yiii" }, // Sichuan Yi |
| { "is-", "-Latn" }, // Icelandic |
| { "it-", "-Latn" }, // Italian |
| { "ja-", "-Jpan" }, // Japanese |
| { "jmc-", "-Latn" }, // Machame |
| { "ka-", "-Geor" }, // Georgian |
| { "kab-", "-Latn" }, // Kabyle |
| { "kam-", "-Latn" }, // Kamba |
| { "kde-", "-Latn" }, // Makonde |
| { "kea-", "-Latn" }, // Kabuverdianu |
| { "khq-", "-Latn" }, // Koyra Chiini |
| { "ki-", "-Latn" }, // Kikuyu |
| { "kk-", "-Cyrl" }, // Kazakh |
| { "kl-", "-Latn" }, // Kalaallisut/Greenlandic |
| { "km-", "-Khmr" }, // Central Khmer |
| { "kn-", "-Knda" }, // Kannada |
| { "ko-", "-Hang" }, // Korean (? not Suppress-Script) |
| { "kok-", "-Deva" }, // Konkani |
| { "ksb-", "-Latn" }, // Shambala |
| { "ksf-", "-Latn" }, // Bafia |
| { "kw-", "-Latn" }, // Cornish |
| { "ky-", "-Cyrl" }, // Kirghiz |
| { "la-", "-Latn" }, // Latin |
| { "lag-", "-Latn" }, // Langi |
| { "lb-", "-Latn" }, // Luxembourgish |
| { "lg-", "-Latn" }, // Ganda |
| { "ln-", "-Latn" }, // Lingala |
| { "lo-", "-Laoo" }, // Lao |
| { "lt-", "-Latn" }, // Lithuanian |
| { "lu-", "-Latn" }, // Luba-Katanga |
| { "luo-", "-Latn" }, // Luo |
| { "luy-", "-Latn" }, // Luyia |
| { "lv-", "-Latn" }, // Latvian |
| { "mas-", "-Latn" }, // Masai |
| { "mer-", "-Latn" }, // Meru |
| { "mfe-", "-Latn" }, // Morisyen |
| { "mg-", "-Latn" }, // Malagasy |
| { "mgh-", "-Latn" }, // Makhuwa-Meetto |
| { "mk-", "-Cyrl" }, // Macedonian |
| { "ml-", "-Mlym" }, // Malayalam |
| { "mn-", "-Cyrl" }, // Mongolian |
| { "mo-", "-Latn" }, // Moldavian |
| { "mr-", "-Deva" }, // Marathi |
| { "ms-", "-Latn" }, // Malay |
| { "mt-", "-Latn" }, // Maltese |
| { "mua-", "-Latn" }, // Mundang |
| { "my-", "-Mymr" }, // Burmese/Myanmar |
| { "naq-", "-Latn" }, // Nama |
| { "nb-", "-Latn" }, // Norwegian Bokmal |
| { "nd-", "-Latn" }, // North Ndebele |
| { "ne-", "-Deva" }, // Nepali |
| { "nl-", "-Latn" }, // Dutch |
| { "nmg-", "-Latn" }, // Kwasio |
| { "nn-", "-Latn" }, // Norwegian Nynorsk |
| { "nus-", "-Latn" }, // Nuer |
| { "ny-", "-Latn" }, // Chichewa/Nyanja |
| { "nyn-", "-Latn" }, // Nyankole |
| { "om-", "-Latn" }, // Oromo |
| { "or-", "-Orya" }, // Oriya |
| { "pa-", "-Guru" }, // Punjabi |
| { "pl-", "-Latn" }, // Polish |
| { "ps-", "-Arab" }, // Pushto |
| { "pt-", "-Latn" }, // Portuguese |
| { "qu-", "-Latn" }, // Quechua |
| { "rm-", "-Latn" }, // Romansh |
| { "rn-", "-Latn" }, // Rundi |
| { "ro-", "-Latn" }, // Romanian |
| { "rof-", "-Latn" }, // Rombo |
| { "ru-", "-Cyrl" }, // Russian |
| { "rw-", "-Latn" }, // Kinyarwanda |
| { "rwk-", "-Latn" }, // Rwa |
| { "sa-", "-Deva" }, // Sanskrit (? not Suppress-Script) |
| { "saq-", "-Latn" }, // Samburu |
| { "sbp-", "-Latn" }, // Sangu |
| { "se-", "-Latn" }, // Sami (? not Suppress-Script) |
| { "seh-", "-Latn" }, // Sena |
| { "ses-", "-Latn" }, // Koyraboro Senni |
| { "sg-", "-Latn" }, // Sango |
| { "shi-", "-Latn" }, // Tachelhit |
| { "si-", "-Sinh" }, // Sinhala |
| { "sk-", "-Latn" }, // Slovak |
| { "sl-", "-Latn" }, // Slovenian |
| { "sn-", "-Latn" }, // Shona |
| { "so-", "-Latn" }, // Somali |
| { "sq-", "-Latn" }, // Albanian |
| { "sr-", "-Cyrl" }, // Serbian |
| { "sv-", "-Latn" }, // Swedish |
| { "sw-", "-Latn" }, // Swahili |
| { "swc-", "-Latn" }, // Congo Swahili |
| { "ta-", "-Taml" }, // Tamil |
| { "te-", "-Telu" }, // Telugu |
| { "teo-", "-Latn" }, // Teso |
| { "tg-", "-Cyrl" }, // Tajik |
| { "th-", "-Thai" }, // Thai |
| { "ti-", "-Ethi" }, // Tigrinya |
| { "tk-", "-Latn" }, // Turkmen |
| { "tn-", "-Latn" }, // Tswana |
| { "to-", "-Latn" }, // Tonga of Tonga Islands |
| { "tr-", "-Latn" }, // Turkish |
| { "twq-", "-Latn" }, // Tasawaq |
| { "tzm-", "-Latn" }, // Central Morocco Tamazight |
| { "uk-", "-Cyrl" }, // Ukrainian |
| { "ur-", "-Arab" }, // Urdu |
| { "uz-", "-Cyrl" }, // Uzbek |
| { "vai-", "-Vaii" }, // Vai |
| { "vi-", "-Latn" }, // Vietnamese |
| { "vun-", "-Latn" }, // Vunjo |
| { "wo-", "-Latn" }, // Wolof |
| { "xh-", "-Latn" }, // Xhosa |
| { "xog-", "-Latn" }, // Soga |
| { "yav-", "-Latn" }, // Yangben |
| { "yi-", "-Hebr" }, // Yiddish |
| { "yo-", "-Latn" }, // Yoruba |
| { "zh-", "-Hani" }, // Chinese (? not Suppress-Script) |
| { "zu-", "-Latn" }, // Zulu |
| }; |
| enum { |
| kNumLocaleStringPrefixToDefaults = sizeof(localeStringPrefixToDefaults)/sizeof(KeyStringToResultString) |
| }; |
| |
| static const KeyStringToResultString appleLocaleToLanguageString[] = { |
| // Map locale strings that Apple uses as language IDs to real language strings. |
| // Must be sorted according to how strcmp compares the strings in the first column. |
| // Note: Now we remove all transforms of the form ll_RR -> ll-RR, they are now |
| // handled in the code. <1.19> |
| // |
| // locale lang [ comment ] |
| // string string |
| // ------- ------- |
| { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752] |
| { "zh_CN", "zh-Hans" }, // mainland China => simplified |
| { "zh_HK", "zh-HK" }, // Hong Kong => traditional, not currently used |
| { "zh_MO", "zh-MO" }, // Macao => traditional, not currently used |
| { "zh_SG", "zh-SG" }, // Singapore => simplified, not currently used |
| { "zh_TW", "zh-Hant" }, // Taiwan => traditional |
| }; |
| enum { |
| kNumAppleLocaleToLanguageString = sizeof(appleLocaleToLanguageString)/sizeof(KeyStringToResultString) |
| }; |
| |
| /* |
| static const KeyStringToResultString appleLocaleToLanguageStringForCFBundle[] = { |
| // Map locale strings that Apple uses as language IDs to real language strings. |
| // Must be sorted according to how strcmp compares the strings in the first column. |
| // |
| // locale lang [ comment ] |
| // string string |
| // ------- ------- |
| { "de_AT", "de-AT" }, // Austrian German |
| { "de_CH", "de-CH" }, // Swiss German |
| // { "de_DE", "de-DE" }, // German for Germany (default), not currently used |
| { "en_AU", "en-AU" }, // Australian English |
| { "en_CA", "en-CA" }, // Canadian English |
| { "en_GB", "en-GB" }, // British English |
| // { "en_IE", "en-IE" }, // Irish English, not currently used |
| { "en_US", "en-US" }, // U.S. English |
| { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752] |
| // { "fr_BE", "fr-BE" }, // Belgian French, not currently used |
| { "fr_CA", "fr-CA" }, // Canadian French |
| { "fr_CH", "fr-CH" }, // Swiss French |
| // { "fr_FR", "fr-FR" }, // French for France (default), not currently used |
| { "nl_BE", "nl-BE" }, // Flemish = Vlaams, Dutch for Belgium |
| // { "nl_NL", "nl-NL" }, // Dutch for Netherlands (default), not currently used |
| { "pt_BR", "pt-BR" }, // Brazilian Portuguese |
| { "pt_PT", "pt-PT" }, // Portuguese for Portugal |
| { "zh_CN", "zh-Hans" }, // mainland China => simplified |
| { "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used |
| { "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used |
| { "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used |
| { "zh_TW", "zh-Hant" }, // Taiwan => traditional |
| }; |
| enum { |
| kNumAppleLocaleToLanguageStringForCFBundle = sizeof(appleLocaleToLanguageStringForCFBundle)/sizeof(KeyStringToResultString) |
| }; |
| */ |
| |
| #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
| |
| struct LocaleToLegacyCodes { |
| const char * locale; // reduced to language plus one other component (script, region, variant), separators normalized to'_' |
| RegionCode regCode; |
| LangCode langCode; |
| CFStringEncoding encoding; |
| }; |
| typedef struct LocaleToLegacyCodes LocaleToLegacyCodes; |
| |
| static const LocaleToLegacyCodes localeToLegacyCodes[] = { |
| // locale RegionCode LangCode CFStringEncoding |
| { "af"/*ZA*/, 102/*verAfrikaans*/, 141/*langAfrikaans*/, 0/*Roman*/ }, // Latn |
| { "am", -1, 85/*langAmharic*/, 28/*Ethiopic*/ }, // Ethi |
| { "ar", 16/*verArabic*/, 12/*langArabic*/, 4/*Arabic*/ }, // Arab; |
| { "as", -1, 68/*langAssamese*/, 13/*Bengali*/ }, // Beng; |
| { "ay", -1, 134/*langAymara*/, 0/*Roman*/ }, // Latn; |
| { "az", -1, 150/*langAzerbaijanRoman*/, 0/*Roman*/ }, // "az" defaults to -Latn |
| { "az_Arab", -1, 50/*langAzerbaijanAr*/, 4/*Arabic*/ }, // Arab; |
| { "az_Cyrl", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "az_Latn", -1, 150/*langAzerbaijanRoman*/, 0/*Roman*/ }, // Latn; |
| { "be"/*BY*/, 61/*verBelarus*/, 46/*langBelorussian*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "bg"/*BG*/, 72/*verBulgaria*/, 44/*langBulgarian*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "bn", 60/*verBengali*/, 67/*langBengali*/, 13/*Bengali*/ }, // Beng; |
| { "bo", 105/*verTibetan*/, 63/*langTibetan*/, 26/*Tibetan*/ }, // Tibt; |
| { "br", 77/*verBreton*/, 142/*langBreton*/, 39/*Celtic*/ }, // Latn; |
| { "ca"/*ES*/, 73/*verCatalonia*/, 130/*langCatalan*/, 0/*Roman*/ }, // Latn; |
| { "cs"/*CZ*/, 56/*verCzech*/, 38/*langCzech*/, 29/*CentralEurRoman*/ }, // Latn; |
| { "cy", 79/*verWelsh*/, 128/*langWelsh*/, 39/*Celtic*/ }, // Latn; |
| { "da"/*DK*/, 9/*verDenmark*/, 7/*langDanish*/, 0/*Roman*/ }, // Latn; |
| { "de", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, // assume "de" defaults to verGermany |
| { "de_1996", 70/*verGermanReformed*/, 2/*langGerman*/, 0/*Roman*/ }, |
| { "de_AT", 92/*verAustria*/, 2/*langGerman*/, 0/*Roman*/ }, |
| { "de_CH", 19/*verGrSwiss*/, 2/*langGerman*/, 0/*Roman*/ }, |
| { "de_DE", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, |
| { "dz"/*BT*/, 83/*verBhutan*/, 137/*langDzongkha*/, 26/*Tibetan*/ }, // Tibt; |
| { "el", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // assume "el" defaults to verGreece |
| { "el_CY", 23/*verCyprus*/, 14/*langGreek*/, 6/*Greek*/ }, |
| { "el_GR", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // modern monotonic |
| { "en", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, // "en" defaults to verUS (per Chris Hansten) |
| { "en_001", 37/*verInternational*/, 0/*langEnglish*/, 0/*Roman*/ }, |
| { "en_AU", 15/*verAustralia*/, 0/*langEnglish*/, 0/*Roman*/ }, |
| { "en_CA", 82/*verEngCanada*/, 0/*langEnglish*/, 0/*Roman*/ }, |
| { "en_GB", 2/*verBritain*/, 0/*langEnglish*/, 0/*Roman*/ }, |
| { "en_IE", 108/*verIrelandEnglish*/, 0/*langEnglish*/, 0/*Roman*/ }, |
| { "en_SG", 100/*verSingapore*/, 0/*langEnglish*/, 0/*Roman*/ }, |
| { "en_US", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, |
| { "eo", 103/*verEsperanto*/, 94/*langEsperanto*/, 0/*Roman*/ }, // Latn; |
| { "es", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, // "es" defaults to verSpain (per Chris Hansten) |
| { "es_419", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, // new BCP 47 tag |
| { "es_ES", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, |
| { "es_MX", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, |
| { "es_US", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, |
| { "et"/*EE*/, 44/*verEstonia*/, 27/*langEstonian*/, 29/*CentralEurRoman*/ }, |
| { "eu", -1, 129/*langBasque*/, 0/*Roman*/ }, // Latn; |
| { "fa"/*IR*/, 48/*verIran*/, 31/*langFarsi/Persian*/, 0x8C/*Farsi*/ }, // Arab; |
| { "fi"/*FI*/, 17/*verFinland*/, 13/*langFinnish*/, 0/*Roman*/ }, |
| { "fil", -1, 82/*langTagalog*/, 0/*Roman*/ }, // Latn; |
| { "fo"/*FO*/, 47/*verFaroeIsl*/, 30/*langFaroese*/, 37/*Icelandic*/ }, |
| { "fr", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, // "fr" defaults to verFrance (per Chris Hansten) |
| { "fr_001", 91/*verFrenchUniversal*/, 1/*langFrench*/, 0/*Roman*/ }, |
| { "fr_BE", 98/*verFrBelgium*/, 1/*langFrench*/, 0/*Roman*/ }, |
| { "fr_CA", 11/*verFrCanada*/, 1/*langFrench*/, 0/*Roman*/ }, |
| { "fr_CH", 18/*verFrSwiss*/, 1/*langFrench*/, 0/*Roman*/ }, |
| { "fr_FR", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, |
| { "ga"/*IE*/, 50/*verIreland*/, 35/*langIrishGaelic*/, 0/*Roman*/ }, // no dots (h after) |
| { "ga_Latg"/*IE*/, 81/*verIrishGaelicScrip*/, 146/*langIrishGaelicScript*/, 40/*Gaelic*/ }, // using dots |
| { "gd", 75/*verScottishGaelic*/, 144/*langScottishGaelic*/, 39/*Celtic*/ }, |
| { "gl", -1, 140/*langGalician*/, 0/*Roman*/ }, // Latn; |
| { "gn", -1, 133/*langGuarani*/, 0/*Roman*/ }, // Latn; |
| { "grc", 40/*verGreekAncient*/, 148/*langGreekAncient*/, 6/*Greek*/ }, // polytonic (MacGreek doesn't actually support it) |
| { "gu"/*IN*/, 94/*verGujarati*/, 69/*langGujarati*/, 11/*Gujarati*/ }, // Gujr; |
| { "gv", 76/*verManxGaelic*/, 145/*langManxGaelic*/, 39/*Celtic*/ }, // Latn; |
| { "he"/*IL*/, 13/*verIsrael*/, 10/*langHebrew*/, 5/*Hebrew*/ }, // Hebr; |
| { "hi"/*IN*/, 33/*verIndiaHindi*/, 21/*langHindi*/, 9/*Devanagari*/ }, // Deva; |
| { "hr"/*HR*/, 68/*verCroatia*/, 18/*langCroatian*/, 36/*Croatian*/ }, |
| { "hu"/*HU*/, 43/*verHungary*/, 26/*langHungarian*/, 29/*CentralEurRoman*/ }, |
| { "hy"/*AM*/, 84/*verArmenian*/, 51/*langArmenian*/, 24/*Armenian*/ }, // Armn; |
| { "id", -1, 81/*langIndonesian*/, 0/*Roman*/ }, // Latn; |
| { "is"/*IS*/, 21/*verIceland*/, 15/*langIcelandic*/, 37/*Icelandic*/ }, |
| { "it", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, // "it" defaults to verItaly |
| { "it_CH", 36/*verItalianSwiss*/, 3/*langItalian*/, 0/*Roman*/ }, |
| { "it_IT", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, |
| { "iu"/*CA*/, 78/*verNunavut*/, 143/*langInuktitut*/, 0xEC/*Inuit*/ }, // Cans; |
| { "ja"/*JP*/, 14/*verJapan*/, 11/*langJapanese*/, 1/*Japanese*/ }, // Jpan; |
| { "jv", -1, 138/*langJavaneseRom*/, 0/*Roman*/ }, // Latn; |
| { "ka"/*GE*/, 85/*verGeorgian*/, 52/*langGeorgian*/, 23/*Georgian*/ }, // Geor; |
| { "kk", -1, 48/*langKazakh*/, 7/*Cyrillic*/ }, // "kk" defaults to -Cyrl; also have -Latn, -Arab |
| { "kl", 107/*verGreenland*/, 149/*langGreenlandic*/, 0/*Roman*/ }, // Latn; |
| { "km", -1, 78/*langKhmer*/, 20/*Khmer*/ }, // Khmr; |
| { "kn", -1, 73/*langKannada*/, 16/*Kannada*/ }, // Knda; |
| { "ko"/*KR*/, 51/*verKorea*/, 23/*langKorean*/, 3/*Korean*/ }, // Hang; |
| { "ks", -1, 61/*langKashmiri*/, 4/*Arabic*/ }, // Arab; |
| { "ku", -1, 60/*langKurdish*/, 4/*Arabic*/ }, // Arab; |
| { "ky", -1, 54/*langKirghiz*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab |
| { "la", -1, 131/*langLatin*/, 0/*Roman*/ }, // Latn; |
| { "lo", -1, 79/*langLao*/, 22/*Laotian*/ }, // Laoo; |
| { "lt"/*LT*/, 41/*verLithuania*/, 24/*langLithuanian*/, 29/*CentralEurRoman*/ }, |
| { "lv"/*LV*/, 45/*verLatvia*/, 28/*langLatvian*/, 29/*CentralEurRoman*/ }, |
| { "mg", -1, 93/*langMalagasy*/, 0/*Roman*/ }, // Latn; |
| { "mk"/*MK*/, 67/*verMacedonian*/, 43/*langMacedonian*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "ml", -1, 72/*langMalayalam*/, 17/*Malayalam*/ }, // Mlym; |
| { "mn", -1, 58/*langMongolianCyr*/, 7/*Cyrillic*/ }, // "mn" defaults to -Cyrl |
| { "mn_Cyrl", -1, 58/*langMongolianCyr*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "mn_Mong", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // Mong; |
| { "mo", -1, 53/*langMoldavian*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "mr"/*IN*/, 104/*verMarathi*/, 66/*langMarathi*/, 9/*Devanagari*/ }, // Deva; |
| { "ms", -1, 83/*langMalayRoman*/, 0/*Roman*/ }, // "ms" defaults to -Latn; |
| { "ms_Arab", -1, 84/*langMalayArabic*/, 4/*Arabic*/ }, // Arab; |
| { "mt"/*MT*/, 22/*verMalta*/, 16/*langMaltese*/, 0/*Roman*/ }, // Latn; |
| { "mul", 74/*verMultilingual*/, -1, 0 }, |
| { "my", -1, 77/*langBurmese*/, 19/*Burmese*/ }, // Mymr; |
| { "nb"/*NO*/, 12/*verNorway*/, 9/*langNorwegian*/, 0/*Roman*/ }, |
| { "ne"/*NP*/, 106/*verNepal*/, 64/*langNepali*/, 9/*Devanagari*/ }, // Deva; |
| { "nl", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, // "nl" defaults to verNetherlands |
| { "nl_BE", 6/*verFlemish*/, 34/*langFlemish*/, 0/*Roman*/ }, |
| { "nl_NL", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, |
| { "nn"/*NO*/, 101/*verNynorsk*/, 151/*langNynorsk*/, 0/*Roman*/ }, |
| { "ny", -1, 92/*langNyanja/Chewa*/, 0/*Roman*/ }, // Latn; |
| { "om", -1, 87/*langOromo*/, 28/*Ethiopic*/ }, // Ethi; |
| { "or", -1, 71/*langOriya*/, 12/*Oriya*/ }, // Orya; |
| { "pa", 95/*verPunjabi*/, 70/*langPunjabi*/, 10/*Gurmukhi*/ }, // Guru; |
| { "pl"/*PL*/, 42/*verPoland*/, 25/*langPolish*/, 29/*CentralEurRoman*/ }, |
| { "ps", -1, 59/*langPashto*/, 0x8C/*Farsi*/ }, // Arab; |
| { "pt", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, // "pt" defaults to verBrazil (per Chris Hansten) |
| { "pt_BR", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, |
| { "pt_PT", 10/*verPortugal*/, 8/*langPortuguese*/, 0/*Roman*/ }, |
| { "qu", -1, 132/*langQuechua*/, 0/*Roman*/ }, // Latn; |
| { "rn", -1, 91/*langRundi*/, 0/*Roman*/ }, // Latn; |
| { "ro"/*RO*/, 39/*verRomania*/, 37/*langRomanian*/, 38/*Romanian*/ }, |
| { "ru"/*RU*/, 49/*verRussia*/, 32/*langRussian*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "rw", -1, 90/*langKinyarwanda*/, 0/*Roman*/ }, // Latn; |
| { "sa", -1, 65/*langSanskrit*/, 9/*Devanagari*/ }, // Deva; |
| { "sd", -1, 62/*langSindhi*/, 0x8C/*Farsi*/ }, // Arab; |
| { "se", 46/*verSami*/, 29/*langSami*/, 0/*Roman*/ }, |
| { "si", -1, 76/*langSinhalese*/, 18/*Sinhalese*/ }, // Sinh; |
| { "sk"/*SK*/, 57/*verSlovak*/, 39/*langSlovak*/, 29/*CentralEurRoman*/ }, |
| { "sl"/*SI*/, 66/*verSlovenian*/, 40/*langSlovenian*/, 36/*Croatian*/ }, |
| { "so", -1, 88/*langSomali*/, 0/*Roman*/ }, // Latn; |
| { "sq", -1, 36/*langAlbanian*/, 0/*Roman*/ }, |
| { "sr"/*CS,RS*/, 65/*verSerbian*/, 42/*langSerbian*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "su", -1, 139/*langSundaneseRom*/, 0/*Roman*/ }, // Latn; |
| { "sv"/*SE*/, 7/*verSweden*/, 5/*langSwedish*/, 0/*Roman*/ }, |
| { "sw", -1, 89/*langSwahili*/, 0/*Roman*/ }, // Latn; |
| { "ta", -1, 74/*langTamil*/, 14/*Tamil*/ }, // Taml; |
| { "te", -1, 75/*langTelugu*/, 15/*Telugu*/ }, // Telu |
| { "tg", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // "tg" defaults to "Cyrl" |
| { "tg_Cyrl", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab |
| { "th"/*TH*/, 54/*verThailand*/, 22/*langThai*/, 21/*Thai*/ }, // Thai; |
| { "ti", -1, 86/*langTigrinya*/, 28/*Ethiopic*/ }, // Ethi; |
| { "tk", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // "tk" defaults to Cyrl |
| { "tk_Cyrl", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab |
| { "tl", -1, 82/*langTagalog*/, 0/*Roman*/ }, // Latn; |
| { "to"/*TO*/, 88/*verTonga*/, 147/*langTongan*/, 0/*Roman*/ }, // Latn; |
| { "tr"/*TR*/, 24/*verTurkey*/, 17/*langTurkish*/, 35/*Turkish*/ }, // Latn; |
| { "tt", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "tt_Cyrl", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "ug", -1, 136/*langUighur*/, 4/*Arabic*/ }, // Arab; |
| { "uk"/*UA*/, 62/*verUkraine*/, 45/*langUkrainian*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "und", 55/*verScriptGeneric*/, -1, 0 }, |
| { "ur", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // "ur" defaults to verPakistanUrdu |
| { "ur_IN", 96/*verIndiaUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab |
| { "ur_PK", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab |
| { "uz"/*UZ*/, 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab |
| { "uz_Cyrl", 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, |
| { "vi"/*VN*/, 97/*verVietnam*/, 80/*langVietnamese*/, 30/*Vietnamese*/ }, // Latn |
| { "yi", -1, 41/*langYiddish*/, 5/*Hebrew*/ }, // Hebr; |
| { "zh", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, // "zh" defaults to verChina, langSimpChinese |
| { "zh_CN", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, |
| { "zh_HK", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, |
| { "zh_Hans", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, |
| { "zh_Hant", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, |
| { "zh_MO", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, |
| { "zh_SG", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, |
| { "zh_TW", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, |
| }; |
| enum { |
| kNumLocaleToLegacyCodes = sizeof(localeToLegacyCodes)/sizeof(localeToLegacyCodes[0]) |
| }; |
| |
| #endif |
| |
| /* |
| For reference here is a list of ICU locales with variants and how some |
| of them are canonicalized with the ICU function uloc_canonicalize: |
| |
| ICU 3.0 has: |
| en_US_POSIX x no change |
| hy_AM_REVISED x no change |
| ja_JP_TRADITIONAL -> ja_JP@calendar=japanese |
| th_TH_TRADITIONAL -> th_TH@calendar=buddhist |
| |
| ICU 2.8 also had the following (now obsolete): |
| ca_ES_PREEURO |
| de__PHONEBOOK -> de@collation=phonebook |
| de_AT_PREEURO |
| de_DE_PREEURO |
| de_LU_PREEURO |
| el_GR_PREEURO |
| en_BE_PREEURO |
| en_GB_EURO -> en_GB@currency=EUR |
| en_IE_PREEURO -> en_IE@currency=IEP |
| es__TRADITIONAL -> es@collation=traditional |
| es_ES_PREEURO |
| eu_ES_PREEURO |
| fi_FI_PREEURO |
| fr_BE_PREEURO |
| fr_FR_PREEURO -> fr_FR@currency=FRF |
| fr_LU_PREEURO |
| ga_IE_PREEURO |
| gl_ES_PREEURO |
| hi__DIRECT -> hi@collation=direct |
| it_IT_PREEURO |
| nl_BE_PREEURO |
| nl_NL_PREEURO |
| pt_PT_PREEURO |
| zh__PINYIN -> zh@collation=pinyin |
| zh_TW_STROKE -> zh_TW@collation=stroke |
| |
| */ |
| |
| // _CompareTestEntryToTableEntryKey |
| // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) |
| // comparison function for bsearch |
| static int _CompareTestEntryToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { |
| return strcmp( ((const KeyStringToResultString *)testEntryPtr)->key, ((const KeyStringToResultString *)tableEntryKeyPtr)->key ); |
| } |
| |
| // _CompareTestEntryPrefixToTableEntryKey |
| // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) |
| // Comparison function for bsearch. Assumes prefix IS terminated with '-' or '_'. |
| // Do the following instead of strlen & strncmp so we don't walk tableEntry key twice. |
| static int _CompareTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { |
| const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key; |
| const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key; |
| |
| while ( *testPtr == *tablePtr && *tablePtr != 0 ) { |
| testPtr++; tablePtr++; |
| } |
| if ( *tablePtr != 0 ) { |
| // strings are different, and the string in the table has not run out; |
| // i.e. the table entry is not a prefix of the text string. |
| return ( *testPtr < *tablePtr )? -1: 1; |
| } |
| return 0; |
| } |
| |
| // _CompareLowerTestEntryPrefixToTableEntryKey |
| // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) |
| // Comparison function for bsearch. Assumes prefix NOT terminated with '-' or '_'. |
| // Lowercases the test string before comparison (the table should already have lowercased entries). |
| static int _CompareLowerTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { |
| const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key; |
| const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key; |
| char lowerTestChar; |
| |
| while ( (lowerTestChar = tolower(*testPtr)) == *tablePtr && *tablePtr != 0 && lowerTestChar != '_' ) { // <1.9> |
| testPtr++; tablePtr++; |
| } |
| if ( *tablePtr != 0 ) { |
| // strings are different, and the string in the table has not run out; |
| // i.e. the table entry is not a prefix of the text string. |
| if (lowerTestChar == '_') // <1.9> |
| return -1; // <1.9> |
| return ( lowerTestChar < *tablePtr )? -1: 1; |
| } |
| // The string in the table has run out. If the test string char is not alnum, |
| // then the string matches, else the test string sorts after. |
| return ( !isalnum(lowerTestChar) )? 0: 1; |
| } |
| |
| // _DeleteCharsAtPointer |
| // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) |
| // remove _length_ characters from the beginning of the string indicated by _stringPtr_ |
| // (we know that the string has at least _length_ characters in it) |
| static void _DeleteCharsAtPointer(char *stringPtr, int length) { |
| do { |
| *stringPtr = stringPtr[length]; |
| } while (*stringPtr++ != 0); |
| } |
| |
| // _CopyReplacementAtPointer |
| // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) |
| // Copy replacement string (*excluding* terminating NULL byte) to the place indicated by stringPtr |
| static void _CopyReplacementAtPointer(char *stringPtr, const char *replacementPtr) { |
| while (*replacementPtr != 0) { |
| *stringPtr++ = *replacementPtr++; |
| } |
| } |
| |
| // _CheckForTag |
| // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) |
| static Boolean _CheckForTag(const char *localeStringPtr, const char *tagPtr, int tagLen) { |
| return ( strncmp(localeStringPtr, tagPtr, tagLen) == 0 && !isalnum(localeStringPtr[tagLen]) ); |
| } |
| |
| // _ReplacePrefix |
| // Move this code from _UpdateFullLocaleString into separate function // <1.10> |
| static void _ReplacePrefix(char locString[], int locStringMaxLen, int oldPrefixLen, const char *newPrefix) { |
| int newPrefixLen = strlen(newPrefix); |
| int lengthDelta = newPrefixLen - oldPrefixLen; |
| |
| if (lengthDelta < 0) { |
| // replacement is shorter, delete chars by shifting tail of string |
| _DeleteCharsAtPointer(locString + newPrefixLen, -lengthDelta); |
| } else if (lengthDelta > 0) { |
| // replacement is longer... |
| int stringLen = strlen(locString); |
| |
| if (stringLen + lengthDelta < locStringMaxLen) { |
| // make room by shifting tail of string |
| char * tailShiftPtr = locString + stringLen; |
| char * tailStartPtr = locString + oldPrefixLen; // pointer to tail of string to shift |
| |
| while (tailShiftPtr >= tailStartPtr) { |
| tailShiftPtr[lengthDelta] = *tailShiftPtr; |
| tailShiftPtr--; |
| } |
| } else { |
| // no room, can't do substitution |
| newPrefix = NULL; |
| } |
| } |
| |
| if (newPrefix) { |
| // do the substitution |
| _CopyReplacementAtPointer(locString, newPrefix); |
| } |
| } |
| |
| // _UpdateFullLocaleString |
| // Given a locale string that uses standard codes (not a special old-style Apple string), |
| // update all the language codes and region codes to latest versions, map 3-letter |
| // language codes to 2-letter codes if possible, and normalize casing. If requested, return |
| // pointers to a language-region variant subtag (if present) and a region tag (if present). |
| // (add locStringMaxLen parameter) // <1.10> |
| static void _UpdateFullLocaleString(char inLocaleString[], int locStringMaxLen, |
| char **langRegSubtagRef, char **regionTagRef, |
| char varKeyValueString[]) // <1.17> |
| { |
| KeyStringToResultString testEntry; |
| KeyStringToResultString * foundEntry; |
| const SpecialCaseUpdates * specialCasePtr; |
| char * inLocalePtr; |
| char * subtagPtr; |
| char * langRegSubtag = NULL; |
| char * regionTag = NULL; |
| char * variantTag = NULL; |
| Boolean subtagHasDigits, pastPrimarySubtag, hadRegion; |
| |
| // 1. First replace any non-canonical prefix (case insensitive) with canonical |
| // (change 3-letter ISO 639 code to 2-letter, update obsolete ISO 639 codes & RFC 3066 tags, etc.) |
| |
| testEntry.key = inLocaleString; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToCanonical, kNumLocaleStringPrefixToCanonical, |
| sizeof(KeyStringToResultString), _CompareLowerTestEntryPrefixToTableEntryKey ); |
| if (foundEntry) { |
| // replace key (at beginning of string) with result |
| _ReplacePrefix(inLocaleString, locStringMaxLen, strlen(foundEntry->key), foundEntry->result); // <1.10> |
| } |
| |
| // 2. Walk through input string, normalizing case & marking use of ISO 3166 codes |
| |
| inLocalePtr = inLocaleString; |
| subtagPtr = inLocaleString; |
| subtagHasDigits = false; |
| pastPrimarySubtag = false; |
| hadRegion = false; |
| |
| while ( true ) { |
| if ( isalpha(*inLocalePtr) ) { |
| // if not past a region tag, then lowercase, else uppercase |
| *inLocalePtr = (!hadRegion)? tolower(*inLocalePtr): toupper(*inLocalePtr); |
| } else if ( isdigit(*inLocalePtr) ) { |
| subtagHasDigits = true; |
| } else { |
| |
| if (!pastPrimarySubtag) { |
| // may have a NULL primary subtag |
| if (subtagHasDigits) { |
| break; |
| } |
| pastPrimarySubtag = true; |
| } else if (!hadRegion) { |
| // We are after any primary language subtag, but not past any region tag. |
| // This subtag is preceded by '-' or '_'. |
| int subtagLength = inLocalePtr - subtagPtr; // includes leading '-' or '_' |
| |
| if (subtagLength == 3 && !subtagHasDigits) { |
| // potential ISO 3166 code for region or language variant; if so, needs uppercasing |
| if (*subtagPtr == '_') { |
| regionTag = subtagPtr; |
| hadRegion = true; |
| subtagPtr[1] = toupper(subtagPtr[1]); |
| subtagPtr[2] = toupper(subtagPtr[2]); |
| } else if (langRegSubtag == NULL) { |
| langRegSubtag = subtagPtr; |
| subtagPtr[1] = toupper(subtagPtr[1]); |
| subtagPtr[2] = toupper(subtagPtr[2]); |
| } |
| } else if (subtagLength == 4 && subtagHasDigits) { |
| // potential UN M.49 region code |
| if (*subtagPtr == '_') { |
| regionTag = subtagPtr; |
| hadRegion = true; |
| } else if (langRegSubtag == NULL) { |
| langRegSubtag = subtagPtr; |
| } |
| } else if (subtagLength == 5 && !subtagHasDigits) { |
| // ISO 15924 script code, uppercase just the first letter |
| subtagPtr[1] = toupper(subtagPtr[1]); |
| } else if (subtagLength == 1 && *subtagPtr == '_') { // <1.17> |
| hadRegion = true; |
| } |
| |
| if (!hadRegion) { |
| // convert improper '_' to '-' |
| *subtagPtr = '-'; |
| } |
| } else { |
| variantTag = subtagPtr; // <1.17> |
| } |
| |
| if (*inLocalePtr == '-' || *inLocalePtr == '_') { |
| subtagPtr = inLocalePtr; |
| subtagHasDigits = false; |
| } else { |
| break; |
| } |
| } |
| |
| inLocalePtr++; |
| } |
| |
| // 3 If there is a variant tag, see if ICU canonicalizes it to keywords. // <1.17> [3577669] |
| // If so, copy the keywords to varKeyValueString and delete the variant tag |
| // from the original string (but don't otherwise use the ICU canonicalization). |
| varKeyValueString[0] = 0; |
| #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
| if (variantTag) { |
| UErrorCode icuStatus; |
| int icuCanonStringLen; |
| char * varKeyValueStringPtr = varKeyValueString; |
| |
| icuStatus = U_ZERO_ERROR; |
| icuCanonStringLen = uloc_canonicalize( inLocaleString, varKeyValueString, locStringMaxLen, &icuStatus ); |
| if ( U_SUCCESS(icuStatus) ) { |
| char * icuCanonStringPtr = varKeyValueString; |
| |
| if (icuCanonStringLen >= locStringMaxLen) |
| icuCanonStringLen = locStringMaxLen - 1; |
| varKeyValueString[icuCanonStringLen] = 0; |
| while (*icuCanonStringPtr != 0 && *icuCanonStringPtr != ULOC_KEYWORD_SEPARATOR) |
| ++icuCanonStringPtr; |
| if (*icuCanonStringPtr != 0) { |
| // the canonicalized string has keywords |
| // delete the variant tag in the original string (and other trailing '_' or '-') |
| *variantTag-- = 0; |
| while (*variantTag == '_') |
| *variantTag-- = 0; |
| // delete all of the canonicalized string except the keywords |
| while (*icuCanonStringPtr != 0) |
| *varKeyValueStringPtr++ = *icuCanonStringPtr++; |
| } |
| *varKeyValueStringPtr = 0; |
| } |
| } |
| #endif |
| |
| // 4. Handle special cases of updating region codes, or updating language codes based on |
| // region code. |
| for (specialCasePtr = specialCases; specialCasePtr->reg1 != NULL; specialCasePtr++) { |
| if ( specialCasePtr->lang == NULL || _CheckForTag(inLocaleString, specialCasePtr->lang, 2) ) { |
| // OK, we matched any language specified. Now what needs updating? |
| char * foundTag; |
| |
| if ( isupper(specialCasePtr->update1[0]) ) { |
| // updating a region code |
| if ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) { |
| _CopyReplacementAtPointer(foundTag+1, specialCasePtr->update1); |
| } |
| if ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) { |
| _CopyReplacementAtPointer(regionTag+1, specialCasePtr->update1); |
| } |
| |
| } else { |
| // updating the language, there will be two choices based on region |
| if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) || |
| ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) ) { |
| _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update1); |
| } else if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg2 + 1, 2) ) || |
| ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg2) ) && !isalnum(foundTag[3]) ) ) { |
| _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update2); |
| } |
| } |
| } |
| } |
| |
| // 5. return pointers if requested. |
| if (langRegSubtagRef != NULL) { |
| *langRegSubtagRef = langRegSubtag; |
| } |
| if (regionTagRef != NULL) { |
| *regionTagRef = regionTag; |
| } |
| } |
| |
| |
| // _RemoveSubstringsIfPresent |
| // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) |
| // substringList is a list of space-separated substrings to strip if found in localeString |
| static void _RemoveSubstringsIfPresent(char *localeString, const char *substringList) { |
| while (*substringList != 0) { |
| char currentSubstring[kLocaleIdentifierCStringMax]; |
| int substringLength = 0; |
| char * foundSubstring; |
| |
| // copy current substring & get its length |
| while ( isgraph(*substringList) ) { |
| currentSubstring[substringLength++] = *substringList++; |
| } |
| // move to next substring |
| while ( isspace(*substringList) ) { |
| substringList++; |
| } |
| |
| // search for current substring in locale string |
| if (substringLength == 0) |
| continue; |
| currentSubstring[substringLength] = 0; |
| foundSubstring = strstr(localeString, currentSubstring); |
| |
| // if substring is found, delete it |
| if (foundSubstring) { |
| _DeleteCharsAtPointer(foundSubstring, substringLength); |
| } |
| } |
| } |
| |
| |
| // _GetKeyValueString // <1.10> |
| // Removes any key-value string from inLocaleString, puts canonized version in keyValueString |
| |
| static void _GetKeyValueString(char inLocaleString[], char keyValueString[]) { |
| char * inLocalePtr = inLocaleString; |
| |
| while (*inLocalePtr != 0 && *inLocalePtr != ULOC_KEYWORD_SEPARATOR) { |
| inLocalePtr++; |
| } |
| if (*inLocalePtr != 0) { // we found a key-value section |
| char * keyValuePtr = keyValueString; |
| |
| *keyValuePtr = *inLocalePtr; |
| *inLocalePtr = 0; |
| do { |
| if ( *(++inLocalePtr) != ' ' ) { |
| *(++keyValuePtr) = *inLocalePtr; // remove "tolower() for *inLocalePtr" // <1.11> |
| } |
| } while (*inLocalePtr != 0); |
| } else { |
| keyValueString[0] = 0; |
| } |
| } |
| |
| static void _AppendKeyValueString(char inLocaleString[], int locStringMaxLen, char keyValueString[]) { |
| #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
| if (keyValueString[0] != 0) { |
| UErrorCode uerr = U_ZERO_ERROR; |
| UEnumeration * uenum = uloc_openKeywords(keyValueString, &uerr); |
| if ( uenum != NULL ) { |
| const char * keyword; |
| int32_t length; |
| char value[ULOC_KEYWORDS_CAPACITY]; // use as max for keyword value |
| while ( U_SUCCESS(uerr) ) { |
| keyword = uenum_next(uenum, &length, &uerr); |
| if ( keyword == NULL ) { |
| break; |
| } |
| length = uloc_getKeywordValue( keyValueString, keyword, value, sizeof(value), &uerr ); |
| length = uloc_setKeywordValue( keyword, value, inLocaleString, locStringMaxLen, &uerr ); |
| } |
| uenum_close(uenum); |
| } |
| } |
| #endif |
| } |
| |
| // __private_extern__ CFStringRef _CFLocaleCreateCanonicalLanguageIdentifierForCFBundle(CFAllocatorRef allocator, CFStringRef localeIdentifier) {} |
| |
| CFStringRef CFLocaleCreateCanonicalLanguageIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) { |
| char inLocaleString[kLocaleIdentifierCStringMax]; |
| CFStringRef outStringRef = NULL; |
| |
| if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) { |
| KeyStringToResultString testEntry; |
| KeyStringToResultString * foundEntry; |
| char keyValueString[sizeof(inLocaleString)]; // <1.10> |
| char varKeyValueString[sizeof(inLocaleString)]; // <1.17> |
| |
| _GetKeyValueString(inLocaleString, keyValueString); // <1.10> |
| testEntry.result = NULL; |
| |
| // A. Special case aa_SAAHO, no_BOKMAL, and no_NYNORSK since they are legacy identifiers that don't follow the normal rules (http://unicode.org/cldr/trac/browser/trunk/common/supplemental/supplementalMetadata.xml) |
| |
| testEntry.key = inLocaleString; |
| KeyStringToResultString specialCase = testEntry; |
| foundEntry = &specialCase; |
| |
| if (strncmp("aa_SAAHO", testEntry.key, strlen("aa_SAAHO")) == 0) { |
| foundEntry->result = "ssy"; |
| } else if (strncmp("no_BOKMAL", testEntry.key, strlen("no_BOKMAL")) == 0) { |
| foundEntry->result = "nb"; |
| } else if (strncmp("no_NYNORSK", testEntry.key, strlen("no_NYNORSK")) == 0) { |
| foundEntry->result = "nn"; |
| } else { |
| // B. First check if input string matches an old-style string that has a replacement |
| // (do this before case normalization) |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical, |
| sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); |
| } |
| if (foundEntry) { |
| // It does match, so replace old string with new |
| strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); |
| varKeyValueString[0] = 0; |
| } else { |
| char * langRegSubtag = NULL; |
| char * regionTag = NULL; |
| |
| // C. No match with an old-style string, use input string but update codes, normalize case, etc. |
| _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, ®ionTag, varKeyValueString); // <1.10><1.17><1.19> |
| |
| // if the language part already includes a regional variant, then delete any region tag. <1.19> |
| if (langRegSubtag && regionTag) |
| *regionTag = 0; |
| } |
| |
| // D. Now we have an up-to-date locale string, but we need to strip defaults and turn it into a language string |
| |
| // 1. Strip defaults in input string based on initial part of locale string |
| // (mainly to strip default script tag for a language) |
| testEntry.key = inLocaleString; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults, |
| sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey ); |
| if (foundEntry) { |
| // The input string begins with a character sequence for which |
| // there are default substrings which should be stripped if present |
| _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); |
| } |
| |
| // 2. If the string matches a locale string used by Apple as a language string, turn it into a language string |
| testEntry.key = inLocaleString; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, appleLocaleToLanguageString, kNumAppleLocaleToLanguageString, |
| sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); |
| if (foundEntry) { |
| // it does match |
| strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); |
| } else { |
| // skip to any region tag or java-type variant |
| char * inLocalePtr = inLocaleString; |
| while (*inLocalePtr != 0 && *inLocalePtr != '_') { |
| inLocalePtr++; |
| } |
| // if there is still a region tag, turn it into a language variant <1.19> |
| if (*inLocalePtr == '_') { |
| // handle 3-digit regions in addition to 2-letter ones |
| char * regionTag = inLocalePtr++; |
| long expectedLength = 0; |
| if ( isalpha(*inLocalePtr) ) { |
| while ( isalpha(*(++inLocalePtr)) ) |
| ; |
| expectedLength = 3; |
| } else if ( isdigit(*inLocalePtr) ) { |
| while ( isdigit(*(++inLocalePtr)) ) |
| ; |
| expectedLength = 4; |
| } |
| *regionTag = (inLocalePtr - regionTag == expectedLength)? '-': 0; |
| } |
| // anything else at/after '_' just gets deleted |
| *inLocalePtr = 0; |
| } |
| |
| // E. Re-append any key-value strings, now canonical // <1.10><1.17> |
| _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString ); |
| _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString ); |
| |
| // All done, return what we came up with. |
| outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII); |
| } |
| |
| return outStringRef; |
| } |
| |
| |
| CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) { |
| char inLocaleString[kLocaleIdentifierCStringMax]; |
| CFStringRef outStringRef = NULL; |
| |
| if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) { |
| KeyStringToResultString testEntry; |
| KeyStringToResultString * foundEntry; |
| char keyValueString[sizeof(inLocaleString)]; // <1.10> |
| char varKeyValueString[sizeof(inLocaleString)]; // <1.17> |
| |
| _GetKeyValueString(inLocaleString, keyValueString); // <1.10> |
| testEntry.result = NULL; |
| |
| // A. First check if input string matches an old-style Apple string that has a replacement |
| // (do this before case normalization) |
| testEntry.key = inLocaleString; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical, |
| sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); |
| if (foundEntry) { |
| // It does match, so replace old string with new // <1.10> |
| strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); |
| varKeyValueString[0] = 0; |
| } else { |
| char * langRegSubtag = NULL; |
| char * regionTag = NULL; |
| |
| // B. No match with an old-style string, use input string but update codes, normalize case, etc. |
| _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, ®ionTag, varKeyValueString); // <1.10><1.17> |
| |
| |
| // C. Now strip defaults that are implied by other fields. |
| |
| // 1. If an ISO 3166 region tag matches an ISO 3166 regional language variant subtag, strip the latter. |
| if ( langRegSubtag && regionTag && strncmp(langRegSubtag+1, regionTag+1, 2) == 0 ) { |
| _DeleteCharsAtPointer(langRegSubtag, 3); |
| } |
| |
| // 2. Strip defaults in input string based on final region tag in locale string |
| // (mainly for Chinese, to strip -Hans for _CN/_SG, -Hant for _TW/_HK/_MO) |
| if ( regionTag ) { |
| testEntry.key = regionTag; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringRegionToDefaults, kNumLocaleStringRegionToDefaults, |
| sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); |
| if (foundEntry) { |
| _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); |
| } |
| } |
| |
| // 3. Strip defaults in input string based on initial part of locale string |
| // (mainly to strip default script tag for a language) |
| testEntry.key = inLocaleString; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults, |
| sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey ); |
| if (foundEntry) { |
| // The input string begins with a character sequence for which |
| // there are default substrings which should be stripped if present |
| _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); |
| } |
| } |
| |
| // D. Re-append any key-value strings, now canonical // <1.10><1.17> |
| _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString ); |
| _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString ); |
| |
| // Now create the CFString (even if empty!) |
| outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII); |
| } |
| |
| return outStringRef; |
| } |
| |
| // CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, based on |
| // the first part of the SPI CFBundleCopyLocalizationForLocalizationInfo in CFBundle_Resources.c |
| CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes(CFAllocatorRef allocator, LangCode lcode, RegionCode rcode) { |
| CFStringRef result = NULL; |
| if (0 <= rcode && rcode < kNumRegionCodeToLocaleString) { |
| const char *localeString = regionCodeToLocaleString[rcode]; |
| if (localeString != NULL && *localeString != '\0') { |
| result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull); |
| } |
| } |
| if (result) return result; |
| if (0 <= lcode && lcode < kNumLangCodeToLocaleString) { |
| const char *localeString = langCodeToLocaleString[lcode]; |
| if (localeString != NULL && *localeString != '\0') { |
| result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull); |
| } |
| } |
| return result; |
| } |
| |
| |
| /* |
| SPI: CFLocaleGetLanguageRegionEncodingForLocaleIdentifier gets the appropriate language and region codes, |
| and the default legacy script code and encoding, for the specified locale (or language) string. |
| Returns false if CFLocale has no information about the given locale (in which case none of the by-reference return values are set); |
| otherwise may set *langCode and/or *regCode to -1 if there is no appropriate legacy value for the locale. |
| This is a replacement for the CFBundle SPI CFBundleGetLocalizationInfoForLocalization (which was intended to be temporary and transitional); |
| this function is more up-to-date in its handling of locale strings, and is in CFLocale where this functionality should belong. Compared |
| to CFBundleGetLocalizationInfoForLocalization, this function does not spcially interpret a NULL localeIdentifier to mean use the single most |
| preferred localization in the current context (this function returns NO for a NULL localeIdentifier); and in this function |
| langCode, regCode, and scriptCode are all SInt16* (not SInt32* like the equivalent parameters in CFBundleGetLocalizationInfoForLocalization). |
| */ |
| #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
| static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 ); |
| #endif |
| |
| Boolean CFLocaleGetLanguageRegionEncodingForLocaleIdentifier(CFStringRef localeIdentifier, LangCode *langCode, RegionCode *regCode, ScriptCode *scriptCode, CFStringEncoding *stringEncoding) { |
| #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
| Boolean returnValue = false; |
| CFStringRef canonicalIdentifier = CFLocaleCreateCanonicalLocaleIdentifierFromString(NULL, localeIdentifier); |
| if (canonicalIdentifier) { |
| char localeCString[kLocaleIdentifierCStringMax]; |
| if ( CFStringGetCString(canonicalIdentifier, localeCString, sizeof(localeCString), kCFStringEncodingASCII) ) { |
| UErrorCode icuStatus = U_ZERO_ERROR; |
| int32_t languagelength; |
| char searchString[ULOC_LANG_CAPACITY + ULOC_FULLNAME_CAPACITY]; |
| |
| languagelength = uloc_getLanguage( localeCString, searchString, ULOC_LANG_CAPACITY, &icuStatus ); |
| if ( U_SUCCESS(icuStatus) && languagelength > 0 ) { |
| // OK, here we have at least a language code, check for other components in order |
| LocaleToLegacyCodes searchEntry = { (const char *)searchString, 0, 0, 0 }; |
| const LocaleToLegacyCodes * foundEntryPtr; |
| int32_t componentLength; |
| char componentString[ULOC_FULLNAME_CAPACITY]; |
| |
| languagelength = strlen(searchString); // in case it got truncated |
| icuStatus = U_ZERO_ERROR; |
| componentLength = uloc_getScript( localeCString, componentString, sizeof(componentString), &icuStatus ); |
| if ( U_FAILURE(icuStatus) || componentLength == 0 ) { |
| icuStatus = U_ZERO_ERROR; |
| componentLength = uloc_getCountry( localeCString, componentString, sizeof(componentString), &icuStatus ); |
| if ( U_FAILURE(icuStatus) || componentLength == 0 ) { |
| icuStatus = U_ZERO_ERROR; |
| componentLength = uloc_getVariant( localeCString, componentString, sizeof(componentString), &icuStatus ); |
| if ( U_FAILURE(icuStatus) ) { |
| componentLength = 0; |
| } |
| } |
| } |
| |
| // Append whichever other component we first found |
| if (componentLength > 0) { |
| strlcat(searchString, "_", sizeof(searchString)); |
| strlcat(searchString, componentString, sizeof(searchString)); |
| } |
| |
| // Search |
| foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries ); |
| if (foundEntryPtr == NULL && (int32_t) strlen(searchString) > languagelength) { |
| // truncate to language al;one and try again |
| searchString[languagelength] = 0; |
| foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries ); |
| } |
| |
| // If found a matching entry, return requested values |
| if (foundEntryPtr) { |
| returnValue = true; |
| if (langCode) *langCode = foundEntryPtr->langCode; |
| if (regCode) *regCode = foundEntryPtr->regCode; |
| if (stringEncoding) *stringEncoding = foundEntryPtr->encoding; |
| if (scriptCode) { |
| // map CFStringEncoding to ScriptCode |
| if (foundEntryPtr->encoding < 33/*kCFStringEncodingMacSymbol*/) { |
| *scriptCode = foundEntryPtr->encoding; |
| } else { |
| switch (foundEntryPtr->encoding) { |
| case 0x8C/*kCFStringEncodingMacFarsi*/: *scriptCode = 4/*smArabic*/; break; |
| case 0x98/*kCFStringEncodingMacUkrainian*/: *scriptCode = 7/*smCyrillic*/; break; |
| case 0xEC/*kCFStringEncodingMacInuit*/: *scriptCode = 28/*smEthiopic*/; break; |
| case 0xFC/*kCFStringEncodingMacVT100*/: *scriptCode = 32/*smUninterp*/; break; |
| default: *scriptCode = 0/*smRoman*/; break; |
| } |
| } |
| } |
| } |
| } |
| } |
| CFRelease(canonicalIdentifier); |
| } |
| return returnValue; |
| #else |
| return false; |
| #endif |
| } |
| |
| #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
| static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 ) { |
| const char * localeString1 = ((const LocaleToLegacyCodes *)entry1)->locale; |
| const char * localeString2 = ((const LocaleToLegacyCodes *)entry2)->locale; |
| return strcmp(localeString1, localeString2); |
| } |
| #endif |
| |
| CFDictionaryRef CFLocaleCreateComponentsFromLocaleIdentifier(CFAllocatorRef allocator, CFStringRef localeID) { |
| CFMutableDictionaryRef working = CFDictionaryCreateMutable(allocator, 10, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); |
| #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
| char cLocaleID[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
| char buffer[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
| |
| UErrorCode icuStatus = U_ZERO_ERROR; |
| int32_t length = 0; |
| |
| if (!localeID) goto out; |
| |
| // Extract the C string locale ID, for ICU |
| CFIndex outBytes = 0; |
| CFStringGetBytes(localeID, CFRangeMake(0, CFStringGetLength(localeID)), kCFStringEncodingASCII, (UInt8) '?', true, (unsigned char *)cLocaleID, sizeof(cLocaleID)/sizeof(char) - 1, &outBytes); |
| cLocaleID[outBytes] = '\0'; |
| |
| // Get the components |
| length = uloc_getLanguage(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); |
| if (U_SUCCESS(icuStatus) && length > 0) |
| { |
| CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); |
| CFDictionaryAddValue(working, kCFLocaleLanguageCodeKey, string); |
| CFRelease(string); |
| } |
| icuStatus = U_ZERO_ERROR; |
| |
| length = uloc_getScript(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); |
| if (U_SUCCESS(icuStatus) && length > 0) |
| { |
| CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); |
| CFDictionaryAddValue(working, kCFLocaleScriptCodeKey, string); |
| CFRelease(string); |
| } |
| icuStatus = U_ZERO_ERROR; |
| |
| length = uloc_getCountry(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); |
| if (U_SUCCESS(icuStatus) && length > 0) |
| { |
| CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); |
| CFDictionaryAddValue(working, kCFLocaleCountryCodeKey, string); |
| CFRelease(string); |
| } |
| icuStatus = U_ZERO_ERROR; |
| |
| length = uloc_getVariant(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); |
| if (U_SUCCESS(icuStatus) && length > 0) |
| { |
| CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); |
| CFDictionaryAddValue(working, kCFLocaleVariantCodeKey, string); |
| CFRelease(string); |
| } |
| icuStatus = U_ZERO_ERROR; |
| |
| // Now get the keywords; open an enumerator on them |
| UEnumeration *iter = uloc_openKeywords(cLocaleID, &icuStatus); |
| const char *locKey = NULL; |
| int32_t locKeyLen = 0; |
| while ((locKey = uenum_next(iter, &locKeyLen, &icuStatus)) && U_SUCCESS(icuStatus)) |
| { |
| char locValue[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
| |
| // Get the value for this keyword |
| if (uloc_getKeywordValue(cLocaleID, locKey, locValue, sizeof(locValue)/sizeof(char), &icuStatus) > 0 |
| && U_SUCCESS(icuStatus)) |
| { |
| CFStringRef key = CFStringCreateWithBytes(allocator, (UInt8 *)locKey, strlen(locKey), kCFStringEncodingASCII, true); |
| CFStringRef value = CFStringCreateWithBytes(allocator, (UInt8 *)locValue, strlen(locValue), kCFStringEncodingASCII, true); |
| if (key && value) |
| CFDictionaryAddValue(working, key, value); |
| if (key) |
| CFRelease(key); |
| if (value) |
| CFRelease(value); |
| } |
| } |
| uenum_close(iter); |
| |
| out:; |
| #endif |
| // Convert to an immutable dictionary and return |
| CFDictionaryRef result = CFDictionaryCreateCopy(allocator, working); |
| CFRelease(working); |
| return result; |
| } |
| |
| static char *__CStringFromString(CFStringRef str) { |
| if (!str) return NULL; |
| CFRange rg = CFRangeMake(0, CFStringGetLength(str)); |
| CFIndex neededLength = 0; |
| CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, NULL, 0, &neededLength); |
| char *buf = (char *)malloc(neededLength + 1); |
| CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, (uint8_t *)buf, neededLength, &neededLength); |
| buf[neededLength] = '\0'; |
| return buf; |
| } |
| |
| CFStringRef CFLocaleCreateLocaleIdentifierFromComponents(CFAllocatorRef allocator, CFDictionaryRef dictionary) { |
| if (!dictionary) return NULL; |
| |
| CFIndex cnt = CFDictionaryGetCount(dictionary); |
| STACK_BUFFER_DECL(CFStringRef, values, cnt); |
| STACK_BUFFER_DECL(CFStringRef, keys, cnt); |
| CFDictionaryGetKeysAndValues(dictionary, (const void **)keys, (const void **)values); |
| |
| char *language = NULL, *script = NULL, *country = NULL, *variant = NULL; |
| for (CFIndex idx = 0; idx < cnt; idx++) { |
| if (CFEqual(kCFLocaleLanguageCodeKey, keys[idx])) { |
| language = __CStringFromString(values[idx]); |
| keys[idx] = NULL; |
| } else if (CFEqual(kCFLocaleScriptCodeKey, keys[idx])) { |
| script = __CStringFromString(values[idx]); |
| keys[idx] = NULL; |
| } else if (CFEqual(kCFLocaleCountryCodeKey, keys[idx])) { |
| country = __CStringFromString(values[idx]); |
| keys[idx] = NULL; |
| } else if (CFEqual(kCFLocaleVariantCodeKey, keys[idx])) { |
| variant = __CStringFromString(values[idx]); |
| keys[idx] = NULL; |
| } |
| } |
| |
| char *buf1 = NULL; // (|L)(|_S)(|_C|_C_V|__V) |
| asprintf(&buf1, "%s%s%s%s%s%s%s", language ? language : "", script ? "_" : "", script ? script : "", (country || variant ? "_" : ""), country ? country : "", variant ? "_" : "", variant ? variant : ""); |
| |
| char cLocaleID[2 * ULOC_FULLNAME_CAPACITY + 2 * ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
| strlcpy(cLocaleID, buf1, sizeof(cLocaleID)); |
| free(language); |
| free(script); |
| free(country); |
| free(variant); |
| free(buf1); |
| |
| #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
| for (CFIndex idx = 0; idx < cnt; idx++) { |
| if (keys[idx]) { |
| char *key = __CStringFromString(keys[idx]); |
| char *value; |
| if (0 == strcmp(key, "kCFLocaleCalendarKey")) { |
| // For interchangeability convenience, we alternatively allow a |
| // calendar object to be passed in, with the alternate key, and |
| // we'll extract the identifier. |
| CFCalendarRef cal = (CFCalendarRef)values[idx]; |
| CFStringRef ident = CFCalendarGetIdentifier(cal); |
| value = __CStringFromString(ident); |
| char *oldkey = key; |
| key = strdup("calendar"); |
| free(oldkey); |
| } else { |
| value = __CStringFromString(values[idx]); |
| } |
| UErrorCode status = U_ZERO_ERROR; |
| uloc_setKeywordValue(key, value, cLocaleID, sizeof(cLocaleID), &status); |
| free(key); |
| free(value); |
| } |
| } |
| #endif |
| |
| return CFStringCreateWithCString(allocator, cLocaleID, kCFStringEncodingASCII); |
| } |
| |