/** @file
  Language Library implementation that provides functions for language conversion
  between ISO 639-2 and RFC 4646 language codes.

  Copyright (c) 2009 - 2010, Intel Corporation. All rights reserved.<BR>
  This program and the accompanying materials
  are licensed and made available under the terms and conditions of the BSD License
  which accompanies this distribution.  The full text of the license may be found at
  http://opensource.org/licenses/bsd-license.php

  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
  WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.

**/
#include <Uefi.h> | |
#include <Library/LanguageLib.h> | |
#include <Library/BaseLib.h> | |
#include <Library/DebugLib.h> | |
#include <Library/MemoryAllocationLib.h> | |
//
// Lookup table mapping ISO 639-2 3-character language codes to ISO 639-1
// 2-character language codes.
//
// The table is one flat string made of fixed-width 5-character entries:
// characters 0..2 of an entry are the ISO 639-2 code and characters 3..4 are
// the ISO 639-1 code.  The conversion routines in this file locate entries by
// testing (offset % 5), so every literal below must be exactly 5 characters.
// Adjacent string literals are concatenated by the compiler, which keeps the
// table independent of source indentation.
//
// ISO 639-2 B codes and deprecated ISO 639-1 codes are not supported.
//
// Commonly used language codes such as English and French are placed at the
// front of the table for a quick match.
//
GLOBAL_REMOVE_IF_UNREFERENCED CONST CHAR8  mIso639ToRfc4646ConversionTable[] =
  "engen"
  "frafr"
  "aaraa"
  "abkab"
  "aveae"
  "afraf"
  "akaak"
  "amham"
  "argan"
  "araar"
  "asmas"
  "avaav"
  "aymay"
  "azeaz"
  "bakba"
  "belbe"
  "bulbg"
  "bihbh"
  "bisbi"
  "bambm"
  "benbn"
  "bodbo"
  "brebr"
  "bosbs"
  "catca"
  "chece"
  "chach"
  "cosco"
  "crecr"
  "cescs"
  "chucu"
  "chvcv"
  "cymcy"
  "danda"
  "deude"
  "divdv"
  "dzodz"
  "eweee"
  "ellel"
  "epoeo"
  "spaes"
  "estet"
  "euseu"
  "fasfa"
  "fulff"
  "finfi"
  "fijfj"
  "faofo"
  "fryfy"
  "glega"
  "glagd"
  "glggl"
  "grngn"
  "gujgu"
  "glvgv"
  "hauha"
  "hebhe"
  "hinhi"
  "hmoho"
  "hrvhr"
  "hatht"
  "hunhu"
  "hyehy"
  "herhz"
  "inaia"
  "indid"
  "ileie"
  "iboig"
  "iiiii"
  "ipkik"
  "idoio"
  "islis"
  "itait"
  "ikuiu"
  "jpnja"
  "javjv"
  "katka"
  "konkg"
  "kikki"
  "kuakj"
  "kazkk"
  "kalkl"
  "khmkm"
  "kankn"
  "korko"
  "kaukr"
  "kasks"
  "kurku"
  "komkv"
  "corkw"
  "kirky"
  "latla"
  "ltzlb"
  "luglg"
  "limli"
  "linln"
  "laolo"
  "litlt"
  "lublu"
  "lavlv"
  "mlgmg"
  "mahmh"
  "mrimi"
  "mkdmk"
  "malml"
  "monmn"
  "marmr"
  "msams"
  "mltmt"
  "myamy"
  "nauna"
  "nobnb"
  "ndend"
  "nepne"
  "ndong"
  "nldnl"
  "nnonn"
  "norno"
  "nblnr"
  "navnv"
  "nyany"
  "ocioc"
  "ojioj"
  "ormom"
  "orior"
  "ossos"
  "panpa"
  "plipi"
  "polpl"
  "pusps"
  "porpt"
  "quequ"
  "rohrm"
  "runrn"
  "ronro"
  "rusru"
  "kinrw"
  "sansa"
  "srdsc"
  "sndsd"
  "smese"
  "sagsg"
  "sinsi"
  "slksk"
  "slvsl"
  "smosm"
  "snasn"
  "somso"
  "sqisq"
  "srpsr"
  "sswss"
  "sotst"
  "sunsu"
  "swesv"
  "swasw"
  "tamta"
  "telte"
  "tgktg"
  "thath"
  "tirti"
  "tuktk"
  "tgltl"
  "tsntn"
  "tonto"
  "turtr"
  "tsots"
  "tattt"
  "twitw"
  "tahty"
  "uigug"
  "ukruk"
  "urdur"
  "uzbuz"
  "venve"
  "vievi"
  "volvo"
  "wlnwa"
  "wolwo"
  "xhoxh"
  "yidyi"
  "yoryo"
  "zhaza"
  "zhozh"
  "zulzu";
/** | |
Converts upper case ASCII characters in an ASCII string to lower case ASCII | |
characters in an ASCII string. | |
If a an ASCII character in Source is in the range 'A'..'Z', then it is converted | |
to an ASCII character in the range 'a'..'z' in Destination. Otherwise, no | |
conversion is performed. Length ASCII characters from Source are convertered and | |
stored in Destination. | |
@param Destination An ASCII string to store the results of the conversion. | |
@param Source The source ASCII string of the conversion. | |
@param Length The number of ASCII characters to convert. | |
**/ | |
VOID | |
EFIAPI | |
InternalLanguageLibToLower ( | |
OUT CHAR8 *Destination, | |
IN CONST CHAR8 *Source, | |
IN UINTN Length | |
) | |
{ | |
for (; Length > 0; Length--, Destination++, Source++) { | |
*Destination = (CHAR8)((*Source >= 'A' && *Source <= 'Z') ? *Source + ('a' - 'A') : *Source); | |
} | |
} | |
/** | |
Convert an ISO 639-2 language code to a RFC 4646 language code. | |
If the ISO 639-2 language code has a corresponding ISO 639-1 code, then the ISO 639-1 | |
code is returned. Else the original ISO 639-2 code is returned. The returned RFC 4646 | |
language code is composed of only a primary language subtag. | |
If Iso639Language is NULL, then ASSERT. | |
If Rfc4646Language is NULL, then ASSERT. | |
@param[out] Rfc4646Language Pointers to a buffer large enough for an ASCII string | |
which reprsents a RFC 4646 language code containging only | |
either a ISO 639-1 or ISO 639-2 primary language subtag. | |
This string is Null-terminated. | |
@param[in] Iso639Language Pointer to a 3-letter ASCII string which represents | |
an ISO 639-2 language code. This string is not required | |
to be Null-terminated. | |
@retval TRUE The ISO 639-2 language code was converted to a ISO 639-1 code. | |
@retval FALSE The language code does not have corresponding ISO 639-1 code. | |
**/ | |
BOOLEAN | |
EFIAPI | |
ConvertIso639ToRfc4646 ( | |
OUT CHAR8 *Rfc4646Language, | |
IN CONST CHAR8 *Iso639Language | |
) | |
{ | |
CONST CHAR8 *Match; | |
ASSERT (Iso639Language != NULL); | |
ASSERT (Rfc4646Language != NULL); | |
// | |
// Convert first 3 characters of Iso639Language to lower case ASCII characters in Rfc4646Language | |
// | |
InternalLanguageLibToLower (Rfc4646Language, Iso639Language, 3); | |
Rfc4646Language[3] = '\0'; | |
Match = mIso639ToRfc4646ConversionTable; | |
do { | |
Match = AsciiStrStr (Match, Rfc4646Language); | |
if (Match == NULL) { | |
return FALSE; | |
} | |
if (((Match - mIso639ToRfc4646ConversionTable) % 5) == 0) { | |
break; | |
} | |
++Match; | |
} while (TRUE); | |
Rfc4646Language[0] = Match[3]; | |
Rfc4646Language[1] = Match[4]; | |
Rfc4646Language[2] = '\0'; | |
return TRUE; | |
} | |
/** | |
Convert a RFC 4646 language code to an ISO 639-2 language code. The primary language | |
subtag of the RFC 4646 code must be either an ISO 639-1 or 639-2 code. If the primary | |
language subtag is an ISO 639-1 code, then it is converted to its corresponding ISO 639-2 | |
code (T code if applies). Else the ISO 639-2 code is returned. | |
If Rfc4646Language is NULL, then ASSERT. | |
If Iso639Language is NULL, then ASSERT. | |
@param[out] Iso639Language Pointers to a buffer large enough for a 3-letter ASCII string | |
which reprsents an ISO 639-2 language code. The string is Null-terminated. | |
@param[in] Rfc4646Language Pointer to a RFC 4646 language code string. This string is terminated | |
by a NULL or a ';' character. | |
@retval TRUE Language code converted successfully. | |
@retval FALSE The RFC 4646 language code is invalid or unsupported. | |
**/ | |
BOOLEAN | |
EFIAPI | |
ConvertRfc4646ToIso639 ( | |
OUT CHAR8 *Iso639Language, | |
IN CONST CHAR8 *Rfc4646Language | |
) | |
{ | |
CONST CHAR8 *Match; | |
ASSERT (Rfc4646Language != NULL); | |
ASSERT (Iso639Language != NULL); | |
// | |
// RFC 4646 language code check before determining | |
// if the primary language subtag is ISO 639-1 or 639-2 code | |
// | |
if (Rfc4646Language[0] == '\0' || Rfc4646Language[1] == '\0') { | |
return FALSE; | |
} | |
// | |
// Check if the primary language subtag is ISO 639-1 code | |
// | |
if (Rfc4646Language[2] == ';' || Rfc4646Language[2] == '-' || Rfc4646Language[2] == '\0') { | |
// | |
// Convert first 2 characters of Rfc4646Language to lower case ASCII characters in Iso639Language | |
// | |
InternalLanguageLibToLower (Iso639Language, Rfc4646Language, 2); | |
// | |
// Convert ISO 639-1 code to ISO 639-2 code | |
// | |
Iso639Language[2] = '\0'; | |
Match = mIso639ToRfc4646ConversionTable; | |
do { | |
Match = AsciiStrStr (Match, Iso639Language); | |
if (Match == NULL) { | |
return FALSE; | |
} | |
if (((Match - mIso639ToRfc4646ConversionTable) % 5) == 3) { | |
break; | |
} | |
++Match; | |
} while (TRUE); | |
Rfc4646Language = Match - 3; | |
} else if (!(Rfc4646Language[3] == ';' || Rfc4646Language[3] == '-' || Rfc4646Language[3] == '\0')) { | |
return FALSE; | |
} | |
Iso639Language[0] = Rfc4646Language[0]; | |
Iso639Language[1] = Rfc4646Language[1]; | |
Iso639Language[2] = Rfc4646Language[2]; | |
Iso639Language[3] = '\0'; | |
return TRUE; | |
} | |
/** | |
Convert ISO 639-2 language codes to RFC 4646 codes and return the converted codes. | |
Caller is responsible for freeing the allocated buffer. | |
If Iso639Languages is NULL, then ASSERT. | |
@param[in] Iso639Languages Pointers to a Null-terminated ISO 639-2 language codes string containing | |
one or more ISO 639-2 3-letter language codes. | |
@retval NULL Invalid ISO 639-2 language code found. | |
@retval NULL Out of memory. | |
@return Pointer to the allocate buffer containing the Null-terminated converted language codes string. | |
This string is composed of one or more RFC4646 language codes each of which has only | |
ISO 639-1 2-letter primary language subtag. | |
**/ | |
CHAR8 * | |
EFIAPI | |
ConvertLanguagesIso639ToRfc4646 ( | |
IN CONST CHAR8 *Iso639Languages | |
) | |
{ | |
UINTN Length; | |
UINTN Iso639Index; | |
UINTN Rfc4646Index; | |
CHAR8 *Rfc4646Languages; | |
ASSERT (Iso639Languages != NULL); | |
// | |
// The length of ISO 639-2 lanugage codes string must be multiple of 3 | |
// | |
Length = AsciiStrLen (Iso639Languages); | |
if (Length % 3 != 0) { | |
return NULL; | |
} | |
// | |
// Allocate buffer for RFC 4646 language codes string | |
// | |
Rfc4646Languages = AllocatePool (Length + (Length / 3)); | |
if (Rfc4646Languages == NULL) { | |
return NULL; | |
} | |
for (Iso639Index = 0, Rfc4646Index = 0; Iso639Languages[Iso639Index] != '\0'; Iso639Index += 3) { | |
if (ConvertIso639ToRfc4646 (&Rfc4646Languages[Rfc4646Index], &Iso639Languages[Iso639Index])) { | |
Rfc4646Index += 2; | |
} else { | |
Rfc4646Index += 3; | |
} | |
Rfc4646Languages[Rfc4646Index++] = ';'; | |
} | |
Rfc4646Languages[Rfc4646Index - 1] = '\0'; | |
return Rfc4646Languages; | |
} | |
/** | |
Convert RFC 4646 language codes to ISO 639-2 codes and return the converted codes. | |
The primary language subtag of the RFC 4646 code must be either an ISO 639-1 or 639-2 code. | |
Caller is responsible for freeing the allocated buffer. | |
If Rfc4646Languages is NULL, then ASSERT. | |
@param[in] Rfc4646Languages Pointers to a Null-terminated RFC 4646 language codes string containing | |
one or more RFC 4646 language codes. | |
@retval NULL Invalid or unsupported RFC 4646 language code found. | |
@retval NULL Out of memory. | |
@return Pointer to the allocate buffer containing the Null-terminated converted language codes string. | |
This string is composed of one or more ISO 639-2 language codes. | |
**/ | |
CHAR8 * | |
EFIAPI | |
ConvertLanguagesRfc4646ToIso639 ( | |
IN CONST CHAR8 *Rfc4646Languages | |
) | |
{ | |
UINTN NumLanguages; | |
UINTN Iso639Index; | |
UINTN Rfc4646Index; | |
CHAR8 *Iso639Languages; | |
ASSERT (Rfc4646Languages != NULL); | |
// | |
// Determine the number of languages in the RFC 4646 language codes string | |
// | |
for (Rfc4646Index = 0, NumLanguages = 1; Rfc4646Languages[Rfc4646Index] != '\0'; Rfc4646Index++) { | |
if (Rfc4646Languages[Rfc4646Index] == ';') { | |
NumLanguages++; | |
} | |
} | |
// | |
// Allocate buffer for ISO 639-2 language codes string | |
// | |
Iso639Languages = AllocateZeroPool (NumLanguages * 3 + 1); | |
if (Iso639Languages == NULL) { | |
return NULL; | |
} | |
// | |
// Do the conversion for each RFC 4646 language code | |
// | |
for (Rfc4646Index = 0, Iso639Index = 0; Rfc4646Languages[Rfc4646Index] != '\0';) { | |
if (ConvertRfc4646ToIso639 (&Iso639Languages[Iso639Index], &Rfc4646Languages[Rfc4646Index])) { | |
Iso639Index += 3; | |
} else { | |
FreePool (Iso639Languages); | |
return NULL; | |
} | |
// | |
// Locate next language code | |
// | |
while (Rfc4646Languages[Rfc4646Index] != ';' && Rfc4646Languages[Rfc4646Index] != '\0') { | |
Rfc4646Index++; | |
} | |
if (Rfc4646Languages[Rfc4646Index] == ';') { | |
Rfc4646Index++; | |
} | |
} | |
Iso639Languages[Iso639Index] = '\0'; | |
return Iso639Languages; | |
} |