language/internal: export tag types and rename fields
Change-Id: I5c8a590d2fdeeb5aa223c821751455c2f3eb9d9b
Reviewed-on: https://go-review.googlesource.com/95818
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/language/internal/common.go b/language/internal/common.go
index 9d86e18..cdfdb74 100644
--- a/language/internal/common.go
+++ b/language/internal/common.go
@@ -4,13 +4,13 @@
// This file contains code common to the maketables.go and the package code.
-// langAliasType is the type of an alias in langAliasMap.
-type langAliasType int8
+// AliasType is the type of an alias in AliasMap.
+type AliasType int8
const (
- langDeprecated langAliasType = iota
- langMacro
- langLegacy
+ Deprecated AliasType = iota
+ Macro
+ Legacy
- langAliasTypeUnknown langAliasType = -1
+ AliasTypeUnknown AliasType = -1
)
diff --git a/language/internal/coverage.go b/language/internal/coverage.go
index 2ee499e..9b20b88 100644
--- a/language/internal/coverage.go
+++ b/language/internal/coverage.go
@@ -6,19 +6,19 @@
// BaseLanguages returns the list of all supported base languages. It generates
// the list by traversing the internal structures.
-func BaseLanguages() []langID {
- base := make([]langID, 0, numLanguages)
+func BaseLanguages() []Language {
+ base := make([]Language, 0, NumLanguages)
for i := 0; i < langNoIndexOffset; i++ {
// We included "und" already for the value 0.
if i != nonCanonicalUnd {
- base = append(base, langID(i))
+ base = append(base, Language(i))
}
}
i := langNoIndexOffset
for _, v := range langNoIndex {
for k := 0; k < 8; k++ {
if v&1 == 1 {
- base = append(base, langID(i))
+ base = append(base, Language(i))
}
v >>= 1
i++
diff --git a/language/internal/gen.go b/language/internal/gen.go
index 4d99097..b4717b9 100644
--- a/language/internal/gen.go
+++ b/language/internal/gen.go
@@ -114,13 +114,6 @@
likelyScript is a lookup table, indexed by scriptID, for the most likely
languages and regions given a script.`,
`
-matchLang holds pairs of langIDs of base languages that are typically
-mutually intelligible. Each pair is associated with a confidence and
-whether the intelligibility goes one or both ways.`,
- `
-matchScript holds pairs of scriptIDs where readers of one script
-can typically also read the other. Each is associated with a confidence.`,
- `
nRegionGroups is the number of region groups.`,
`
regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
@@ -665,9 +658,9 @@
b.langNoIndex.remove(s)
}
}
- b.writeConst("numLanguages", len(b.lang.slice())+len(b.langNoIndex.slice()))
- b.writeConst("numScripts", len(b.script.slice()))
- b.writeConst("numRegions", len(b.region.slice()))
+ b.writeConst("NumLanguages", len(b.lang.slice())+len(b.langNoIndex.slice()))
+ b.writeConst("NumScripts", len(b.script.slice()))
+ b.writeConst("NumRegions", len(b.region.slice()))
// Add dummy codes at the start of each list to represent "unspecified".
b.lang.add("---")
@@ -729,7 +722,7 @@
// Get language codes that need to be mapped (overlong 3-letter codes,
// deprecated 2-letter codes, legacy and grandfathered tags.)
langAliasMap := stringSet{}
- aliasTypeMap := map[string]langAliasType{}
+ aliasTypeMap := map[string]AliasType{}
// altLangISO3 get the alternative ISO3 names that need to be mapped.
altLangISO3 := stringSet{}
@@ -751,7 +744,7 @@
} else if len(a.Type) <= 3 {
switch a.Reason {
case "macrolanguage":
- aliasTypeMap[a.Type] = langMacro
+ aliasTypeMap[a.Type] = Macro
case "deprecated":
// handled elsewhere
continue
@@ -759,7 +752,7 @@
if a.Type == "no" {
continue
}
- aliasTypeMap[a.Type] = langLegacy
+ aliasTypeMap[a.Type] = Legacy
default:
log.Fatalf("new %s alias: %s", a.Reason, a.Type)
}
@@ -771,14 +764,14 @@
// This can be removed if CLDR adopts this change.
langAliasMap.add("nb")
langAliasMap.updateLater("nb", "no")
- aliasTypeMap["nb"] = langMacro
+ aliasTypeMap["nb"] = Macro
for k, v := range b.registry {
// Also add deprecated values for 3-letter ISO codes, which CLDR omits.
if v.typ == "language" && v.deprecated != "" && v.preferred != "" {
langAliasMap.add(k)
langAliasMap.updateLater(k, v.preferred)
- aliasTypeMap[k] = langDeprecated
+ aliasTypeMap[k] = Deprecated
}
}
// Fix CLDR mappings.
@@ -845,7 +838,7 @@
b.writeSlice("altLangIndex", altLangIndex)
b.writeSortedMap("langAliasMap", &langAliasMap, b.langIndex)
- types := make([]langAliasType, len(langAliasMap.s))
+ types := make([]AliasType, len(langAliasMap.s))
for i, s := range langAliasMap.s {
types[i] = aliasTypeMap[s]
}
diff --git a/language/internal/gen_common.go b/language/internal/gen_common.go
index 83ce180..c419cee 100644
--- a/language/internal/gen_common.go
+++ b/language/internal/gen_common.go
@@ -8,13 +8,13 @@
// This file contains code common to the maketables.go and the package code.
-// langAliasType is the type of an alias in langAliasMap.
-type langAliasType int8
+// AliasType is the type of an alias in AliasMap.
+type AliasType int8
const (
- langDeprecated langAliasType = iota
- langMacro
- langLegacy
+ Deprecated AliasType = iota
+ Macro
+ Legacy
- langAliasTypeUnknown langAliasType = -1
+ AliasTypeUnknown AliasType = -1
)
diff --git a/language/internal/language.go b/language/internal/language.go
index 40f0170..b0f0b6c 100644
--- a/language/internal/language.go
+++ b/language/internal/language.go
@@ -33,16 +33,21 @@
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
type Tag struct {
- lang langID
- region regionID
- // TODO: we will soon run out of positions for script. Idea: instead of
- // storing lang, region, and script codes, store only the compact index and
+ // TODO: the following fields have the form TagTypeID. This name is chosen
+ // to allow refactoring the public package without conflicting with its
+ // Base, Script, and Region methods. Once the transition is fully completed
+ // the ID can be stripped from the name.
+
+ LangID Language
+ RegionID Region
+ // TODO: we will soon run out of positions for ScriptID. Idea: instead of
+ // storing lang, region, and script codes, store only the compact index and
// have a lookup table from this code to its expansion. This greatly speeds
// up table lookup, speed up common variant cases.
// This will also immediately free up 3 extra bytes. Also, the pVariant
// field can now be moved to the lookup table, as the compact index uniquely
// determines the offset of a possible variant.
- script scriptID
+ ScriptID Script
pVariant byte // offset in str, includes preceding '-'
pExt uint16 // offset of first extension, includes preceding '-'
@@ -61,13 +66,13 @@
// Raw returns the raw base language, script and region, without making an
// attempt to infer their values.
// TODO: consider removing
-func (t Tag) Raw() (b langID, s scriptID, r regionID) {
- return t.lang, t.script, t.region
+func (t Tag) Raw() (b Language, s Script, r Region) {
+ return t.LangID, t.ScriptID, t.RegionID
}
// equalTags compares language, script and region subtags only.
func (t Tag) equalTags(a Tag) bool {
- return t.lang == a.lang && t.script == a.script && t.region == a.region
+ return t.LangID == a.LangID && t.ScriptID == a.ScriptID && t.RegionID == a.RegionID
}
// IsRoot returns true if t is equal to language "und".
@@ -78,15 +83,16 @@
return t.equalTags(und)
}
-// private reports whether the Tag consists solely of a private use tag.
-func (t Tag) private() bool {
+// IsPrivateUse reports whether the Tag consists solely of a private use
+// tag.
+func (t Tag) IsPrivateUse() bool {
return t.str != "" && t.pVariant == 0
}
-// remakeString is used to update t.str in case lang, script or region changed.
+// RemakeString is used to update t.str in case lang, script or region changed.
// It is assumed that pExt and pVariant still point to the start of the
// respective parts.
-func (t *Tag) remakeString() {
+func (t *Tag) RemakeString() {
if t.str == "" {
return
}
@@ -119,14 +125,14 @@
// to the given buffer and returns the number of bytes written. It will never
// write more than maxCoreSize bytes.
func (t *Tag) genCoreBytes(buf []byte) int {
- n := t.lang.stringToBuf(buf[:])
- if t.script != 0 {
+ n := t.LangID.stringToBuf(buf[:])
+ if t.ScriptID != 0 {
n += copy(buf[n:], "-")
- n += copy(buf[n:], t.script.String())
+ n += copy(buf[n:], t.ScriptID.String())
}
- if t.region != 0 {
+ if t.RegionID != 0 {
n += copy(buf[n:], "-")
- n += copy(buf[n:], t.region.String())
+ n += copy(buf[n:], t.RegionID.String())
}
return n
}
@@ -136,8 +142,8 @@
if t.str != "" {
return t.str
}
- if t.script == 0 && t.region == 0 {
- return t.lang.String()
+ if t.ScriptID == 0 && t.RegionID == 0 {
+ return t.LangID.String()
}
buf := [maxCoreSize]byte{}
return string(buf[:t.genCoreBytes(buf[:])])
@@ -147,8 +153,8 @@
func (t Tag) MarshalText() (text []byte, err error) {
if t.str != "" {
text = append(text, t.str...)
- } else if t.script == 0 && t.region == 0 {
- text = append(text, t.lang.String()...)
+ } else if t.ScriptID == 0 && t.RegionID == 0 {
+ text = append(text, t.LangID.String()...)
} else {
buf := [maxCoreSize]byte{}
text = buf[:t.genCoreBytes(buf[:])]
@@ -183,31 +189,31 @@
if t.str != "" {
// Strip the variants and extensions.
b, s, r := t.Raw()
- t = Tag{lang: b, script: s, region: r}
- if t.region == 0 && t.script != 0 && t.lang != 0 {
- base, _ := addTags(Tag{lang: t.lang})
- if base.script == t.script {
- return Tag{lang: t.lang}
+ t = Tag{LangID: b, ScriptID: s, RegionID: r}
+ if t.RegionID == 0 && t.ScriptID != 0 && t.LangID != 0 {
+ base, _ := addTags(Tag{LangID: t.LangID})
+ if base.ScriptID == t.ScriptID {
+ return Tag{LangID: t.LangID}
}
}
return t
}
- if t.lang != 0 {
- if t.region != 0 {
- maxScript := t.script
+ if t.LangID != 0 {
+ if t.RegionID != 0 {
+ maxScript := t.ScriptID
if maxScript == 0 {
max, _ := addTags(t)
- maxScript = max.script
+ maxScript = max.ScriptID
}
for i := range parents {
- if langID(parents[i].lang) == t.lang && scriptID(parents[i].maxScript) == maxScript {
+ if Language(parents[i].lang) == t.LangID && Script(parents[i].maxScript) == maxScript {
for _, r := range parents[i].fromRegion {
- if regionID(r) == t.region {
+ if Region(r) == t.RegionID {
return Tag{
- lang: t.lang,
- script: scriptID(parents[i].script),
- region: regionID(parents[i].toRegion),
+ LangID: t.LangID,
+ ScriptID: Script(parents[i].script),
+ RegionID: Region(parents[i].toRegion),
}
}
}
@@ -215,19 +221,19 @@
}
// Strip the script if it is the default one.
- base, _ := addTags(Tag{lang: t.lang})
- if base.script != maxScript {
- return Tag{lang: t.lang, script: maxScript}
+ base, _ := addTags(Tag{LangID: t.LangID})
+ if base.ScriptID != maxScript {
+ return Tag{LangID: t.LangID, ScriptID: maxScript}
}
- return Tag{lang: t.lang}
- } else if t.script != 0 {
+ return Tag{LangID: t.LangID}
+ } else if t.ScriptID != 0 {
// The parent for an base-script pair with a non-default script is
// "und" instead of the base language.
- base, _ := addTags(Tag{lang: t.lang})
- if base.script != t.script {
+ base, _ := addTags(Tag{LangID: t.LangID})
+ if base.ScriptID != t.ScriptID {
return und
}
- return Tag{lang: t.lang}
+ return Tag{LangID: t.LangID}
}
}
return und
@@ -329,7 +335,7 @@
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// An empty value removes an existing pair with the same key.
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
- if t.private() {
+ if t.IsPrivateUse() {
return t, errPrivateUse
}
if len(key) != 2 {
@@ -474,7 +480,7 @@
// ParseBase parses a 2- or 3-letter ISO 639 code.
// It returns a ValueError if s is a well-formed but unknown language identifier
// or another error if another error occurred.
-func ParseBase(s string) (langID, error) {
+func ParseBase(s string) (Language, error) {
if n := len(s); n < 2 || 3 < n {
return 0, errSyntax
}
@@ -485,7 +491,7 @@
// ParseScript parses a 4-letter ISO 15924 code.
// It returns a ValueError if s is a well-formed but unknown script identifier
// or another error if another error occurred.
-func ParseScript(s string) (scriptID, error) {
+func ParseScript(s string) (Script, error) {
if len(s) != 4 {
return 0, errSyntax
}
@@ -495,14 +501,14 @@
// EncodeM49 returns the Region for the given UN M.49 code.
// It returns an error if r is not a valid code.
-func EncodeM49(r int) (regionID, error) {
+func EncodeM49(r int) (Region, error) {
return getRegionM49(r)
}
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
// It returns a ValueError if s is a well-formed but unknown region identifier
// or another error if another error occurred.
-func ParseRegion(s string) (regionID, error) {
+func ParseRegion(s string) (Region, error) {
if n := len(s); n < 2 || 3 < n {
return 0, errSyntax
}
@@ -512,7 +518,7 @@
// IsCountry returns whether this region is a country or autonomous area. This
// includes non-standard definitions from CLDR.
-func (r regionID) IsCountry() bool {
+func (r Region) IsCountry() bool {
if r == 0 || r.IsGroup() || r.IsPrivateUse() && r != _XK {
return false
}
@@ -521,7 +527,7 @@
// IsGroup returns whether this region defines a collection of regions. This
// includes non-standard definitions from CLDR.
-func (r regionID) IsGroup() bool {
+func (r Region) IsGroup() bool {
if r == 0 {
return false
}
@@ -530,7 +536,7 @@
// Contains returns whether Region c is contained by Region r. It returns true
// if c == r.
-func (r regionID) Contains(c regionID) bool {
+func (r Region) Contains(c Region) bool {
if r == c {
return true
}
@@ -561,7 +567,7 @@
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
// region will already be canonicalized it was obtained from a Tag that was
// obtained using any of the default methods.
-func (r regionID) TLD() (regionID, error) {
+func (r Region) TLD() (Region, error) {
// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
// difference between ISO 3166-1 and IANA ccTLD.
if r == _GB {
@@ -576,7 +582,7 @@
// Canonicalize returns the region or a possible replacement if the region is
// deprecated. It will not return a replacement for deprecated regions that
// are split into multiple regions.
-func (r regionID) Canonicalize() regionID {
+func (r Region) Canonicalize() Region {
if cr := normRegion(r); cr != 0 {
return cr
}
diff --git a/language/internal/language_test.go b/language/internal/language_test.go
index e2cd965..ecba137 100644
--- a/language/internal/language_test.go
+++ b/language/internal/language_test.go
@@ -59,7 +59,7 @@
mod, _ := Parse(tt.out)
id.setTagsFrom(mod)
for j := 0; j < 2; j++ {
- id.remakeString()
+ id.RemakeString()
if str := id.String(); str != tt.out {
t.Errorf("%d:%d: found %s; want %s", i, j, id.String(), tt.out)
}
@@ -67,7 +67,7 @@
// The bytes to string conversion as used in remakeString
// occasionally measures as more than one alloc, breaking this test.
// To alleviate this we set the number of runs to more than 1.
- if n := testtext.AllocsPerRun(8, id.remakeString); n > 1 {
+ if n := testtext.AllocsPerRun(8, id.RemakeString); n > 1 {
t.Errorf("%d: # allocs got %.1f; want <= 1", i, n)
}
}
@@ -393,7 +393,7 @@
}
r := MustParseRegion(tt.in)
- var want regionID
+ var want Region
if tt.out != "ZZ" {
want = MustParseRegion(tt.out)
}
diff --git a/language/internal/lookup.go b/language/internal/lookup.go
index 6c55f8d..73fc34b 100644
--- a/language/internal/lookup.go
+++ b/language/internal/lookup.go
@@ -32,11 +32,11 @@
})
}
-type langID uint16
+type Language uint16
// getLangID returns the langID of s if s is a canonical subtag
// or langUnknown if s is not a canonical subtag.
-func getLangID(s []byte) (langID, error) {
+func getLangID(s []byte) (Language, error) {
if len(s) == 2 {
return getLangISO2(s)
}
@@ -44,24 +44,24 @@
}
// mapLang returns the mapped langID of id according to mapping m.
-func normLang(id langID) (langID, langAliasType) {
+func normLang(id Language) (Language, AliasType) {
k := sort.Search(len(langAliasMap), func(i int) bool {
return langAliasMap[i].from >= uint16(id)
})
if k < len(langAliasMap) && langAliasMap[k].from == uint16(id) {
- return langID(langAliasMap[k].to), langAliasTypes[k]
+ return Language(langAliasMap[k].to), langAliasTypes[k]
}
- return id, langAliasTypeUnknown
+ return id, AliasTypeUnknown
}
// getLangISO2 returns the langID for the given 2-letter ISO language code
// or unknownLang if this does not exist.
-func getLangISO2(s []byte) (langID, error) {
+func getLangISO2(s []byte) (Language, error) {
if !tag.FixCase("zz", s) {
return 0, errSyntax
}
if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {
- return langID(i), nil
+ return Language(i), nil
}
return 0, mkErrInvalid(s)
}
@@ -88,7 +88,7 @@
// getLangISO3 returns the langID for the given 3-letter ISO language code
// or unknownLang if this does not exist.
-func getLangISO3(s []byte) (langID, error) {
+func getLangISO3(s []byte) (Language, error) {
if tag.FixCase("und", s) {
// first try to match canonical 3-letter entries
for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) {
@@ -96,7 +96,7 @@
// We treat "und" as special and always translate it to "unspecified".
// Note that ZZ and Zzzz are private use and are not treated as
// unspecified by default.
- id := langID(i)
+ id := Language(i)
if id == nonCanonicalUnd {
return 0, nil
}
@@ -104,16 +104,16 @@
}
}
if i := altLangISO3.Index(s); i != -1 {
- return langID(altLangIndex[altLangISO3.Elem(i)[3]]), nil
+ return Language(altLangIndex[altLangISO3.Elem(i)[3]]), nil
}
n := strToInt(s)
if langNoIndex[n/8]&(1<<(n%8)) != 0 {
- return langID(n) + langNoIndexOffset, nil
+ return Language(n) + langNoIndexOffset, nil
}
// Check for non-canonical uses of ISO3.
for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) {
if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
- return langID(i), nil
+ return Language(i), nil
}
}
return 0, mkErrInvalid(s)
@@ -123,7 +123,7 @@
// stringToBuf writes the string to b and returns the number of bytes
// written. cap(b) must be >= 3.
-func (id langID) stringToBuf(b []byte) int {
+func (id Language) stringToBuf(b []byte) int {
if id >= langNoIndexOffset {
intToStr(uint(id)-langNoIndexOffset, b[:3])
return 3
@@ -140,7 +140,7 @@
// String returns the BCP 47 representation of the langID.
// Use b as variable name, instead of id, to ensure the variable
// used is consistent with that of Base in which this type is embedded.
-func (b langID) String() string {
+func (b Language) String() string {
if b == 0 {
return "und"
} else if b >= langNoIndexOffset {
@@ -157,7 +157,7 @@
}
// ISO3 returns the ISO 639-3 language code.
-func (b langID) ISO3() string {
+func (b Language) ISO3() string {
if b == 0 || b >= langNoIndexOffset {
return b.String()
}
@@ -173,24 +173,24 @@
}
// IsPrivateUse reports whether this language code is reserved for private use.
-func (b langID) IsPrivateUse() bool {
+func (b Language) IsPrivateUse() bool {
return langPrivateStart <= b && b <= langPrivateEnd
}
// SuppressScript returns the script marked as SuppressScript in the IANA
// language tag repository, or 0 if there is no such script.
-func (b langID) SuppressScript() scriptID {
+func (b Language) SuppressScript() Script {
if b < langNoIndexOffset {
- return scriptID(suppressScript[b])
+ return Script(suppressScript[b])
}
return 0
}
-type regionID uint16
+type Region uint16
// getRegionID returns the region id for s if s is a valid 2-letter region code
// or unknownRegion.
-func getRegionID(s []byte) (regionID, error) {
+func getRegionID(s []byte) (Region, error) {
if len(s) == 3 {
if isAlpha(s[0]) {
return getRegionISO3(s)
@@ -204,26 +204,26 @@
// getRegionISO2 returns the regionID for the given 2-letter ISO country code
// or unknownRegion if this does not exist.
-func getRegionISO2(s []byte) (regionID, error) {
+func getRegionISO2(s []byte) (Region, error) {
i, err := findIndex(regionISO, s, "ZZ")
if err != nil {
return 0, err
}
- return regionID(i) + isoRegionOffset, nil
+ return Region(i) + isoRegionOffset, nil
}
// getRegionISO3 returns the regionID for the given 3-letter ISO country code
// or unknownRegion if this does not exist.
-func getRegionISO3(s []byte) (regionID, error) {
+func getRegionISO3(s []byte) (Region, error) {
if tag.FixCase("ZZZ", s) {
for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) {
if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] {
- return regionID(i) + isoRegionOffset, nil
+ return Region(i) + isoRegionOffset, nil
}
}
for i := 0; i < len(altRegionISO3); i += 3 {
if tag.Compare(altRegionISO3[i:i+3], s) == 0 {
- return regionID(altRegionIDs[i/3]), nil
+ return Region(altRegionIDs[i/3]), nil
}
}
return 0, mkErrInvalid(s)
@@ -231,7 +231,7 @@
return 0, errSyntax
}
-func getRegionM49(n int) (regionID, error) {
+func getRegionM49(n int) (Region, error) {
if 0 < n && n <= 999 {
const (
searchBits = 7
@@ -245,7 +245,7 @@
return buf[i] >= val
})
if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {
- return regionID(r & regionMask), nil
+ return Region(r & regionMask), nil
}
}
var e ValueError
@@ -256,13 +256,13 @@
// normRegion returns a region if r is deprecated or 0 otherwise.
// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).
// TODO: consider mapping split up regions to new most populous one (like CLDR).
-func normRegion(r regionID) regionID {
+func normRegion(r Region) Region {
m := regionOldMap
k := sort.Search(len(m), func(i int) bool {
return m[i].from >= uint16(r)
})
if k < len(m) && m[k].from == uint16(r) {
- return regionID(m[k].to)
+ return Region(m[k].to)
}
return 0
}
@@ -273,13 +273,13 @@
bcp47Region
)
-func (r regionID) typ() byte {
+func (r Region) typ() byte {
return regionTypes[r]
}
// String returns the BCP 47 representation for the region.
// It returns "ZZ" for an unspecified region.
-func (r regionID) String() string {
+func (r Region) String() string {
if r < isoRegionOffset {
if r == 0 {
return "ZZ"
@@ -293,7 +293,7 @@
// ISO3 returns the 3-letter ISO code of r.
// Note that not all regions have a 3-letter ISO code.
// In such cases this method returns "ZZZ".
-func (r regionID) ISO3() string {
+func (r Region) ISO3() string {
if r < isoRegionOffset {
return "ZZZ"
}
@@ -310,29 +310,29 @@
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
// is not defined for r.
-func (r regionID) M49() int {
+func (r Region) M49() int {
return int(m49[r])
}
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
// may include private-use tags that are assigned by CLDR and used in this
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
-func (r regionID) IsPrivateUse() bool {
+func (r Region) IsPrivateUse() bool {
return r.typ()&iso3166UserAssigned != 0
}
-type scriptID uint8
+type Script uint8
// getScriptID returns the script id for string s. It assumes that s
// is of the format [A-Z][a-z]{3}.
-func getScriptID(idx tag.Index, s []byte) (scriptID, error) {
+func getScriptID(idx tag.Index, s []byte) (Script, error) {
i, err := findIndex(idx, s, "Zzzz")
- return scriptID(i), err
+ return Script(i), err
}
// String returns the script code in title case.
// It returns "Zzzz" for an unspecified script.
-func (s scriptID) String() string {
+func (s Script) String() string {
if s == 0 {
return "Zzzz"
}
@@ -340,7 +340,7 @@
}
// IsPrivateUse reports whether this script code is reserved for private use.
-func (s scriptID) IsPrivateUse() bool {
+func (s Script) IsPrivateUse() bool {
return _Qaaa <= s && s <= _Qabx
}
@@ -398,7 +398,7 @@
if v < 0 {
return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true
}
- t.lang = langID(v)
+ t.LangID = Language(v)
return t, true
}
return t, false
diff --git a/language/internal/lookup_test.go b/language/internal/lookup_test.go
index 6d7cf73..7c0dad1 100644
--- a/language/internal/lookup_test.go
+++ b/language/internal/lookup_test.go
@@ -370,7 +370,7 @@
idx := tag.Index("0000BbbbDdddEeeeZzzz\xff\xff\xff\xff")
tests := []struct {
in string
- out scriptID
+ out Script
}{
{" ", 0},
{" ", 0},
diff --git a/language/internal/match.go b/language/internal/match.go
index 5cbd63f..37f4eb6 100644
--- a/language/internal/match.go
+++ b/language/internal/match.go
@@ -14,21 +14,21 @@
regionInFrom
)
-func (t *Tag) setUndefinedLang(id langID) {
- if t.lang == 0 {
- t.lang = id
+func (t *Tag) setUndefinedLang(id Language) {
+ if t.LangID == 0 {
+ t.LangID = id
}
}
-func (t *Tag) setUndefinedScript(id scriptID) {
- if t.script == 0 {
- t.script = id
+func (t *Tag) setUndefinedScript(id Script) {
+ if t.ScriptID == 0 {
+ t.ScriptID = id
}
}
-func (t *Tag) setUndefinedRegion(id regionID) {
- if t.region == 0 || t.region.Contains(id) {
- t.region = id
+func (t *Tag) setUndefinedRegion(id Region) {
+ if t.RegionID == 0 || t.RegionID.Contains(id) {
+ t.RegionID = id
}
}
@@ -47,61 +47,66 @@
} else if id.equalTags(t) {
return t, nil
}
- id.remakeString()
+ id.RemakeString()
return id, nil
}
// specializeRegion attempts to specialize a group region.
func specializeRegion(t *Tag) bool {
- if i := regionInclusion[t.region]; i < nRegionGroups {
+ if i := regionInclusion[t.RegionID]; i < nRegionGroups {
x := likelyRegionGroup[i]
- if langID(x.lang) == t.lang && scriptID(x.script) == t.script {
- t.region = regionID(x.region)
+ if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
+ t.RegionID = Region(x.region)
}
return true
}
return false
}
+// Maximize returns a new tag with missing tags filled in.
+func (t Tag) Maximize() (Tag, error) {
+ return addTags(t)
+}
+
func addTags(t Tag) (Tag, error) {
// We leave private use identifiers alone.
- if t.private() {
+ if t.IsPrivateUse() {
return t, nil
}
- if t.script != 0 && t.region != 0 {
- if t.lang != 0 {
+ if t.ScriptID != 0 && t.RegionID != 0 {
+ if t.LangID != 0 {
// already fully specified
specializeRegion(&t)
return t, nil
}
// Search matches for und-script-region. Note that for these cases
// region will never be a group so there is no need to check for this.
- list := likelyRegion[t.region : t.region+1]
+ list := likelyRegion[t.RegionID : t.RegionID+1]
if x := list[0]; x.flags&isList != 0 {
list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
}
for _, x := range list {
// Deviating from the spec. See match_test.go for details.
- if scriptID(x.script) == t.script {
- t.setUndefinedLang(langID(x.lang))
+ if Script(x.script) == t.ScriptID {
+ t.setUndefinedLang(Language(x.lang))
return t, nil
}
}
}
- if t.lang != 0 {
+ if t.LangID != 0 {
// Search matches for lang-script and lang-region, where lang != und.
- if t.lang < langNoIndexOffset {
- x := likelyLang[t.lang]
+ if t.LangID < langNoIndexOffset {
+ x := likelyLang[t.LangID]
if x.flags&isList != 0 {
list := likelyLangList[x.region : x.region+uint16(x.script)]
- if t.script != 0 {
+ if t.ScriptID != 0 {
for _, x := range list {
- if scriptID(x.script) == t.script && x.flags&scriptInFrom != 0 {
- t.setUndefinedRegion(regionID(x.region))
+ if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
+ t.setUndefinedRegion(Region(x.region))
return t, nil
}
}
- } else if t.region != 0 {
+ } else if t.RegionID != 0 {
count := 0
goodScript := true
tt := t
@@ -110,10 +115,10 @@
// defined, including the ones where the region was not
// defined. This allows for proper disambiguation within
// regions.
- if x.flags&scriptInFrom == 0 && t.region.Contains(regionID(x.region)) {
- tt.region = regionID(x.region)
- tt.setUndefinedScript(scriptID(x.script))
- goodScript = goodScript && tt.script == scriptID(x.script)
+ if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
+ tt.RegionID = Region(x.region)
+ tt.setUndefinedScript(Script(x.script))
+ goodScript = goodScript && tt.ScriptID == Script(x.script)
count++
}
}
@@ -123,39 +128,39 @@
// Even if we fail to find a unique Region, we might have
// an unambiguous script.
if goodScript {
- t.script = tt.script
+ t.ScriptID = tt.ScriptID
}
}
}
}
} else {
// Search matches for und-script.
- if t.script != 0 {
- x := likelyScript[t.script]
+ if t.ScriptID != 0 {
+ x := likelyScript[t.ScriptID]
if x.region != 0 {
- t.setUndefinedRegion(regionID(x.region))
- t.setUndefinedLang(langID(x.lang))
+ t.setUndefinedRegion(Region(x.region))
+ t.setUndefinedLang(Language(x.lang))
return t, nil
}
}
// Search matches for und-region. If und-script-region exists, it would
// have been found earlier.
- if t.region != 0 {
- if i := regionInclusion[t.region]; i < nRegionGroups {
+ if t.RegionID != 0 {
+ if i := regionInclusion[t.RegionID]; i < nRegionGroups {
x := likelyRegionGroup[i]
if x.region != 0 {
- t.setUndefinedLang(langID(x.lang))
- t.setUndefinedScript(scriptID(x.script))
- t.region = regionID(x.region)
+ t.setUndefinedLang(Language(x.lang))
+ t.setUndefinedScript(Script(x.script))
+ t.RegionID = Region(x.region)
}
} else {
- x := likelyRegion[t.region]
+ x := likelyRegion[t.RegionID]
if x.flags&isList != 0 {
x = likelyRegionList[x.lang]
}
if x.script != 0 && x.flags != scriptInFrom {
- t.setUndefinedLang(langID(x.lang))
- t.setUndefinedScript(scriptID(x.script))
+ t.setUndefinedLang(Language(x.lang))
+ t.setUndefinedScript(Script(x.script))
return t, nil
}
}
@@ -163,18 +168,18 @@
}
// Search matches for lang.
- if t.lang < langNoIndexOffset {
- x := likelyLang[t.lang]
+ if t.LangID < langNoIndexOffset {
+ x := likelyLang[t.LangID]
if x.flags&isList != 0 {
x = likelyLangList[x.region]
}
if x.region != 0 {
- t.setUndefinedScript(scriptID(x.script))
- t.setUndefinedRegion(regionID(x.region))
+ t.setUndefinedScript(Script(x.script))
+ t.setUndefinedRegion(Region(x.region))
}
specializeRegion(&t)
- if t.lang == 0 {
- t.lang = _en // default language
+ if t.LangID == 0 {
+ t.LangID = _en // default language
}
return t, nil
}
@@ -182,9 +187,9 @@
}
func (t *Tag) setTagsFrom(id Tag) {
- t.lang = id.lang
- t.script = id.script
- t.region = id.region
+ t.LangID = id.LangID
+ t.ScriptID = id.ScriptID
+ t.RegionID = id.RegionID
}
// minimize removes the region or script subtags from t such that
@@ -194,7 +199,7 @@
if err != nil {
return t, err
}
- t.remakeString()
+ t.RemakeString()
return t, nil
}
@@ -208,9 +213,9 @@
return t, err
}
for _, id := range [...]Tag{
- {lang: t.lang},
- {lang: t.lang, region: t.region},
- {lang: t.lang, script: t.script},
+ {LangID: t.LangID},
+ {LangID: t.LangID, RegionID: t.RegionID},
+ {LangID: t.LangID, ScriptID: t.ScriptID},
} {
if x, err := addTags(id); err == nil && max.equalTags(x) {
t.setTagsFrom(id)
diff --git a/language/internal/parse.go b/language/internal/parse.go
index d449c09..c482eed 100644
--- a/language/internal/parse.go
+++ b/language/internal/parse.go
@@ -287,9 +287,9 @@
func parseTag(scan *scanner) (t Tag, end int) {
var e error
// TODO: set an error if an unknown lang, script or region is encountered.
- t.lang, e = getLangID(scan.token)
+ t.LangID, e = getLangID(scan.token)
scan.setError(e)
- scan.replace(t.lang.String())
+ scan.replace(t.LangID.String())
langStart := scan.start
end = scan.scan()
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
@@ -297,7 +297,7 @@
// to a tag of the form <extlang>.
lang, e := getLangID(scan.token)
if lang != 0 {
- t.lang = lang
+ t.LangID = lang
copy(scan.b[langStart:], lang.String())
scan.b[langStart+3] = '-'
scan.start = langStart + 4
@@ -306,18 +306,18 @@
end = scan.scan()
}
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
- t.script, e = getScriptID(script, scan.token)
- if t.script == 0 {
+ t.ScriptID, e = getScriptID(script, scan.token)
+ if t.ScriptID == 0 {
scan.gobble(e)
}
end = scan.scan()
}
if n := len(scan.token); n >= 2 && n <= 3 {
- t.region, e = getRegionID(scan.token)
- if t.region == 0 {
+ t.RegionID, e = getRegionID(scan.token)
+ if t.RegionID == 0 {
scan.gobble(e)
} else {
- scan.replace(t.region.String())
+ scan.replace(t.RegionID.String())
}
end = scan.scan()
}
diff --git a/language/internal/parse_test.go b/language/internal/parse_test.go
index e228a29..393c59d 100644
--- a/language/internal/parse_test.go
+++ b/language/internal/parse_test.go
@@ -262,14 +262,14 @@
if skip {
continue
}
- if l, _ := getLangID(b(tt.lang)); l != tag.lang {
- t.Errorf("%d: lang was %q; want %q", i, tag.lang, l)
+ if l, _ := getLangID(b(tt.lang)); l != tag.LangID {
+ t.Errorf("%d: lang was %q; want %q", i, tag.LangID, l)
}
- if sc, _ := getScriptID(script, b(tt.script)); sc != tag.script {
- t.Errorf("%d: script was %q; want %q", i, tag.script, sc)
+ if sc, _ := getScriptID(script, b(tt.script)); sc != tag.ScriptID {
+ t.Errorf("%d: script was %q; want %q", i, tag.ScriptID, sc)
}
- if r, _ := getRegionID(b(tt.region)); r != tag.region {
- t.Errorf("%d: region was %q; want %q", i, tag.region, r)
+ if r, _ := getRegionID(b(tt.region)); r != tag.RegionID {
+ t.Errorf("%d: region was %q; want %q", i, tag.RegionID, r)
}
if tag.str == "" {
continue
diff --git a/language/internal/tables.go b/language/internal/tables.go
index 2a7b274..e885942 100644
--- a/language/internal/tables.go
+++ b/language/internal/tables.go
@@ -7,11 +7,11 @@
// CLDRVersion is the CLDR version from which the tables in this package are derived.
const CLDRVersion = "32"
-const numLanguages = 8665
+const NumLanguages = 8665
-const numScripts = 242
+const NumScripts = 242
-const numRegions = 357
+const NumRegions = 357
type fromTo struct {
from uint16
@@ -751,7 +751,7 @@
}
// Size: 164 bytes, 164 elements
-var langAliasTypes = [164]langAliasType{
+var langAliasTypes = [164]AliasType{
// Entry 0 - 3F
1, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 1, 0, 0, 1, 2,
1, 1, 2, 0, 1, 0, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0,
diff --git a/language/internal/tags.go b/language/internal/tags.go
index 8f6195f..5d47329 100644
--- a/language/internal/tags.go
+++ b/language/internal/tags.go
@@ -18,7 +18,7 @@
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
// It simplifies safe initialization of Base values.
-func MustParseBase(s string) langID {
+func MustParseBase(s string) Language {
b, err := ParseBase(s)
if err != nil {
panic(err)
@@ -28,7 +28,7 @@
// MustParseScript is like ParseScript, but panics if the given script cannot be
// parsed. It simplifies safe initialization of Script values.
-func MustParseScript(s string) scriptID {
+func MustParseScript(s string) Script {
scr, err := ParseScript(s)
if err != nil {
panic(err)
@@ -38,7 +38,7 @@
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
// parsed. It simplifies safe initialization of Region values.
-func MustParseRegion(s string) regionID {
+func MustParseRegion(s string) Region {
r, err := ParseRegion(s)
if err != nil {
panic(err)