internal/language/compact: copy of language
Copy language package to internal/language/compact
The purpose is to expose the compact ID functionality
to other packages in x/text, but without exporting
it outside of x/text.
The next steps will be to delete respective
functionality in the original and destination
package and expose the functionality in the
internal package.
Change-Id: I35ff1a041069729f75bab743148f4cf3f16c2058
Reviewed-on: https://go-review.googlesource.com/96637
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ross Light <light@google.com>
diff --git a/internal/language/compact/compact.go b/internal/language/compact/compact.go
new file mode 100644
index 0000000..0106331
--- /dev/null
+++ b/internal/language/compact/compact.go
@@ -0,0 +1,45 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language // import "golang.org/x/text/internal/language/compact"
+
+import (
+ "sort"
+ "strings"
+
+ "golang.org/x/text/internal/language"
+)
+
+type compactID uint16
+
+func getCoreIndex(t language.Tag) (id compactID, ok bool) {
+ cci, ok := language.GetCompactCore(t)
+ if !ok {
+ return 0, false
+ }
+ i := sort.Search(len(coreTags), func(i int) bool {
+ return cci <= coreTags[i]
+ })
+ if i == len(coreTags) || coreTags[i] != cci {
+ return 0, false
+ }
+ return compactID(i), true
+}
+
+func (c compactID) tag() language.Tag {
+ if int(c) >= len(coreTags) {
+ return specialTags[int(c)-len(coreTags)]
+ }
+ return coreTags[c].Tag()
+}
+
+var specialTags []language.Tag
+
+func init() {
+ tags := strings.Split(specialTagsStr, " ")
+ specialTags = make([]language.Tag, len(tags))
+ for i, t := range tags {
+ specialTags[i] = language.MustParse(t)
+ }
+}
diff --git a/internal/language/compact/coverage.go b/internal/language/compact/coverage.go
new file mode 100644
index 0000000..fdb6156
--- /dev/null
+++ b/internal/language/compact/coverage.go
@@ -0,0 +1,187 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+ "fmt"
+ "sort"
+
+ "golang.org/x/text/internal/language"
+)
+
+// The Coverage interface is used to define the level of coverage of an
+// internationalization service. Note that not all types are supported by all
+// services. As lists may be generated on the fly, it is recommended that users
+// of a Coverage cache the results.
+type Coverage interface {
+ // Tags returns the list of supported tags.
+ Tags() []Tag
+
+ // BaseLanguages returns the list of supported base languages.
+ BaseLanguages() []Base
+
+ // Scripts returns the list of supported scripts.
+ Scripts() []Script
+
+ // Regions returns the list of supported regions.
+ Regions() []Region
+}
+
+var (
+ // Supported defines a Coverage that lists all supported subtags. Tags
+ // always returns nil.
+ Supported Coverage = allSubtags{}
+)
+
+// TODO:
+// - Support Variants, numbering systems.
+// - CLDR coverage levels.
+// - Set of common tags defined in this package.
+
+type allSubtags struct{}
+
+// Regions returns the list of supported regions. As all regions are in a
+// consecutive range, it simply returns a slice of numbers in increasing order.
+// The "undefined" region is not returned.
+func (s allSubtags) Regions() []Region {
+ reg := make([]Region, language.NumRegions)
+ for i := range reg {
+ reg[i] = Region{language.Region(i + 1)}
+ }
+ return reg
+}
+
+// Scripts returns the list of supported scripts. As all scripts are in a
+// consecutive range, it simply returns a slice of numbers in increasing order.
+// The "undefined" script is not returned.
+func (s allSubtags) Scripts() []Script {
+ scr := make([]Script, language.NumScripts)
+ for i := range scr {
+ scr[i] = Script{language.Script(i + 1)}
+ }
+ return scr
+}
+
+// BaseLanguages returns the list of all supported base languages. It generates
+// the list by traversing the internal structures.
+func (s allSubtags) BaseLanguages() []Base {
+ bs := language.BaseLanguages()
+ base := make([]Base, len(bs))
+ for i, b := range bs {
+ base[i] = Base{b}
+ }
+ return base
+}
+
+// Tags always returns nil.
+func (s allSubtags) Tags() []Tag {
+ return nil
+}
+
+// coverage is used by NewCoverage which is used as a convenient way for
+// creating Coverage implementations for partially defined data. Very often a
+// package will only need to define a subset of slices. coverage provides a
+// convenient way to do this. Moreover, packages using NewCoverage, instead of
+// their own implementation, will not break if later new slice types are added.
+type coverage struct {
+ tags func() []Tag
+ bases func() []Base
+ scripts func() []Script
+ regions func() []Region
+}
+
+func (s *coverage) Tags() []Tag {
+ if s.tags == nil {
+ return nil
+ }
+ return s.tags()
+}
+
+// bases implements sort.Interface and is used to sort base languages.
+type bases []Base
+
+func (b bases) Len() int {
+ return len(b)
+}
+
+func (b bases) Swap(i, j int) {
+ b[i], b[j] = b[j], b[i]
+}
+
+func (b bases) Less(i, j int) bool {
+ return b[i].langID < b[j].langID
+}
+
+// BaseLanguages returns the result from calling s.bases if it is specified or
+// otherwise derives the set of supported base languages from tags.
+func (s *coverage) BaseLanguages() []Base {
+ if s.bases == nil {
+ tags := s.Tags()
+ if len(tags) == 0 {
+ return nil
+ }
+ a := make([]Base, len(tags))
+ for i, t := range tags {
+ a[i] = Base{language.Language(t.lang())}
+ }
+ sort.Sort(bases(a))
+ k := 0
+ for i := 1; i < len(a); i++ {
+ if a[k] != a[i] {
+ k++
+ a[k] = a[i]
+ }
+ }
+ return a[:k+1]
+ }
+ return s.bases()
+}
+
+func (s *coverage) Scripts() []Script {
+ if s.scripts == nil {
+ return nil
+ }
+ return s.scripts()
+}
+
+func (s *coverage) Regions() []Region {
+ if s.regions == nil {
+ return nil
+ }
+ return s.regions()
+}
+
+// NewCoverage returns a Coverage for the given lists. It is typically used by
+// packages providing internationalization services to define their level of
+// coverage. A list may be of type []T or func() []T, where T is either Tag,
+// Base, Script or Region. The returned Coverage derives the value for Bases
+// from Tags if no func or slice for []Base is specified. For other unspecified
+// types the returned Coverage will return nil for the respective methods.
+func NewCoverage(list ...interface{}) Coverage {
+ s := &coverage{}
+ for _, x := range list {
+ switch v := x.(type) {
+ case func() []Base:
+ s.bases = v
+ case func() []Script:
+ s.scripts = v
+ case func() []Region:
+ s.regions = v
+ case func() []Tag:
+ s.tags = v
+ case []Base:
+ s.bases = func() []Base { return v }
+ case []Script:
+ s.scripts = func() []Script { return v }
+ case []Region:
+ s.regions = func() []Region { return v }
+ case []Tag:
+ s.tags = func() []Tag { return v }
+ default:
+ panic(fmt.Sprintf("language: unsupported set type %T", v))
+ }
+ }
+ return s
+}
diff --git a/internal/language/compact/coverage_test.go b/internal/language/compact/coverage_test.go
new file mode 100644
index 0000000..bbc092c
--- /dev/null
+++ b/internal/language/compact/coverage_test.go
@@ -0,0 +1,156 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+ "fmt"
+ "reflect"
+ "testing"
+
+ "golang.org/x/text/internal/language"
+)
+
+func TestSupported(t *testing.T) {
+ // To prove the results are correct for a type, we test that the number of
+ // results is identical to the number of results on record, that all results
+ // are distinct and that all results are valid.
+ tests := map[string]int{
+ "BaseLanguages": language.NumLanguages,
+ "Scripts": language.NumScripts,
+ "Regions": language.NumRegions,
+ "Tags": 0,
+ }
+ sup := reflect.ValueOf(Supported)
+ for name, num := range tests {
+ v := sup.MethodByName(name).Call(nil)[0]
+ if n := v.Len(); n != num {
+ t.Errorf("len(%s()) was %d; want %d", name, n, num)
+ }
+ dup := make(map[string]bool)
+ for i := 0; i < v.Len(); i++ {
+ x := v.Index(i).Interface()
+ // An invalid value will either cause a crash or result in a
+ // duplicate when passed to Sprint.
+ s := fmt.Sprint(x)
+ if dup[s] {
+ t.Errorf("%s: duplicate entry %q", name, s)
+ }
+ dup[s] = true
+ }
+ if len(dup) != v.Len() {
+ t.Errorf("%s: # unique entries was %d; want %d", name, len(dup), v.Len())
+ }
+ }
+}
+
+func TestNewCoverage(t *testing.T) {
+ bases := []Base{Base{0}, Base{3}, Base{7}}
+ scripts := []Script{Script{11}, Script{17}, Script{23}}
+ regions := []Region{Region{101}, Region{103}, Region{107}}
+ tags := []Tag{Make("pt"), Make("en"), Make("en-GB"), Make("en-US"), Make("pt-PT")}
+ fbases := func() []Base { return bases }
+ fscripts := func() []Script { return scripts }
+ fregions := func() []Region { return regions }
+ ftags := func() []Tag { return tags }
+
+ tests := []struct {
+ desc string
+ list []interface{}
+ bases []Base
+ scripts []Script
+ regions []Region
+ tags []Tag
+ }{
+ {
+ desc: "empty",
+ },
+ {
+ desc: "bases",
+ list: []interface{}{bases},
+ bases: bases,
+ },
+ {
+ desc: "scripts",
+ list: []interface{}{scripts},
+ scripts: scripts,
+ },
+ {
+ desc: "regions",
+ list: []interface{}{regions},
+ regions: regions,
+ },
+ {
+ desc: "bases derives from tags",
+ list: []interface{}{tags},
+ bases: []Base{Base{_en}, Base{_pt}},
+ tags: tags,
+ },
+ {
+ desc: "tags and bases",
+ list: []interface{}{tags, bases},
+ bases: bases,
+ tags: tags,
+ },
+ {
+ desc: "fully specified",
+ list: []interface{}{tags, bases, scripts, regions},
+ bases: bases,
+ scripts: scripts,
+ regions: regions,
+ tags: tags,
+ },
+ {
+ desc: "bases func",
+ list: []interface{}{fbases},
+ bases: bases,
+ },
+ {
+ desc: "scripts func",
+ list: []interface{}{fscripts},
+ scripts: scripts,
+ },
+ {
+ desc: "regions func",
+ list: []interface{}{fregions},
+ regions: regions,
+ },
+ {
+ desc: "tags func",
+ list: []interface{}{ftags},
+ bases: []Base{Base{_en}, Base{_pt}},
+ tags: tags,
+ },
+ {
+ desc: "tags and bases",
+ list: []interface{}{ftags, fbases},
+ bases: bases,
+ tags: tags,
+ },
+ {
+ desc: "fully specified",
+ list: []interface{}{ftags, fbases, fscripts, fregions},
+ bases: bases,
+ scripts: scripts,
+ regions: regions,
+ tags: tags,
+ },
+ }
+
+ for i, tt := range tests {
+ l := NewCoverage(tt.list...)
+ if a := l.BaseLanguages(); !reflect.DeepEqual(a, tt.bases) {
+ t.Errorf("%d:%s: BaseLanguages was %v; want %v", i, tt.desc, a, tt.bases)
+ }
+ if a := l.Scripts(); !reflect.DeepEqual(a, tt.scripts) {
+ t.Errorf("%d:%s: Scripts was %v; want %v", i, tt.desc, a, tt.scripts)
+ }
+ if a := l.Regions(); !reflect.DeepEqual(a, tt.regions) {
+ t.Errorf("%d:%s: Regions was %v; want %v", i, tt.desc, a, tt.regions)
+ }
+ if a := l.Tags(); !reflect.DeepEqual(a, tt.tags) {
+ t.Errorf("%d:%s: Tags was %v; want %v", i, tt.desc, a, tt.tags)
+ }
+ }
+}
diff --git a/internal/language/compact/examples_test.go b/internal/language/compact/examples_test.go
new file mode 100644
index 0000000..68caa3f
--- /dev/null
+++ b/internal/language/compact/examples_test.go
@@ -0,0 +1,413 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language_test
+
+import (
+ "fmt"
+ "net/http"
+
+ "golang.org/x/text/language"
+)
+
+func ExampleCanonType() {
+ p := func(id string) {
+ fmt.Printf("Default(%s) -> %s\n", id, language.Make(id))
+ fmt.Printf("BCP47(%s) -> %s\n", id, language.BCP47.Make(id))
+ fmt.Printf("Macro(%s) -> %s\n", id, language.Macro.Make(id))
+ fmt.Printf("All(%s) -> %s\n", id, language.All.Make(id))
+ }
+ p("en-Latn")
+ p("sh")
+ p("zh-cmn")
+ p("bjd")
+ p("iw-Latn-fonipa-u-cu-usd")
+ // Output:
+ // Default(en-Latn) -> en-Latn
+ // BCP47(en-Latn) -> en
+ // Macro(en-Latn) -> en-Latn
+ // All(en-Latn) -> en
+ // Default(sh) -> sr-Latn
+ // BCP47(sh) -> sh
+ // Macro(sh) -> sh
+ // All(sh) -> sr-Latn
+ // Default(zh-cmn) -> cmn
+ // BCP47(zh-cmn) -> cmn
+ // Macro(zh-cmn) -> zh
+ // All(zh-cmn) -> zh
+ // Default(bjd) -> drl
+ // BCP47(bjd) -> drl
+ // Macro(bjd) -> bjd
+ // All(bjd) -> drl
+ // Default(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
+ // BCP47(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
+ // Macro(iw-Latn-fonipa-u-cu-usd) -> iw-Latn-fonipa-u-cu-usd
+ // All(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
+}
+
+func ExampleTag_Base() {
+ fmt.Println(language.Make("und").Base())
+ fmt.Println(language.Make("und-US").Base())
+ fmt.Println(language.Make("und-NL").Base())
+ fmt.Println(language.Make("und-419").Base()) // Latin America
+ fmt.Println(language.Make("und-ZZ").Base())
+ // Output:
+ // en Low
+ // en High
+ // nl High
+ // es Low
+ // en Low
+}
+
+func ExampleTag_Script() {
+ en := language.Make("en")
+ sr := language.Make("sr")
+ sr_Latn := language.Make("sr_Latn")
+ fmt.Println(en.Script())
+ fmt.Println(sr.Script())
+ // Was a script explicitly specified?
+ _, c := sr.Script()
+ fmt.Println(c == language.Exact)
+ _, c = sr_Latn.Script()
+ fmt.Println(c == language.Exact)
+ // Output:
+ // Latn High
+ // Cyrl Low
+ // false
+ // true
+}
+
+func ExampleTag_Region() {
+ ru := language.Make("ru")
+ en := language.Make("en")
+ fmt.Println(ru.Region())
+ fmt.Println(en.Region())
+ // Output:
+ // RU Low
+ // US Low
+}
+
+func ExampleRegion_TLD() {
+ us := language.MustParseRegion("US")
+ gb := language.MustParseRegion("GB")
+ uk := language.MustParseRegion("UK")
+ bu := language.MustParseRegion("BU")
+
+ fmt.Println(us.TLD())
+ fmt.Println(gb.TLD())
+ fmt.Println(uk.TLD())
+ fmt.Println(bu.TLD())
+
+ fmt.Println(us.Canonicalize().TLD())
+ fmt.Println(gb.Canonicalize().TLD())
+ fmt.Println(uk.Canonicalize().TLD())
+ fmt.Println(bu.Canonicalize().TLD())
+ // Output:
+ // US <nil>
+ // UK <nil>
+ // UK <nil>
+ // ZZ language: region is not a valid ccTLD
+ // US <nil>
+ // UK <nil>
+ // UK <nil>
+ // MM <nil>
+}
+
+func ExampleCompose() {
+ nl, _ := language.ParseBase("nl")
+ us, _ := language.ParseRegion("US")
+ de := language.Make("de-1901-u-co-phonebk")
+ jp := language.Make("ja-JP")
+ fi := language.Make("fi-x-ing")
+
+ u, _ := language.ParseExtension("u-nu-arabic")
+ x, _ := language.ParseExtension("x-piglatin")
+
+ // Combine a base language and region.
+ fmt.Println(language.Compose(nl, us))
+ // Combine a base language and extension.
+ fmt.Println(language.Compose(nl, x))
+ // Replace the region.
+ fmt.Println(language.Compose(jp, us))
+ // Combine several tags.
+ fmt.Println(language.Compose(us, nl, u))
+
+ // Replace the base language of a tag.
+ fmt.Println(language.Compose(de, nl))
+ fmt.Println(language.Compose(de, nl, u))
+ // Remove the base language.
+ fmt.Println(language.Compose(de, language.Base{}))
+ // Remove all variants.
+ fmt.Println(language.Compose(de, []language.Variant{}))
+ // Remove all extensions.
+ fmt.Println(language.Compose(de, []language.Extension{}))
+ fmt.Println(language.Compose(fi, []language.Extension{}))
+ // Remove all variants and extensions.
+ fmt.Println(language.Compose(de.Raw()))
+
+ // An error is gobbled or returned if non-nil.
+ fmt.Println(language.Compose(language.ParseRegion("ZA")))
+ fmt.Println(language.Compose(language.ParseRegion("HH")))
+
+ // Compose uses the same Default canonicalization as Make.
+ fmt.Println(language.Compose(language.Raw.Parse("en-Latn-UK")))
+
+ // Call compose on a different CanonType for different results.
+ fmt.Println(language.All.Compose(language.Raw.Parse("en-Latn-UK")))
+
+ // Output:
+ // nl-US <nil>
+ // nl-x-piglatin <nil>
+ // ja-US <nil>
+ // nl-US-u-nu-arabic <nil>
+ // nl-1901-u-co-phonebk <nil>
+ // nl-1901-u-co-phonebk-nu-arabic <nil>
+ // und-1901-u-co-phonebk <nil>
+ // de-u-co-phonebk <nil>
+ // de-1901 <nil>
+ // fi <nil>
+ // de <nil>
+ // und-ZA <nil>
+ // und language: subtag "HH" is well-formed but unknown
+ // en-Latn-GB <nil>
+ // en-GB <nil>
+}
+
+func ExampleParse_errors() {
+ for _, s := range []string{"Foo", "Bar", "Foobar"} {
+ _, err := language.Parse(s)
+ if err != nil {
+ if inv, ok := err.(language.ValueError); ok {
+ fmt.Println(inv.Subtag())
+ } else {
+ fmt.Println(s)
+ }
+ }
+ }
+ for _, s := range []string{"en", "aa-Uuuu", "AC", "ac-u"} {
+ _, err := language.Parse(s)
+ switch e := err.(type) {
+ case language.ValueError:
+ fmt.Printf("%s: culprit %q\n", s, e.Subtag())
+ case nil:
+ // No error.
+ default:
+ // A syntax error.
+ fmt.Printf("%s: ill-formed\n", s)
+ }
+ }
+ // Output:
+ // foo
+ // Foobar
+ // aa-Uuuu: culprit "Uuuu"
+ // AC: culprit "ac"
+ // ac-u: ill-formed
+}
+
+func ExampleParent() {
+ p := func(tag string) {
+ fmt.Printf("parent(%v): %v\n", tag, language.Make(tag).Parent())
+ }
+ p("zh-CN")
+
+ // Australian English inherits from World English.
+ p("en-AU")
+
+ // If the tag has a different maximized script from its parent, a tag with
+ // this maximized script is inserted. This allows different language tags
+ // which have the same base language and script in common to inherit from
+ // a common set of settings.
+ p("zh-HK")
+
+ // If the maximized script of the parent is not identical, CLDR will skip
+ // inheriting from it, as it means there will not be many entries in common
+ // and inheriting from it is nonsensical.
+ p("zh-Hant")
+
+ // The parent of a tag with variants and extensions is the tag with all
+ // variants and extensions removed.
+ p("de-1994-u-co-phonebk")
+
+ // Remove default script.
+ p("de-Latn-LU")
+
+ // Output:
+ // parent(zh-CN): zh
+ // parent(en-AU): en-001
+ // parent(zh-HK): zh-Hant
+ // parent(zh-Hant): und
+ // parent(de-1994-u-co-phonebk): de
+ // parent(de-Latn-LU): de
+}
+
+// ExampleMatcher_bestMatch gives some examples of getting the best match of
+// a set of tags to any of the tags of given set.
+func ExampleMatcher() {
+ // This is the set of tags from which we want to pick the best match. These
+ // can be, for example, the supported languages for some package.
+ tags := []language.Tag{
+ language.English,
+ language.BritishEnglish,
+ language.French,
+ language.Afrikaans,
+ language.BrazilianPortuguese,
+ language.EuropeanPortuguese,
+ language.Croatian,
+ language.SimplifiedChinese,
+ language.Raw.Make("iw-IL"),
+ language.Raw.Make("iw"),
+ language.Raw.Make("he"),
+ }
+ m := language.NewMatcher(tags)
+
+ // A simple match.
+ fmt.Println(m.Match(language.Make("fr")))
+
+ // Australian English is closer to British than American English.
+ fmt.Println(m.Match(language.Make("en-AU")))
+
+ // Default to the first tag passed to the Matcher if there is no match.
+ fmt.Println(m.Match(language.Make("ar")))
+
+ // Get the default tag.
+ fmt.Println(m.Match())
+
+ fmt.Println("----")
+
+ // Someone specifying sr-Latn is probably fine with getting Croatian.
+ fmt.Println(m.Match(language.Make("sr-Latn")))
+
+ // We match SimplifiedChinese, but with Low confidence.
+ fmt.Println(m.Match(language.TraditionalChinese))
+
+ // Serbian in Latin script is a closer match to Croatian than Traditional
+ // Chinese to Simplified Chinese.
+ fmt.Println(m.Match(language.TraditionalChinese, language.Make("sr-Latn")))
+
+ fmt.Println("----")
+
+	// In case multiple variants of a language are available, the most spoken
+ // variant is typically returned.
+ fmt.Println(m.Match(language.Portuguese))
+
+ // Pick the first value passed to Match in case of a tie.
+ fmt.Println(m.Match(language.Dutch, language.Make("fr-BE"), language.Make("af-NA")))
+ fmt.Println(m.Match(language.Dutch, language.Make("af-NA"), language.Make("fr-BE")))
+
+ fmt.Println("----")
+
+	// If a Matcher is initialized with a language and its deprecated version,
+ // it will distinguish between them.
+ fmt.Println(m.Match(language.Raw.Make("iw")))
+
+ // However, for non-exact matches, it will treat deprecated versions as
+ // equivalent and consider other factors first.
+ fmt.Println(m.Match(language.Raw.Make("he-IL")))
+
+ fmt.Println("----")
+
+ // User settings passed to the Unicode extension are ignored for matching
+ // and preserved in the returned tag.
+ fmt.Println(m.Match(language.Make("de-u-co-phonebk"), language.Make("fr-u-cu-frf")))
+
+ // Even if the matching language is different.
+ fmt.Println(m.Match(language.Make("de-u-co-phonebk"), language.Make("br-u-cu-frf")))
+
+ // If there is no matching language, the options of the first preferred tag are used.
+ fmt.Println(m.Match(language.Make("de-u-co-phonebk")))
+
+ // Output:
+ // fr 2 Exact
+ // en-GB 1 High
+ // en 0 No
+ // en 0 No
+ // ----
+ // hr 6 High
+ // zh-Hans 7 Low
+ // hr 6 High
+ // ----
+ // pt-BR 4 High
+ // fr 2 High
+ // af 3 High
+ // ----
+ // iw 9 Exact
+ // he 10 Exact
+ // ----
+ // fr-u-cu-frf 2 Exact
+ // fr-u-cu-frf 2 High
+ // en-u-co-phonebk 0 No
+
+ // TODO: "he" should be "he-u-rg-IL High"
+}
+
+func ExampleMatchStrings() {
+ // languages supported by this service:
+ matcher := language.NewMatcher([]language.Tag{
+ language.English, language.Dutch, language.German,
+ })
+
+ http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+ lang, _ := r.Cookie("lang")
+ tag, _ := language.MatchStrings(matcher, lang.String(), r.Header.Get("Accept-Language"))
+
+ fmt.Println("User language:", tag)
+ })
+}
+
+func ExampleComprehends() {
+ // Various levels of comprehensibility.
+ fmt.Println(language.Comprehends(language.English, language.English))
+ fmt.Println(language.Comprehends(language.AmericanEnglish, language.BritishEnglish))
+
+ // An explicit Und results in no match.
+ fmt.Println(language.Comprehends(language.English, language.Und))
+
+ fmt.Println("----")
+
+ // There is usually no mutual comprehensibility between different scripts.
+ fmt.Println(language.Comprehends(language.Make("en-Dsrt"), language.English))
+
+ // One exception is for Traditional versus Simplified Chinese, albeit with
+ // a low confidence.
+ fmt.Println(language.Comprehends(language.TraditionalChinese, language.SimplifiedChinese))
+
+ fmt.Println("----")
+
+ // A Swiss German speaker will often understand High German.
+ fmt.Println(language.Comprehends(language.Make("gsw"), language.Make("de")))
+
+ // The converse is not generally the case.
+ fmt.Println(language.Comprehends(language.Make("de"), language.Make("gsw")))
+
+ // Output:
+ // Exact
+ // High
+ // No
+ // ----
+ // No
+ // Low
+ // ----
+ // High
+ // No
+}
+
+func ExampleTag_values() {
+ us := language.MustParseRegion("US")
+ en := language.MustParseBase("en")
+
+ lang, _, region := language.AmericanEnglish.Raw()
+ fmt.Println(lang == en, region == us)
+
+ lang, _, region = language.BritishEnglish.Raw()
+ fmt.Println(lang == en, region == us)
+
+ // Tags can be compared for exact equivalence using '=='.
+ en_us, _ := language.Compose(en, us)
+ fmt.Println(en_us == language.AmericanEnglish)
+
+ // Output:
+ // true true
+ // true false
+ // true
+}
diff --git a/internal/language/compact/gen.go b/internal/language/compact/gen.go
new file mode 100644
index 0000000..5190040
--- /dev/null
+++ b/internal/language/compact/gen.go
@@ -0,0 +1,309 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// Language tag table generator.
+// Data read from the web.
+
+package main
+
+import (
+ "flag"
+ "fmt"
+ "io"
+ "log"
+ "sort"
+ "strconv"
+ "strings"
+
+ "golang.org/x/text/internal/gen"
+ "golang.org/x/text/internal/language"
+ "golang.org/x/text/unicode/cldr"
+)
+
+var (
+ test = flag.Bool("test",
+ false,
+ "test existing tables; can be used to compare web data with package data.")
+ outputFile = flag.String("output",
+ "tables.go",
+ "output file for generated tables")
+)
+
+func main() {
+	gen.Init()
+
+	w := gen.NewCodeWriter()
+	defer w.WriteGoFile(*outputFile, "language") // honor the -output flag (default "tables.go") instead of hard-coding the name
+
+	fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`)
+
+	b := newBuilder(w)
+	gen.WriteCLDRVersion(w)
+
+	b.writeConstants()
+	b.writeCompactIndex()
+	b.writeMatchData()
+}
+
+type builder struct {
+ w *gen.CodeWriter
+ hw io.Writer // MultiWriter for w and w.Hash
+ data *cldr.CLDR
+ supp *cldr.SupplementalData
+}
+
+func (b *builder) langIndex(s string) uint16 {
+ return uint16(language.MustParseBase(s))
+}
+
+func (b *builder) regionIndex(s string) int {
+ return int(language.MustParseRegion(s))
+}
+
+func (b *builder) scriptIndex(s string) int {
+ return int(language.MustParseScript(s))
+}
+
+func newBuilder(w *gen.CodeWriter) *builder {
+ r := gen.OpenCLDRCoreZip()
+ defer r.Close()
+ d := &cldr.Decoder{}
+ data, err := d.DecodeZip(r)
+ if err != nil {
+ log.Fatal(err)
+ }
+ b := builder{
+ w: w,
+ hw: io.MultiWriter(w, w.Hash),
+ data: data,
+ supp: data.Supplemental(),
+ }
+ return &b
+}
+
+// writeConsts computes f(v) for all v in values and writes the results
+// as constants named _v to a single constant block.
+func (b *builder) writeConsts(f func(string) int, values ...string) {
+ fmt.Fprintln(b.w, "const (")
+ for _, v := range values {
+ fmt.Fprintf(b.w, "\t_%s = %v\n", v, f(v))
+ }
+ fmt.Fprintln(b.w, ")")
+}
+
+// TODO: region inclusion data will probably not be used in future matchers.
+
+var langConsts = []string{
+ "de", "en", "fr", "it", "mo", "no", "nb", "pt", "sh", "mul", "und",
+}
+
+var scriptConsts = []string{
+ "Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy",
+ "Zzzz",
+}
+
+var regionConsts = []string{
+ "001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US",
+ "ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo.
+}
+
+// writeLanguage generates all tables needed for language canonicalization.
+func (b *builder) writeConstants() {
+ b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
+ b.writeConsts(b.regionIndex, regionConsts...)
+ b.writeConsts(b.scriptIndex, scriptConsts...)
+}
+
+type mutualIntelligibility struct {
+ want, have uint16
+ distance uint8
+ oneway bool
+}
+
+type scriptIntelligibility struct {
+ wantLang, haveLang uint16
+ wantScript, haveScript uint8
+ distance uint8
+ // Always oneway
+}
+
+type regionIntelligibility struct {
+ lang uint16 // compact language id
+ script uint8 // 0 means any
+ group uint8 // 0 means any; if bit 7 is set it means inverse
+ distance uint8
+ // Always twoway.
+}
+
+// writeMatchData writes tables with languages and scripts for which there is
+// mutual intelligibility. The data is based on CLDR's languageMatching data.
+// Note that we use a different algorithm than the one defined by CLDR and that
+// we slightly modify the data. For example, we convert scores to confidence levels.
+// We also drop all region-related data as we use a different algorithm to
+// determine region equivalence.
+func (b *builder) writeMatchData() {
+ lm := b.supp.LanguageMatching.LanguageMatches
+ cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new")
+
+ regionHierarchy := map[string][]string{}
+ for _, g := range b.supp.TerritoryContainment.Group {
+ regions := strings.Split(g.Contains, " ")
+ regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...)
+ }
+ regionToGroups := make([]uint8, language.NumRegions)
+
+ idToIndex := map[string]uint8{}
+ for i, mv := range lm[0].MatchVariable {
+ if i > 6 {
+ log.Fatalf("Too many groups: %d", i)
+ }
+ idToIndex[mv.Id] = uint8(i + 1)
+ // TODO: also handle '-'
+ for _, r := range strings.Split(mv.Value, "+") {
+ todo := []string{r}
+ for k := 0; k < len(todo); k++ {
+ r := todo[k]
+ regionToGroups[b.regionIndex(r)] |= 1 << uint8(i)
+ todo = append(todo, regionHierarchy[r]...)
+ }
+ }
+ }
+ b.w.WriteVar("regionToGroups", regionToGroups)
+
+ // maps language id to in- and out-of-group region.
+ paradigmLocales := [][3]uint16{}
+ locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ")
+ for i := 0; i < len(locales); i += 2 {
+ x := [3]uint16{}
+ for j := 0; j < 2; j++ {
+ pc := strings.SplitN(locales[i+j], "-", 2)
+ x[0] = b.langIndex(pc[0])
+ if len(pc) == 2 {
+ x[1+j] = uint16(b.regionIndex(pc[1]))
+ }
+ }
+ paradigmLocales = append(paradigmLocales, x)
+ }
+ b.w.WriteVar("paradigmLocales", paradigmLocales)
+
+ b.w.WriteType(mutualIntelligibility{})
+ b.w.WriteType(scriptIntelligibility{})
+ b.w.WriteType(regionIntelligibility{})
+
+ matchLang := []mutualIntelligibility{}
+ matchScript := []scriptIntelligibility{}
+ matchRegion := []regionIntelligibility{}
+ // Convert the languageMatch entries in lists keyed by desired language.
+ for _, m := range lm[0].LanguageMatch {
+ // Different versions of CLDR use different separators.
+ desired := strings.Replace(m.Desired, "-", "_", -1)
+ supported := strings.Replace(m.Supported, "-", "_", -1)
+ d := strings.Split(desired, "_")
+ s := strings.Split(supported, "_")
+ if len(d) != len(s) {
+ log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
+ continue
+ }
+ distance, _ := strconv.ParseInt(m.Distance, 10, 8)
+ switch len(d) {
+ case 2:
+ if desired == supported && desired == "*_*" {
+ continue
+ }
+ // language-script pair.
+ matchScript = append(matchScript, scriptIntelligibility{
+ wantLang: uint16(b.langIndex(d[0])),
+ haveLang: uint16(b.langIndex(s[0])),
+ wantScript: uint8(b.scriptIndex(d[1])),
+ haveScript: uint8(b.scriptIndex(s[1])),
+ distance: uint8(distance),
+ })
+ if m.Oneway != "true" {
+ matchScript = append(matchScript, scriptIntelligibility{
+ wantLang: uint16(b.langIndex(s[0])),
+ haveLang: uint16(b.langIndex(d[0])),
+ wantScript: uint8(b.scriptIndex(s[1])),
+ haveScript: uint8(b.scriptIndex(d[1])),
+ distance: uint8(distance),
+ })
+ }
+ case 1:
+ if desired == supported && desired == "*" {
+ continue
+ }
+ if distance == 1 {
+ // nb == no is already handled by macro mapping. Check there
+ // really is only this case.
+ if d[0] != "no" || s[0] != "nb" {
+ log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
+ }
+ continue
+ }
+ // TODO: consider dropping oneway field and just doubling the entry.
+ matchLang = append(matchLang, mutualIntelligibility{
+ want: uint16(b.langIndex(d[0])),
+ have: uint16(b.langIndex(s[0])),
+ distance: uint8(distance),
+ oneway: m.Oneway == "true",
+ })
+ case 3:
+ if desired == supported && desired == "*_*_*" {
+ continue
+ }
+ if desired != supported {
+ // This is now supported by CLDR, but only one case, which
+ // should already be covered by paradigm locales. For instance,
+ // test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in
+ // testdata/CLDRLocaleMatcherTest.txt tests this.
+ if supported != "en_*_GB" {
+ log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
+ }
+ continue
+ }
+ ri := regionIntelligibility{
+ lang: b.langIndex(d[0]),
+ distance: uint8(distance),
+ }
+ if d[1] != "*" {
+ ri.script = uint8(b.scriptIndex(d[1]))
+ }
+ switch {
+ case d[2] == "*":
+ ri.group = 0x80 // not contained in anything
+ case strings.HasPrefix(d[2], "$!"):
+ ri.group = 0x80
+ d[2] = "$" + d[2][len("$!"):]
+ fallthrough
+ case strings.HasPrefix(d[2], "$"):
+ ri.group |= idToIndex[d[2]]
+ }
+ matchRegion = append(matchRegion, ri)
+ default:
+ log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
+ }
+ }
+ sort.SliceStable(matchLang, func(i, j int) bool {
+ return matchLang[i].distance < matchLang[j].distance
+ })
+ b.w.WriteComment(`
+ matchLang holds pairs of langIDs of base languages that are typically
+ mutually intelligible. Each pair is associated with a confidence and
+ whether the intelligibility goes one or both ways.`)
+ b.w.WriteVar("matchLang", matchLang)
+
+ b.w.WriteComment(`
+ matchScript holds pairs of scriptIDs where readers of one script
+ can typically also read the other. Each is associated with a confidence.`)
+ sort.SliceStable(matchScript, func(i, j int) bool {
+ return matchScript[i].distance < matchScript[j].distance
+ })
+ b.w.WriteVar("matchScript", matchScript)
+
+ sort.SliceStable(matchRegion, func(i, j int) bool {
+ return matchRegion[i].distance < matchRegion[j].distance
+ })
+ b.w.WriteVar("matchRegion", matchRegion)
+}
diff --git a/internal/language/compact/gen_index.go b/internal/language/compact/gen_index.go
new file mode 100644
index 0000000..2a84a91
--- /dev/null
+++ b/internal/language/compact/gen_index.go
@@ -0,0 +1,108 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This file generates derivative tables based on the language package itself.
+
+import (
+ "fmt"
+ "log"
+ "sort"
+ "strings"
+
+ "golang.org/x/text/internal/language"
+)
+
+// Compact indices:
+// Note -va-X variants only apply to localization variants.
+// BCP variants only ever apply to language.
+// The only ambiguity between tags is with regions.
+
+func (b *builder) writeCompactIndex() {
+ // Collect all language tags for which we have any data in CLDR.
+ m := map[language.Tag]bool{}
+ for _, lang := range b.data.Locales() {
+ // We include all locales unconditionally to be consistent with en_US.
+ // We want en_US, even though it has no data associated with it.
+
+ // TODO: put any of the languages for which no data exists at the end
+ // of the index. This allows all components based on ICU to use that
+ // as the cutoff point.
+ // if x := data.RawLDML(lang); false ||
+ // x.LocaleDisplayNames != nil ||
+ // x.Characters != nil ||
+ // x.Delimiters != nil ||
+ // x.Measurement != nil ||
+ // x.Dates != nil ||
+ // x.Numbers != nil ||
+ // x.Units != nil ||
+ // x.ListPatterns != nil ||
+ // x.Collations != nil ||
+ // x.Segmentations != nil ||
+ // x.Rbnf != nil ||
+ // x.Annotations != nil ||
+ // x.Metadata != nil {
+
+ // TODO: support POSIX natively, albeit non-standard.
+ tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
+ m[tag] = true
+ // }
+ }
+ // Include locales for plural rules, which uses a different structure.
+ for _, plurals := range b.supp.Plurals {
+ for _, rules := range plurals.PluralRules {
+ for _, lang := range strings.Split(rules.Locales, " ") {
+ m[language.Make(lang)] = true
+ }
+ }
+ }
+
+ var coreTags []language.CompactCoreInfo
+ var special []string
+
+ for t := range m {
+ if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
+ log.Fatalf("Unexpected extension %v in %v", x, t)
+ }
+ if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
+ cci, ok := language.GetCompactCore(t)
+ if !ok {
+ log.Fatalf("Locale for non-basic language %q", t)
+ }
+ coreTags = append(coreTags, cci)
+ } else {
+ special = append(special, t.String())
+ }
+ }
+
+ w := b.w
+
+ sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] })
+ sort.Strings(special)
+
+ w.WriteComment(`
+ NumCompactTags is the number of common tags. The maximum tag is
+ NumCompactTags-1.`)
+ w.WriteConst("NumCompactTags", len(m))
+
+ fmt.Fprintln(w, "const (")
+ for i, t := range coreTags {
+ fmt.Fprintf(w, "%s compactID = %d\n", ident(t.Tag().String()), i)
+ }
+ for i, t := range special {
+ fmt.Fprintf(w, "%s compactID = %d\n", ident(t), i+len(coreTags))
+ }
+ fmt.Fprintln(w, ")")
+
+ w.WriteVar("coreTags", coreTags)
+
+ w.WriteConst("specialTagsStr", strings.Join(special, " "))
+}
+
+func ident(s string) string {
+ return strings.Replace(s, "-", "", -1) + "Index"
+}
diff --git a/internal/language/compact/go1_1.go b/internal/language/compact/go1_1.go
new file mode 100644
index 0000000..380f4c0
--- /dev/null
+++ b/internal/language/compact/go1_1.go
@@ -0,0 +1,38 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !go1.2
+
+package language
+
+import "sort"
+
+func sortStable(s sort.Interface) {
+ ss := stableSort{
+ s: s,
+ pos: make([]int, s.Len()),
+ }
+ for i := range ss.pos {
+ ss.pos[i] = i
+ }
+ sort.Sort(&ss)
+}
+
+type stableSort struct {
+ s sort.Interface
+ pos []int
+}
+
+func (s *stableSort) Len() int {
+ return len(s.pos)
+}
+
+func (s *stableSort) Less(i, j int) bool {
+ return s.s.Less(i, j) || !s.s.Less(j, i) && s.pos[i] < s.pos[j]
+}
+
+func (s *stableSort) Swap(i, j int) {
+ s.s.Swap(i, j)
+ s.pos[i], s.pos[j] = s.pos[j], s.pos[i]
+}
diff --git a/internal/language/compact/go1_2.go b/internal/language/compact/go1_2.go
new file mode 100644
index 0000000..38268c5
--- /dev/null
+++ b/internal/language/compact/go1_2.go
@@ -0,0 +1,11 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build go1.2
+
+package language
+
+import "sort"
+
+var sortStable = sort.Stable
diff --git a/internal/language/compact/httpexample_test.go b/internal/language/compact/httpexample_test.go
new file mode 100644
index 0000000..03c0ab9
--- /dev/null
+++ b/internal/language/compact/httpexample_test.go
@@ -0,0 +1,48 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language_test
+
+import (
+ "fmt"
+ "net/http"
+ "strings"
+
+ "golang.org/x/text/language"
+)
+
+// matcher is a language.Matcher configured for all supported languages.
+var matcher = language.NewMatcher([]language.Tag{
+ language.BritishEnglish,
+ language.Norwegian,
+ language.German,
+})
+
+// handler is a http.HandlerFunc.
+func handler(w http.ResponseWriter, r *http.Request) {
+ t, q, err := language.ParseAcceptLanguage(r.Header.Get("Accept-Language"))
+ // We ignore the error: the default language will be selected for t == nil.
+ tag, _, _ := matcher.Match(t...)
+ fmt.Printf("%17v (t: %6v; q: %3v; err: %v)\n", tag, t, q, err)
+}
+
+func ExampleParseAcceptLanguage() {
+ for _, al := range []string{
+ "nn;q=0.3, en-us;q=0.8, en,",
+ "gsw, en;q=0.7, en-US;q=0.8",
+ "gsw, nl, da",
+ "invalid",
+ } {
+ // Create dummy request with Accept-Language set and pass it to handler.
+ r, _ := http.NewRequest("GET", "example.com", strings.NewReader("Hello"))
+ r.Header.Set("Accept-Language", al)
+ handler(nil, r)
+ }
+
+ // Output:
+ // en-GB (t: [ en en-US nn]; q: [ 1 0.8 0.3]; err: <nil>)
+ // en-GB-u-rg-uszzzz (t: [ gsw en-US en]; q: [ 1 0.8 0.7]; err: <nil>)
+ // de (t: [ gsw nl da]; q: [ 1 1 1]; err: <nil>)
+ // en-GB (t: []; q: []; err: language: tag is not well-formed)
+}
diff --git a/internal/language/compact/language.go b/internal/language/compact/language.go
new file mode 100644
index 0000000..c4855b5
--- /dev/null
+++ b/internal/language/compact/language.go
@@ -0,0 +1,762 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:generate go run gen.go gen_index.go -output tables.go
+
+package language
+
+// TODO: Remove above NOTE after:
+// - verifying that tables are dropped correctly (most notably matcher tables).
+
+import (
+ "strings"
+
+ "golang.org/x/text/internal/language"
+)
+
+// Tag represents a BCP 47 language tag. It is used to specify an instance of a
+// specific language or locale. All language tag values are guaranteed to be
+// well-formed.
+type Tag struct {
+ language compactID
+ locale compactID
+ full fullTag // always a language.Tag for now.
+}
+
+type fullTag interface {
+ IsRoot() bool
+ Parent() language.Tag
+}
+
+func makeTag(t language.Tag) (tag Tag) {
+ if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
+ if r, err := language.ParseRegion(region[:2]); err == nil {
+ tFull := t
+ t, _ = t.SetTypeForKey("rg", "")
+ // TODO: should we not consider "va" for the language tag?
+ var exact1, exact2 bool
+ tag.language, exact1 = compactIndex(t)
+ t.RegionID = r
+ tag.locale, exact2 = compactIndex(t)
+ if !exact1 || !exact2 {
+ tag.full = tFull
+ }
+ return tag
+ }
+ }
+ lang, ok := compactIndex(t)
+ tag.language = lang
+ tag.locale = lang
+ if !ok {
+ tag.full = t
+ }
+ return tag
+}
+
+func (t *Tag) tag() language.Tag {
+ if t.full != nil {
+ return t.full.(language.Tag)
+ }
+ tag := t.language.tag()
+ if t.language != t.locale {
+ loc := t.locale.tag()
+ tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
+ }
+ return tag
+}
+
+func (t *Tag) mayHaveVariants() bool {
+ return t.full != nil || int(t.language) >= len(coreTags)
+}
+
+func (t *Tag) mayHaveExtensions() bool {
+ return t.full != nil ||
+ int(t.language) >= len(coreTags) ||
+ t.language != t.locale
+}
+
+// TODO: improve performance.
+func (t *Tag) lang() language.Language { return t.tag().LangID }
+func (t *Tag) region() language.Region { return t.tag().RegionID }
+func (t *Tag) script() language.Script { return t.tag().ScriptID }
+
+// Make is a convenience wrapper for Parse that omits the error.
+// In case of an error, a sensible default is returned.
+func Make(s string) Tag {
+ return Default.Make(s)
+}
+
+// Make is a convenience wrapper for c.Parse that omits the error.
+// In case of an error, a sensible default is returned.
+func (c CanonType) Make(s string) Tag {
+ t, _ := c.Parse(s)
+ return t
+}
+
+// Raw returns the raw base language, script and region, without making an
+// attempt to infer their values.
+func (t Tag) Raw() (b Base, s Script, r Region) {
+ tt := t.tag()
+ return Base{tt.LangID}, Script{tt.ScriptID}, Region{tt.RegionID}
+}
+
+// IsRoot returns true if t is equal to language "und".
+func (t Tag) IsRoot() bool {
+ if t.full != nil {
+ return t.full.IsRoot()
+ }
+ return t.language == _und
+}
+
+// CanonType can be used to enable or disable various types of canonicalization.
+type CanonType int
+
+const (
+ // Replace deprecated base languages with their preferred replacements.
+ DeprecatedBase CanonType = 1 << iota
+ // Replace deprecated scripts with their preferred replacements.
+ DeprecatedScript
+ // Replace deprecated regions with their preferred replacements.
+ DeprecatedRegion
+ // Remove redundant scripts.
+ SuppressScript
+ // Normalize legacy encodings. This includes legacy languages defined in
+ // CLDR as well as bibliographic codes defined in ISO-639.
+ Legacy
+ // Map the dominant language of a macro language group to the macro language
+ // subtag. For example cmn -> zh.
+ Macro
+ // The CLDR flag should be used if full compatibility with CLDR is required.
+ // There are a few cases where language.Tag may differ from CLDR. To follow all
+ // of CLDR's suggestions, use All|CLDR.
+ CLDR
+
+ // Raw can be used to Compose or Parse without Canonicalization.
+ Raw CanonType = 0
+
+ // Replace all deprecated tags with their preferred replacements.
+ Deprecated = DeprecatedBase | DeprecatedScript | DeprecatedRegion
+
+ // All canonicalizations recommended by BCP 47.
+ BCP47 = Deprecated | SuppressScript
+
+ // All canonicalizations.
+ All = BCP47 | Legacy | Macro
+
+ // Default is the canonicalization used by Parse, Make and Compose. To
+ // preserve as much information as possible, canonicalizations that remove
+ // potentially valuable information are not included. The Matcher is
+ // designed to recognize similar tags that would be the same if
+ // they were canonicalized using All.
+ Default = Deprecated | Legacy
+
+ canonLang = DeprecatedBase | Legacy | Macro
+
+ // TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
+)
+
+// canonicalize returns the canonicalized equivalent of the tag and
+// whether there was any change.
+func canonicalize(c CanonType, t language.Tag) (language.Tag, bool) {
+ if c == Raw {
+ return t, false
+ }
+ changed := false
+ if c&SuppressScript != 0 {
+ if t.LangID.SuppressScript() == t.ScriptID {
+ t.ScriptID = 0
+ changed = true
+ }
+ }
+ if c&canonLang != 0 {
+ for {
+ if l, aliasType := t.LangID.Canonicalize(); l != t.LangID {
+ switch aliasType {
+ case language.Legacy:
+ if c&Legacy != 0 {
+ if t.LangID == _sh && t.ScriptID == 0 {
+ t.ScriptID = _Latn
+ }
+ t.LangID = l
+ changed = true
+ }
+ case language.Macro:
+ if c&Macro != 0 {
+ // We deviate here from CLDR. The mapping "nb" -> "no"
+ // qualifies as a typical Macro language mapping. However,
+ // for legacy reasons, CLDR maps "no", the macro language
+ // code for Norwegian, to the dominant variant "nb". This
+ // change is currently under consideration for CLDR as well.
+ // See http://unicode.org/cldr/trac/ticket/2698 and also
+ // http://unicode.org/cldr/trac/ticket/1790 for some of the
+ // practical implications. TODO: this check could be removed
+ // if CLDR adopts this change.
+ if c&CLDR == 0 || t.LangID != _nb {
+ changed = true
+ t.LangID = l
+ }
+ }
+ case language.Deprecated:
+ if c&DeprecatedBase != 0 {
+ if t.LangID == _mo && t.RegionID == 0 {
+ t.RegionID = _MD
+ }
+ t.LangID = l
+ changed = true
+ // Other canonicalization types may still apply.
+ continue
+ }
+ }
+ } else if c&Legacy != 0 && t.LangID == _no && c&CLDR != 0 {
+ t.LangID = _nb
+ changed = true
+ }
+ break
+ }
+ }
+ if c&DeprecatedScript != 0 {
+ if t.ScriptID == _Qaai {
+ changed = true
+ t.ScriptID = _Zinh
+ }
+ }
+ if c&DeprecatedRegion != 0 {
+ if r := t.RegionID.Canonicalize(); r != t.RegionID {
+ changed = true
+ t.RegionID = r
+ }
+ }
+ return t, changed
+}
+
+// Canonicalize returns the canonicalized equivalent of the tag.
+func (c CanonType) Canonicalize(t Tag) (Tag, error) {
+ // First try fast path.
+ if t.full == nil {
+ if _, changed := canonicalize(c, t.language.tag()); !changed {
+ return t, nil
+ }
+ }
+ // It is unlikely that one will canonicalize a tag after matching. So do
+ // a slow but simple approach here.
+ if tag, changed := canonicalize(c, t.tag()); changed {
+ tag.RemakeString()
+ return makeTag(tag), nil
+ }
+ return t, nil
+
+}
+
+// Confidence indicates the level of certainty for a given return value.
+// For example, Serbian may be written in Cyrillic or Latin script.
+// The confidence level indicates whether a value was explicitly specified,
+// whether it is typically the only possible value, or whether there is
+// an ambiguity.
+type Confidence int
+
+const (
+ No Confidence = iota // full confidence that there was no match
+ Low // most likely value picked out of a set of alternatives
+ High // value is generally assumed to be the correct match
+ Exact // exact match or explicitly specified value
+)
+
+var confName = []string{"No", "Low", "High", "Exact"}
+
+func (c Confidence) String() string {
+ return confName[c]
+}
+
+// String returns the canonical string representation of the language tag.
+func (t Tag) String() string {
+ return t.tag().String()
+}
+
+// MarshalText implements encoding.TextMarshaler.
+func (t Tag) MarshalText() (text []byte, err error) {
+ return t.tag().MarshalText()
+}
+
+// UnmarshalText implements encoding.TextUnmarshaler.
+func (t *Tag) UnmarshalText(text []byte) error {
+ var tag language.Tag
+ err := tag.UnmarshalText(text)
+ *t = makeTag(tag)
+ return err
+}
+
+// Base returns the base language of the language tag. If the base language is
+// unspecified, an attempt will be made to infer it from the context.
+// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
+func (t Tag) Base() (Base, Confidence) {
+ if b := t.lang(); b != 0 {
+ return Base{b}, Exact
+ }
+ tt := t.tag()
+ c := High
+ if tt.ScriptID == 0 && !tt.RegionID.IsCountry() {
+ c = Low
+ }
+ if tag, err := tt.Maximize(); err == nil && tag.LangID != 0 {
+ return Base{tag.LangID}, c
+ }
+ return Base{0}, No
+}
+
+// Script infers the script for the language tag. If it was not explicitly given, it will infer
+// a most likely candidate.
+// If more than one script is commonly used for a language, the most likely one
+// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
+// for Serbian.
+// If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
+// as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
+// common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
+// See http://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
+// unknown value in CLDR. (Zzzz, Exact) is returned if Zzzz was explicitly specified.
+// Note that an inferred script is never guaranteed to be the correct one. Latin is
+// almost exclusively used for Afrikaans, but Arabic has been used for some texts
+// in the past. Also, the script that is commonly used may change over time.
+// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
+func (t Tag) Script() (Script, Confidence) {
+ if scr := t.script(); scr != 0 {
+ return Script{scr}, Exact
+ }
+ tt := t.tag()
+ sc, c := language.Script(_Zzzz), No
+ if scr := tt.LangID.SuppressScript(); scr != 0 {
+ // Note: it is not always the case that a language with a suppress
+ // script value is only written in one script (e.g. kk, ms, pa).
+ if tt.RegionID == 0 {
+ return Script{scr}, High
+ }
+ sc, c = scr, High
+ }
+ if tag, err := tt.Maximize(); err == nil {
+ if tag.ScriptID != sc {
+ sc, c = tag.ScriptID, Low
+ }
+ } else {
+ tt, _ = canonicalize(Deprecated|Macro, tt)
+ if tag, err := tt.Maximize(); err == nil && tag.ScriptID != sc {
+ sc, c = tag.ScriptID, Low
+ }
+ }
+ return Script{sc}, c
+}
+
+// Region returns the region for the language tag. If it was not explicitly given, it will
+// infer a most likely candidate from the context.
+// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
+func (t Tag) Region() (Region, Confidence) {
+ if r := t.region(); r != 0 {
+ return Region{r}, Exact
+ }
+ tt := t.tag()
+ if tt, err := tt.Maximize(); err == nil {
+ return Region{tt.RegionID}, Low // TODO: differentiate between high and low.
+ }
+ tt, _ = canonicalize(Deprecated|Macro, tt)
+ if tag, err := tt.Maximize(); err == nil {
+ return Region{tag.RegionID}, Low
+ }
+ return Region{_ZZ}, No // TODO: return world instead of undetermined?
+}
+
+// Variants returns the variants specified explicitly for this language tag,
+// or nil if no variant was specified.
+func (t Tag) Variants() []Variant {
+ if !t.mayHaveVariants() {
+ return nil
+ }
+ v := []Variant{}
+ x, str := "", t.tag().Variants()
+ for str != "" {
+ x, str = nextToken(str)
+ v = append(v, Variant{x})
+ }
+ return v
+}
+
+// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
+// specific language are substituted with fields from the parent language.
+// The parent for a language may change for newer versions of CLDR.
+func (t Tag) Parent() Tag {
+ if t.full != nil {
+ return makeTag(t.full.Parent())
+ }
+ if t.language != t.locale {
+ // Simulate stripping -u-rg-xxxxxx
+ return Tag{language: t.language, locale: t.language}
+ }
+ // TODO: use parent lookup table once cycle from internal package is
+ // removed. Probably by internalizing the table and declaring this fast
+ // enough.
+ // lang := compactID(internal.Parent(uint16(t.language)))
+ lang, _ := compactIndex(t.language.tag().Parent())
+ return Tag{language: lang, locale: lang}
+}
+
+// returns token t and the rest of the string.
+func nextToken(s string) (t, tail string) {
+ p := strings.Index(s[1:], "-")
+ if p == -1 {
+ return s[1:], ""
+ }
+ p++
+ return s[1:p], s[p:]
+}
+
+// Extension is a single BCP 47 extension.
+type Extension struct {
+ s string
+}
+
+// String returns the string representation of the extension, including the
+// type tag.
+func (e Extension) String() string {
+ return e.s
+}
+
+// ParseExtension parses s as an extension and returns it on success.
+func ParseExtension(s string) (e Extension, err error) {
+ ext, err := language.ParseExtension(s)
+ return Extension{ext}, err
+}
+
+// Type returns the one-byte extension type of e. It returns 0 for the zero
+// extension.
+func (e Extension) Type() byte {
+ if e.s == "" {
+ return 0
+ }
+ return e.s[0]
+}
+
+// Tokens returns the list of tokens of e.
+func (e Extension) Tokens() []string {
+ return strings.Split(e.s, "-")
+}
+
+// Extension returns the extension of type x for tag t. It will return
+// false for ok if t does not have the requested extension. The returned
+// extension will be invalid in this case.
+func (t Tag) Extension(x byte) (ext Extension, ok bool) {
+ if !t.mayHaveExtensions() {
+ return Extension{}, false
+ }
+ e, ok := t.tag().Extension(x)
+ return Extension{e}, ok
+}
+
+// Extensions returns all extensions of t.
+func (t Tag) Extensions() []Extension {
+ if !t.mayHaveExtensions() {
+ return nil
+ }
+ e := []Extension{}
+ for _, ext := range t.tag().Extensions() {
+ e = append(e, Extension{ext})
+ }
+ return e
+}
+
+// TypeForKey returns the type associated with the given key, where key and type
+// are of the allowed values defined for the Unicode locale extension ('u') in
+// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// TypeForKey will traverse the inheritance chain to get the correct value.
+func (t Tag) TypeForKey(key string) string {
+ if !t.mayHaveExtensions() {
+ if key != "rg" && key != "va" {
+ return ""
+ }
+ }
+ return t.tag().TypeForKey(key)
+}
+
+// SetTypeForKey returns a new Tag with the key set to type, where key and type
+// are of the allowed values defined for the Unicode locale extension ('u') in
+// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// An empty value removes an existing pair with the same key.
+func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
+ tt, err := t.tag().SetTypeForKey(key, value)
+ return makeTag(tt), err
+}
+
+// CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
+// for which data exists in the text repository. The index will change over time
+// and should not be stored in persistent storage. If t does not match a compact
+// index, exact will be false and the compact index will be returned for the
+// first match after repeatedly taking the Parent of t.
+func CompactIndex(t Tag) (index int, exact bool) {
+ return int(t.language), t.full == nil
+}
+
+// TODO: make these functions and methods public once we settle on the API
+// and on how to expose them.
+
+// regionalCompactIndex returns the CompactIndex for the regional variant of this
+// tag. This index is used to indicate region-specific overrides, such as
+// default currency, default calendar and week data, default time cycle, and
+// default measurement system and unit preferences.
+//
+// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
+// settings for currency, number formatting, etc. The CompactIndex for this tag
+// will be that for en-GB, while the regionalCompactIndex will be the one
+// corresponding to en-US.
+func regionalCompactIndex(t Tag) (index int, exact bool) {
+ return int(t.locale), t.full == nil
+}
+
+// languageTag returns t stripped of regional variant indicators.
+//
+// At the moment this means it is stripped of a regional and variant subtag "rg"
+// and "va" in the "u" extension.
+func (t Tag) languageTag() Tag {
+ if t.full == nil {
+ return Tag{language: t.language, locale: t.language}
+ }
+ tt := t.tag()
+ tt.SetTypeForKey("rg", "")
+ tt.SetTypeForKey("va", "")
+ return makeTag(tt)
+}
+
+// regionalTag returns the regional variant of the tag.
+//
+// At the moment this means that the region is set from the regional subtag
+// "rg" in the "u" extension.
+func (t Tag) regionalTag() Tag {
+ rt := Tag{language: t.locale, locale: t.locale}
+ if t.full == nil {
+ return rt
+ }
+ t, _ = Raw.Compose(rt, t.Variants(), t.Extensions())
+ t, _ = t.SetTypeForKey("rg", "")
+ return t
+}
+
+func compactIndex(t language.Tag) (index compactID, exact bool) {
+ // TODO: perhaps give more frequent tags a lower index.
+ // TODO: we could make the indexes stable. This will exclude some
+ // possibilities for optimization, so don't do this quite yet.
+ exact = true
+
+ b, s, r := t.Raw()
+ switch {
+ case t.HasString():
+ if t.IsPrivateUse() {
+ // We have no entries for user-defined tags.
+ return 0, false
+ }
+ hasExtra := false
+ if t.HasVariants() {
+ if t.HasExtensions() {
+ build := language.Builder{}
+ build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
+ build.AddVariant(t.Variants())
+ exact = false
+ t = build.Make()
+ }
+ hasExtra = true
+ } else if _, ok := t.Extension('u'); ok {
+ // TODO: va may mean something else. Consider not considering it.
+ // Strip all but the 'va' entry.
+ old := t
+ variant := t.TypeForKey("va")
+ t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
+ if variant != "" {
+ t, _ = t.SetTypeForKey("va", variant)
+ hasExtra = true
+ }
+ exact = old == t
+ } else {
+ exact = false
+ }
+ if hasExtra {
+ // We have some variants.
+ for i, s := range specialTags {
+ if s == t {
+ return compactID(i + len(coreTags)), exact
+ }
+ }
+ exact = false
+ }
+ }
+ if x, ok := getCoreIndex(t); ok {
+ return x, exact
+ }
+ exact = false
+ if r != 0 && s == 0 {
+ // Deal with cases where an extra script is inserted for the region.
+ t, _ := t.Maximize()
+ if x, ok := getCoreIndex(t); ok {
+ return x, exact
+ }
+ }
+ for t = t.Parent(); t != root; t = t.Parent() {
+ // No variants specified: just compare core components.
+ // The key has the form lllssrrr, where l, s, and r are nibbles for
+ // respectively the langID, scriptID, and regionID.
+ if x, ok := getCoreIndex(t); ok {
+ return x, exact
+ }
+ }
+ return 0, exact
+}
+
+var root = language.Tag{}
+
+// Base is an ISO 639 language code, used for encoding the base language
+// of a language tag.
+type Base struct {
+ langID language.Language
+}
+
+// ParseBase parses a 2- or 3-letter ISO 639 code.
+// It returns a ValueError if s is a well-formed but unknown language identifier
+// or another error if another error occurred.
+func ParseBase(s string) (Base, error) {
+ l, err := language.ParseBase(s)
+ return Base{l}, err
+}
+
+// String returns the BCP 47 representation of the base language.
+func (b Base) String() string {
+ return b.langID.String()
+}
+
+// ISO3 returns the ISO 639-3 language code.
+func (b Base) ISO3() string {
+ return b.langID.ISO3()
+}
+
+// IsPrivateUse reports whether this language code is reserved for private use.
+func (b Base) IsPrivateUse() bool {
+ return b.langID.IsPrivateUse()
+}
+
+// Script is a 4-letter ISO 15924 code for representing scripts.
+// It is idiomatically represented in title case.
+type Script struct {
+ scriptID language.Script
+}
+
+// ParseScript parses a 4-letter ISO 15924 code.
+// It returns a ValueError if s is a well-formed but unknown script identifier
+// or another error if another error occurred.
+func ParseScript(s string) (Script, error) {
+ sc, err := language.ParseScript(s)
+ return Script{sc}, err
+}
+
+// String returns the script code in title case.
+// It returns "Zzzz" for an unspecified script.
+func (s Script) String() string {
+ return s.scriptID.String()
+}
+
+// IsPrivateUse reports whether this script code is reserved for private use.
+func (s Script) IsPrivateUse() bool {
+ return s.scriptID.IsPrivateUse()
+}
+
+// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
+type Region struct {
+ regionID language.Region
+}
+
+// EncodeM49 returns the Region for the given UN M.49 code.
+// It returns an error if r is not a valid code.
+func EncodeM49(r int) (Region, error) {
+ rid, err := language.EncodeM49(r)
+ return Region{rid}, err
+}
+
+// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
+// It returns a ValueError if s is a well-formed but unknown region identifier
+// or another error if another error occurred.
+func ParseRegion(s string) (Region, error) {
+ r, err := language.ParseRegion(s)
+ return Region{r}, err
+}
+
+// String returns the BCP 47 representation for the region.
+// It returns "ZZ" for an unspecified region.
+func (r Region) String() string {
+ return r.regionID.String()
+}
+
+// ISO3 returns the 3-letter ISO code of r.
+// Note that not all regions have a 3-letter ISO code.
+// In such cases this method returns "ZZZ".
+func (r Region) ISO3() string {
+	// Use ISO3 (not String) so the documented 3-letter/"ZZZ" contract holds.
+	return r.regionID.ISO3()
+}
+
+// M49 returns the UN M.49 encoding of r, or 0 if this encoding
+// is not defined for r.
+func (r Region) M49() int {
+ return r.regionID.M49()
+}
+
+// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
+// may include private-use tags that are assigned by CLDR and used in this
+// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
+func (r Region) IsPrivateUse() bool {
+ return r.regionID.IsPrivateUse()
+}
+
+// IsCountry returns whether this region is a country or autonomous area. This
+// includes non-standard definitions from CLDR.
+func (r Region) IsCountry() bool {
+ return r.regionID.IsCountry()
+}
+
+// IsGroup returns whether this region defines a collection of regions. This
+// includes non-standard definitions from CLDR.
+func (r Region) IsGroup() bool {
+ return r.regionID.IsGroup()
+}
+
+// Contains returns whether Region c is contained by Region r. It returns true
+// if c == r.
+func (r Region) Contains(c Region) bool {
+ return r.regionID.Contains(c.regionID)
+}
+
+// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
+// In all other cases it returns either the region itself or an error.
+//
+// This method may return an error for a region for which there exists a
+// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
+// region will already be canonicalized if it was obtained from a Tag that was
+// obtained using any of the default methods.
+func (r Region) TLD() (Region, error) {
+ tld, err := r.regionID.TLD()
+ return Region{tld}, err
+}
+
+// Canonicalize returns the region or a possible replacement if the region is
+// deprecated. It will not return a replacement for deprecated regions that
+// are split into multiple regions.
+func (r Region) Canonicalize() Region {
+ return Region{r.regionID.Canonicalize()}
+}
+
+// Variant represents a registered variant of a language as defined by BCP 47.
+type Variant struct {
+ variant string
+}
+
+// ParseVariant parses and returns a Variant. An error is returned if s is not
+// a valid variant.
+func ParseVariant(s string) (Variant, error) {
+ v, err := language.ParseVariant(s)
+ return Variant{v.String()}, err
+}
+
+// String returns the string representation of the variant.
+func (v Variant) String() string {
+ return v.variant
+}
diff --git a/internal/language/compact/language_test.go b/internal/language/compact/language_test.go
new file mode 100644
index 0000000..20bc48c
--- /dev/null
+++ b/internal/language/compact/language_test.go
@@ -0,0 +1,844 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+ "reflect"
+ "testing"
+)
+
+// TestTagSize guards against accidental growth of the Tag struct.
+func TestTagSize(t *testing.T) {
+ if sz := reflect.TypeOf(Tag{}).Size(); sz > 24 {
+ t.Errorf("size of Tag was %d; want 24", sz)
+ }
+}
+
+// TestIsRoot checks that IsRoot holds exactly for the zero Tag and for tags
+// equivalent to "und" without script, region, or extensions.
+func TestIsRoot(t *testing.T) {
+ if root := (Tag{}); !root.IsRoot() {
+ t.Errorf("unspecified should be root.")
+ }
+ for i, tt := range parseTests() {
+ tag, _ := Parse(tt.in)
+ wantRoot := tt.lang == "und" && tt.script == "" && tt.region == "" && tt.ext == ""
+ if got := tag.IsRoot(); got != wantRoot {
+ t.Errorf("%d: was %v; want %v", i, got, wantRoot)
+ }
+ }
+}
+
+// TestEquality verifies that a Tag survives a round trip through
+// String/Make and through Compose unchanged.
+func TestEquality(t *testing.T) {
+ for i, tt := range parseTests() {
+ s := tt.in
+ tag := Make(s)
+ t1 := Make(tag.String())
+ if tag != t1 {
+ // Fixed: the format string had a stray ')' after the first %#v,
+ // inconsistent with the second check below.
+ t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v", i, s, t1, tag)
+ }
+ t2, _ := Compose(tag)
+ if tag != t2 {
+ t.Errorf("%d:%s: equality test 2 failed\n got: %#v\nwant: %#v", i, s, t2, tag)
+ }
+ }
+}
+
+// TestString checks that selected tags stringify back to their input form.
+func TestString(t *testing.T) {
+ for i, want := range []string{
+ "no-u-rg-dkzzzz",
+ } {
+ tag := Make(want)
+ if got := tag.String(); got != want {
+ t.Errorf("%d:%s: got %s: want %s (%#v)", i, want, got, want, tag)
+ }
+ }
+}
+
+// compactTest describes a single compact-index lookup: the input tag, the
+// expected compact index, and whether the mapping is exact.
+type compactTest struct {
+ tag string // input BCP 47 tag
+ index compactID // expected compact index
+ ok bool // true if the tag maps to the index without loss
+}
+
+// compactTests is shared by TestCompactIndex and TestRegionalCompactIndex.
+// ok is false when the tag carries information (extensions, variants, or an
+// unrepresented region) that the compact index cannot capture exactly.
+var compactTests = []compactTest{
+ // TODO: these values will change with each CLDR update. This issue
+ // will be solved if we decide to fix the indexes.
+ {"und", undIndex, true},
+ {"ca-ES-valencia", caESvalenciaIndex, true},
+ {"ca-ES-valencia-u-va-posix", caESvalenciaIndex, false},
+ {"ca-ES-valencia-u-co-phonebk", caESvalenciaIndex, false},
+ {"ca-ES-valencia-u-co-phonebk-va-posix", caESvalenciaIndex, false},
+ {"x-klingon", 0, false},
+ {"en-US", enUSIndex, true},
+ {"en-US-u-va-posix", enUSuvaposixIndex, true},
+ {"en", enIndex, true},
+ {"en-u-co-phonebk", enIndex, false},
+ {"en-001", en001Index, true},
+ {"zh-Hant-HK", zhHantHKIndex, true},
+ {"zh-HK", zhHantHKIndex, false}, // maximized to zh-Hant-HK
+ {"nl-Beng", 0, false}, // parent skips script
+ {"nl-NO", nlIndex, false}, // region is ignored
+ {"nl-Latn-NO", nlIndex, false},
+ {"nl-Latn-NO-u-co-phonebk", nlIndex, false},
+ {"nl-Latn-NO-valencia", nlIndex, false},
+ {"nl-Latn-NO-oxendict", nlIndex, false},
+ {"sh", shIndex, true}, // From plural rules.
+}
+
+// TestCompactIndex checks CompactIndex against the shared table plus cases
+// exercising the -u-rg- (region override) extension, which the language
+// (non-regional) index must not follow.
+func TestCompactIndex(t *testing.T) {
+ tests := append(compactTests, []compactTest{
+ {"en-GB", enGBIndex, true},
+ {"en-GB-u-rg-uszzzz", enGBIndex, true},
+ {"en-GB-u-rg-USZZZZ", enGBIndex, true},
+ {"en-GB-u-rg-uszzzz-va-posix", enGBIndex, false},
+ {"en-GB-u-co-phonebk-rg-uszzzz", enGBIndex, false},
+ // Invalid region specifications are ignored.
+ {"en-GB-u-rg-usz-va-posix", enGBIndex, false},
+ {"en-GB-u-co-phonebk-rg-usz", enGBIndex, false},
+ }...)
+ for _, tt := range tests {
+ x, ok := CompactIndex(Raw.MustParse(tt.tag))
+ if compactID(x) != tt.index || ok != tt.ok {
+ t.Errorf("%s: got %d, %v; want %d %v", tt.tag, x, ok, tt.index, tt.ok)
+ }
+ }
+}
+
+// TestRegionalCompactIndex checks regionalCompactIndex, where a valid -u-rg-
+// region override redirects to the index of the overriding region.
+func TestRegionalCompactIndex(t *testing.T) {
+ tests := append(compactTests, []compactTest{
+ {"en-GB", enGBIndex, true},
+ {"en-GB-u-rg-uszzzz", enUSIndex, true},
+ {"en-GB-u-rg-USZZZZ", enUSIndex, true},
+ // TODO: use different exact values for language and regional tag?
+ {"en-GB-u-rg-uszzzz-va-posix", enUSuvaposixIndex, false},
+ {"en-GB-u-co-phonebk-rg-uszzzz-va-posix", enUSuvaposixIndex, false},
+ {"en-GB-u-co-phonebk-rg-uszzzz", enUSIndex, false},
+ // Invalid region specifications are ignored.
+ {"en-GB-u-rg-usz-va-posix", enGBIndex, false},
+ {"en-GB-u-co-phonebk-rg-usz", enGBIndex, false},
+ }...)
+ for _, tt := range tests {
+ x, ok := regionalCompactIndex(Raw.MustParse(tt.tag))
+ if compactID(x) != tt.index || ok != tt.ok {
+ t.Errorf("%s: got %d, %v; want %d %v", tt.tag, x, ok, tt.index, tt.ok)
+ }
+ }
+}
+
+// TestMarshal checks that tags, including ill-formed ones, round-trip through
+// UnmarshalText and MarshalText unchanged.
+func TestMarshal(t *testing.T) {
+ testCases := []string{
+ // TODO: these values will change with each CLDR update. This issue
+ // will be solved if we decide to fix the indexes.
+ "und",
+ "ca-ES-valencia",
+ "ca-ES-valencia-u-va-posix",
+ "ca-ES-valencia-u-co-phonebk",
+ "ca-ES-valencia-u-co-phonebk-va-posix",
+ "x-klingon",
+ "en-US",
+ "en-US-u-va-posix",
+ "en",
+ "en-u-co-phonebk",
+ "en-001",
+ "sh",
+
+ "en-GB-u-rg-uszzzz",
+ "en-GB-u-rg-uszzzz-va-posix",
+ "en-GB-u-co-phonebk-rg-uszzzz",
+ // Invalid tags should also roundtrip.
+ "en-GB-u-co-phonebk-rg-uszz",
+ }
+ for _, tc := range testCases {
+ var tag Tag
+ err := tag.UnmarshalText([]byte(tc))
+ if err != nil {
+ t.Errorf("UnmarshalText(%q): unexpected error: %v", tc, err)
+ }
+ b, err := tag.MarshalText()
+ if err != nil {
+ t.Errorf("MarshalText(%q): unexpected error: %v", tc, err)
+ }
+ if got := string(b); got != tc {
+ t.Errorf("%s: got %q; want %q", tc, got, tc)
+ }
+ }
+}
+
+// TestBase checks Base language inference and the associated confidence for
+// underspecified tags.
+func TestBase(t *testing.T) {
+ tests := []struct {
+ loc, lang string
+ conf Confidence
+ }{
+ {"und", "en", Low},
+ {"x-abc", "und", No},
+ {"en", "en", Exact},
+ {"und-Cyrl", "ru", High},
+ // If a region is not included, the official language should be English.
+ {"und-US", "en", High},
+ // TODO: not-explicitly listed scripts should probably be und, No
+ // Modify addTags to return info on how the match was derived.
+ // {"und-Aghb", "und", No},
+ }
+ for i, tt := range tests {
+ loc, _ := Parse(tt.loc)
+ lang, conf := loc.Base()
+ if lang.String() != tt.lang {
+ t.Errorf("%d: language was %s; want %s", i, lang, tt.lang)
+ }
+ if conf != tt.conf {
+ t.Errorf("%d: confidence was %d; want %d", i, conf, tt.conf)
+ }
+ }
+}
+
+// TestParseBase checks base-language parsing, including 3-letter ISO 639
+// codes, bibliographic codes, and rejection of malformed input.
+func TestParseBase(t *testing.T) {
+ tests := []struct {
+ in string
+ out string
+ ok bool
+ }{
+ {"en", "en", true},
+ {"EN", "en", true},
+ {"nld", "nl", true},
+ {"dut", "dut", true}, // bibliographic
+ {"aaj", "und", false}, // unknown
+ {"qaa", "qaa", true},
+ {"a", "und", false},
+ {"", "und", false},
+ {"aaaa", "und", false},
+ }
+ for i, tt := range tests {
+ x, err := ParseBase(tt.in)
+ if x.String() != tt.out || err == nil != tt.ok {
+ t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
+ }
+ // The parsed base must agree with the one extracted from a raw tag.
+ if y, _, _ := Raw.Make(tt.out).Raw(); x != y {
+ t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
+ }
+ }
+}
+
+// TestScript checks script inference and the associated confidence.
+func TestScript(t *testing.T) {
+ tests := []struct {
+ loc, scr string
+ conf Confidence
+ }{
+ {"und", "Latn", Low},
+ {"en-Latn", "Latn", Exact},
+ {"en", "Latn", High},
+ {"sr", "Cyrl", Low},
+ {"kk", "Cyrl", High},
+ {"kk-CN", "Arab", Low},
+ {"cmn", "Hans", Low},
+ {"ru", "Cyrl", High},
+ {"ru-RU", "Cyrl", High},
+ {"yue", "Hant", Low},
+ {"x-abc", "Zzzz", Low},
+ {"und-zyyy", "Zyyy", Exact},
+ }
+ for i, tt := range tests {
+ loc, _ := Parse(tt.loc)
+ sc, conf := loc.Script()
+ if sc.String() != tt.scr {
+ t.Errorf("%d:%s: script was %s; want %s", i, tt.loc, sc, tt.scr)
+ }
+ if conf != tt.conf {
+ t.Errorf("%d:%s: confidence was %d; want %d", i, tt.loc, conf, tt.conf)
+ }
+ }
+}
+
+// TestParseScript checks script parsing; invalid input yields Zzzz (unknown).
+func TestParseScript(t *testing.T) {
+ tests := []struct {
+ in string
+ out string
+ ok bool
+ }{
+ {"Latn", "Latn", true},
+ {"zzzz", "Zzzz", true},
+ {"zyyy", "Zyyy", true},
+ {"Latm", "Zzzz", false},
+ {"Zzz", "Zzzz", false},
+ {"", "Zzzz", false},
+ {"Zzzxx", "Zzzz", false},
+ }
+ for i, tt := range tests {
+ x, err := ParseScript(tt.in)
+ if x.String() != tt.out || err == nil != tt.ok {
+ t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
+ }
+ if err == nil {
+ // Must agree with the script extracted from a raw "und-<script>" tag.
+ if _, y, _ := Raw.Make("und-" + tt.out).Raw(); x != y {
+ t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
+ }
+ }
+ }
+}
+
+// TestRegion checks region inference and the associated confidence.
+func TestRegion(t *testing.T) {
+ tests := []struct {
+ loc, reg string
+ conf Confidence
+ }{
+ {"und", "US", Low},
+ {"en", "US", Low},
+ {"zh-Hant", "TW", Low},
+ {"en-US", "US", Exact},
+ {"cmn", "CN", Low},
+ {"ru", "RU", Low},
+ {"yue", "HK", Low},
+ {"x-abc", "ZZ", Low},
+ }
+ for i, tt := range tests {
+ loc, _ := Raw.Parse(tt.loc)
+ reg, conf := loc.Region()
+ if reg.String() != tt.reg {
+ t.Errorf("%d:%s: region was %s; want %s", i, tt.loc, reg, tt.reg)
+ }
+ if conf != tt.conf {
+ t.Errorf("%d:%s: confidence was %d; want %d", i, tt.loc, conf, tt.conf)
+ }
+ }
+}
+
+// TestEncodeM49 checks UN M.49 numeric code conversion, then scans the whole
+// code range to verify no code maps to an undefined region without error.
+func TestEncodeM49(t *testing.T) {
+ tests := []struct {
+ m49 int
+ code string
+ ok bool
+ }{
+ {1, "001", true},
+ {840, "US", true},
+ {899, "ZZ", false},
+ }
+ for i, tt := range tests {
+ if r, err := EncodeM49(tt.m49); r.String() != tt.code || err == nil != tt.ok {
+ t.Errorf("%d:%d: was %s, %v; want %s, %v", i, tt.m49, r, err == nil, tt.code, tt.ok)
+ }
+ }
+ for i := 1; i <= 1000; i++ {
+ if r, err := EncodeM49(i); err == nil && r.M49() == 0 {
+ t.Errorf("%d has no error, but maps to undefined region", i)
+ }
+ }
+}
+
+// TestParseRegion checks region parsing for 2-letter, 3-letter, and numeric
+// forms; invalid input yields ZZ (unknown).
+func TestParseRegion(t *testing.T) {
+ tests := []struct {
+ in string
+ out string
+ ok bool
+ }{
+ {"001", "001", true},
+ {"840", "US", true},
+ {"899", "ZZ", false},
+ {"USA", "US", true},
+ {"US", "US", true},
+ {"BC", "ZZ", false},
+ {"C", "ZZ", false},
+ {"CCCC", "ZZ", false},
+ {"01", "ZZ", false},
+ }
+ for i, tt := range tests {
+ r, err := ParseRegion(tt.in)
+ if r.String() != tt.out || err == nil != tt.ok {
+ t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, r, err == nil, tt.out, tt.ok)
+ }
+ if err == nil {
+ // Must agree with the region extracted from a raw "und-<region>" tag.
+ if _, _, y := Raw.Make("und-" + tt.out).Raw(); r != y {
+ t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, r, y)
+ }
+ }
+ }
+}
+
+// TestIsCountry checks country classification, including CLDR-specific
+// regions such as XK (Kosovo).
+func TestIsCountry(t *testing.T) {
+ tests := []struct {
+ reg string
+ country bool
+ }{
+ {"US", true},
+ {"001", false},
+ {"958", false},
+ {"419", false},
+ {"203", true},
+ {"020", true},
+ {"900", false},
+ {"999", false},
+ {"QO", false},
+ {"EU", false},
+ {"AA", false},
+ {"XK", true},
+ }
+ for i, tt := range tests {
+ r, _ := ParseRegion(tt.reg)
+ if r.IsCountry() != tt.country {
+ t.Errorf("%d: IsCountry(%s) was %v; want %v", i, tt.reg, r.IsCountry(), tt.country)
+ }
+ }
+}
+
+// TestIsGroup checks group classification, including CLDR-specific groups
+// such as QO and EU.
+func TestIsGroup(t *testing.T) {
+ tests := []struct {
+ reg string
+ group bool
+ }{
+ {"US", false},
+ {"001", true},
+ {"958", false},
+ {"419", true},
+ {"203", false},
+ {"020", false},
+ {"900", false},
+ {"999", false},
+ {"QO", true},
+ {"EU", true},
+ {"AA", false},
+ {"XK", false},
+ }
+ for i, tt := range tests {
+ r, _ := ParseRegion(tt.reg)
+ if r.IsGroup() != tt.group {
+ t.Errorf("%d: IsGroup(%s) was %v; want %v", i, tt.reg, r.IsGroup(), tt.group)
+ }
+ }
+}
+
+// TestContains checks region containment: reflexive, direct, and transitive.
+func TestContains(t *testing.T) {
+ tests := []struct {
+ enclosing, contained string
+ contains bool
+ }{
+ // A region contains itself.
+ {"US", "US", true},
+ {"001", "001", true},
+
+ // Direct containment.
+ {"001", "002", true},
+ {"039", "XK", true},
+ {"150", "XK", true},
+ {"EU", "AT", true},
+ {"QO", "AQ", true},
+
+ // Indirect containment.
+ {"001", "US", true},
+ {"001", "419", true},
+ {"001", "013", true},
+
+ // No containment.
+ {"US", "001", false},
+ {"155", "EU", false},
+ }
+ for i, tt := range tests {
+ r := MustParseRegion(tt.enclosing)
+ con := MustParseRegion(tt.contained)
+ if got := r.Contains(con); got != tt.contains {
+ t.Errorf("%d: %s.Contains(%s) was %v; want %v", i, tt.enclosing, tt.contained, got, tt.contains)
+ }
+ }
+}
+
+// TestRegionCanonicalize checks replacement of deprecated region codes.
+// SU has no single replacement and must canonicalize to itself.
+func TestRegionCanonicalize(t *testing.T) {
+ for i, tt := range []struct{ in, out string }{
+ {"UK", "GB"},
+ {"TP", "TL"},
+ {"QU", "EU"},
+ {"SU", "SU"},
+ {"VD", "VN"},
+ {"DD", "DE"},
+ } {
+ r := MustParseRegion(tt.in)
+ want := MustParseRegion(tt.out)
+ if got := r.Canonicalize(); got != want {
+ t.Errorf("%d: got %v; want %v", i, got, want)
+ }
+ }
+}
+
+// TestRegionTLD checks the ISO-3166-to-ccTLD mapping across the various
+// reservation categories; ZZ with ok == false marks regions without a ccTLD.
+func TestRegionTLD(t *testing.T) {
+ for _, tt := range []struct {
+ in, out string
+ ok bool
+ }{
+ {"EH", "EH", true},
+ {"FR", "FR", true},
+ {"TL", "TL", true},
+
+ // In ccTLD before in ISO.
+ {"GG", "GG", true},
+
+ // Non-standard assignment of ccTLD to ISO code.
+ {"GB", "UK", true},
+
+ // Exceptionally reserved in ISO and valid ccTLD.
+ {"UK", "UK", true},
+ {"AC", "AC", true},
+ {"EU", "EU", true},
+ {"SU", "SU", true},
+
+ // Exceptionally reserved in ISO and invalid ccTLD.
+ {"CP", "ZZ", false},
+ {"DG", "ZZ", false},
+ {"EA", "ZZ", false},
+ {"FX", "ZZ", false},
+ {"IC", "ZZ", false},
+ {"TA", "ZZ", false},
+
+ // Transitionally reserved in ISO (e.g. deprecated) but valid ccTLD as
+ // it is still being phased out.
+ {"AN", "AN", true},
+ {"TP", "TP", true},
+
+ // Transitionally reserved in ISO (e.g. deprecated) and invalid ccTLD.
+ // Defined in package language as it has a mapping in CLDR.
+ {"BU", "ZZ", false},
+ {"CS", "ZZ", false},
+ {"NT", "ZZ", false},
+ {"YU", "ZZ", false},
+ {"ZR", "ZZ", false},
+ // Not defined in package: SF.
+
+ // Indeterminately reserved in ISO.
+ // Defined in package language as it has a legacy mapping in CLDR.
+ {"DY", "ZZ", false},
+ {"RH", "ZZ", false},
+ {"VD", "ZZ", false},
+ // Not defined in package: EW, FL, JA, LF, PI, RA, RB, RC, RI, RL, RM,
+ // RN, RP, WG, WL, WV, and YV.
+
+ // Not assigned in ISO, but legacy definitions in CLDR.
+ {"DD", "ZZ", false},
+ {"YD", "ZZ", false},
+
+ // Normal mappings but somewhat special status in ccTLD.
+ {"BL", "BL", true},
+ {"MF", "MF", true},
+ {"BV", "BV", true},
+ {"SJ", "SJ", true},
+
+ // Have values when normalized, but not as is.
+ {"QU", "ZZ", false},
+
+ // ISO Private Use.
+ {"AA", "ZZ", false},
+ {"QM", "ZZ", false},
+ {"QO", "ZZ", false},
+ {"XA", "ZZ", false},
+ {"XK", "ZZ", false}, // Sometimes used for Kosovo, but invalid ccTLD.
+ } {
+ if tt.in == "" {
+ // NOTE(review): no table entry has an empty in; this guard appears
+ // to be dead code.
+ continue
+ }
+
+ r := MustParseRegion(tt.in)
+ var want Region
+ if tt.out != "ZZ" {
+ want = MustParseRegion(tt.out)
+ }
+ tld, err := r.TLD()
+ if got := err == nil; got != tt.ok {
+ t.Errorf("error(%v): got %v; want %v", r, got, tt.ok)
+ }
+ if tld != want {
+ t.Errorf("TLD(%v): got %v; want %v", r, tld, want)
+ }
+ }
+}
+
+// TestCanonicalize exercises each CanonType flag on representative tags, and
+// then verifies that All-canonicalization is idempotent for every supported
+// base language.
+func TestCanonicalize(t *testing.T) {
+ // TODO: do a full test using CLDR data in a separate regression test.
+ tests := []struct {
+ in, out string
+ option CanonType
+ }{
+ {"en-Latn", "en", SuppressScript},
+ {"sr-Cyrl", "sr-Cyrl", SuppressScript},
+ {"sh", "sr-Latn", Legacy},
+ {"sh-HR", "sr-Latn-HR", Legacy},
+ {"sh-Cyrl-HR", "sr-Cyrl-HR", Legacy},
+ {"tl", "fil", Legacy},
+ {"no", "no", Legacy},
+ {"no", "nb", Legacy | CLDR},
+ {"cmn", "cmn", Legacy},
+ {"cmn", "zh", Macro},
+ {"cmn-u-co-stroke", "zh-u-co-stroke", Macro},
+ {"yue", "yue", Macro},
+ {"nb", "no", Macro},
+ {"nb", "nb", Macro | CLDR},
+ {"no", "no", Macro},
+ {"no", "no", Macro | CLDR},
+ {"iw", "he", DeprecatedBase},
+ {"iw", "he", Deprecated | CLDR},
+ {"mo", "ro-MD", Deprecated}, // Adopted by CLDR as of version 25.
+ {"alb", "sq", Legacy}, // bibliographic
+ {"dut", "nl", Legacy}, // bibliographic
+ // As of CLDR 25, mo is no longer considered a legacy mapping.
+ {"mo", "mo", Legacy | CLDR},
+ {"und-AN", "und-AN", Deprecated},
+ {"und-YD", "und-YE", DeprecatedRegion},
+ {"und-YD", "und-YD", DeprecatedBase},
+ {"und-Qaai", "und-Zinh", DeprecatedScript},
+ {"und-Qaai", "und-Qaai", DeprecatedBase},
+ {"drh", "mn", All}, // drh -> khk -> mn
+
+ {"en-GB-u-rg-uszzzz", "en-GB-u-rg-uszzzz", Raw},
+ {"en-GB-u-rg-USZZZZ", "en-GB-u-rg-uszzzz", Raw},
+ // TODO: use different exact values for language and regional tag?
+ {"en-GB-u-rg-uszzzz-va-posix", "en-GB-u-rg-uszzzz-va-posix", Raw},
+ {"en-GB-u-rg-uszzzz-co-phonebk", "en-GB-u-co-phonebk-rg-uszzzz", Raw},
+ // Invalid region specifications are left as is.
+ {"en-GB-u-rg-usz", "en-GB-u-rg-usz", Raw},
+ {"en-GB-u-rg-usz-va-posix", "en-GB-u-rg-usz-va-posix", Raw},
+ {"en-GB-u-rg-usz-co-phonebk", "en-GB-u-co-phonebk-rg-usz", Raw},
+ }
+ for i, tt := range tests {
+ in, _ := Raw.Parse(tt.in)
+ in, _ = tt.option.Canonicalize(in)
+ if in.String() != tt.out {
+ t.Errorf("%d:%s: was %s; want %s", i, tt.in, in.String(), tt.out)
+ }
+ }
+ // Test idempotence.
+ for _, base := range Supported.BaseLanguages() {
+ tag, _ := Raw.Compose(base)
+ got, _ := All.Canonicalize(tag)
+ want, _ := All.Canonicalize(got)
+ if got != want {
+ t.Errorf("idem(%s): got %s; want %s", tag, got, want)
+ }
+ }
+}
+
+// TestTypeForKey verifies extraction of -u extension values by key.
+func TestTypeForKey(t *testing.T) {
+ for _, tt := range []struct{ key, in, out string }{
+ {"co", "en", ""},
+ {"co", "en-u-abc", ""},
+ {"co", "en-u-co-phonebk", "phonebk"},
+ {"co", "en-u-co-phonebk-cu-aud", "phonebk"},
+ {"co", "x-foo-u-co-phonebk", ""},
+ {"va", "en-US-u-va-posix", "posix"},
+ {"rg", "en-u-rg-gbzzzz", "gbzzzz"},
+ {"nu", "en-u-co-phonebk-nu-arabic", "arabic"},
+ {"kc", "cmn-u-co-stroke", ""},
+ } {
+ if got := Make(tt.in).TypeForKey(tt.key); got != tt.out {
+ t.Errorf("%q[%q]: was %q; want %q", tt.in, tt.key, got, tt.out)
+ }
+ }
+}
+
+// TestParent verifies the CLDR parent-locale chain for a wide range of tags,
+// including script skipping, en-001/es-419/pt-PT inheritance, and extensions.
+func TestParent(t *testing.T) {
+ tests := []struct{ in, out string }{
+ // Strip variants and extensions first
+ {"de-u-co-phonebk", "de"},
+ {"de-1994", "de"},
+ {"de-Latn-1994", "de"}, // remove superfluous script.
+
+ // Ensure the canonical Tag for an entry is in the chain for base-script
+ // pairs.
+ {"zh-Hans", "zh"},
+
+ // Skip the script if it is the maximized version. CLDR files for the
+ // skipped tag are always empty.
+ {"zh-Hans-TW", "zh"},
+ {"zh-Hans-CN", "zh"},
+
+ // Insert the script if the maximized script is not the same as the
+ // maximized script of the base language.
+ {"zh-TW", "zh-Hant"},
+ {"zh-HK", "zh-Hant"},
+ {"zh-Hant-TW", "zh-Hant"},
+ {"zh-Hant-HK", "zh-Hant"},
+
+ // Non-default script skips to und.
+ // CLDR
+ {"az-Cyrl", "und"},
+ {"bs-Cyrl", "und"},
+ {"en-Dsrt", "und"},
+ {"ha-Arab", "und"},
+ {"mn-Mong", "und"},
+ {"pa-Arab", "und"},
+ {"shi-Latn", "und"},
+ {"sr-Latn", "und"},
+ {"uz-Arab", "und"},
+ {"uz-Cyrl", "und"},
+ {"vai-Latn", "und"},
+ {"zh-Hant", "und"},
+ // extra
+ {"nl-Cyrl", "und"},
+
+ // World english inherits from en-001.
+ {"en-150", "en-001"},
+ {"en-AU", "en-001"},
+ {"en-BE", "en-001"},
+ {"en-GG", "en-001"},
+ {"en-GI", "en-001"},
+ {"en-HK", "en-001"},
+ {"en-IE", "en-001"},
+ {"en-IM", "en-001"},
+ {"en-IN", "en-001"},
+ {"en-JE", "en-001"},
+ {"en-MT", "en-001"},
+ {"en-NZ", "en-001"},
+ {"en-PK", "en-001"},
+ {"en-SG", "en-001"},
+
+ // Spanish in Latin-American countries have es-419 as parent.
+ {"es-AR", "es-419"},
+ {"es-BO", "es-419"},
+ {"es-CL", "es-419"},
+ {"es-CO", "es-419"},
+ {"es-CR", "es-419"},
+ {"es-CU", "es-419"},
+ {"es-DO", "es-419"},
+ {"es-EC", "es-419"},
+ {"es-GT", "es-419"},
+ {"es-HN", "es-419"},
+ {"es-MX", "es-419"},
+ {"es-NI", "es-419"},
+ {"es-PA", "es-419"},
+ {"es-PE", "es-419"},
+ {"es-PR", "es-419"},
+ {"es-PY", "es-419"},
+ {"es-SV", "es-419"},
+ {"es-US", "es-419"},
+ {"es-UY", "es-419"},
+ {"es-VE", "es-419"},
+ // exceptions (according to CLDR)
+ {"es-CW", "es"},
+
+ // Inherit from pt-PT, instead of pt for these countries.
+ {"pt-AO", "pt-PT"},
+ {"pt-CV", "pt-PT"},
+ {"pt-GW", "pt-PT"},
+ {"pt-MO", "pt-PT"},
+ {"pt-MZ", "pt-PT"},
+ {"pt-ST", "pt-PT"},
+ {"pt-TL", "pt-PT"},
+
+ {"en-GB-u-co-phonebk-rg-uszzzz", "en-GB"},
+ {"en-GB-u-rg-uszzzz", "en-GB"},
+ {"en-US-u-va-posix", "en-US"},
+
+ // Difference between language and regional tag.
+ {"ca-ES-valencia", "ca-ES"},
+ {"ca-ES-valencia-u-rg-ptzzzz", "ca-ES"}, // t.full != nil
+ {"en-US-u-va-variant", "en-US"},
+ {"en-u-va-variant", "en"}, // t.full != nil
+ {"en-u-rg-gbzzzz", "en"},
+ {"en-US-u-rg-gbzzzz", "en-US"},
+ {"nl-US-u-rg-gbzzzz", "nl-US"}, // t.full != nil
+ }
+ for _, tt := range tests {
+ tag := Raw.MustParse(tt.in)
+ if p := Raw.MustParse(tt.out); p != tag.Parent() {
+ t.Errorf("%s: was %v; want %v", tt.in, tag.Parent(), p)
+ }
+ }
+}
+
+// Benchmark input sets, grouped by how much work Parse must do for them.
+var (
+ // Tags without error that don't need to be changed.
+ benchBasic = []string{
+ "en",
+ "en-Latn",
+ "en-GB",
+ "za",
+ "zh-Hant",
+ "zh",
+ "zh-HK",
+ "ar-MK",
+ "en-CA",
+ "fr-CA",
+ "fr-CH",
+ "fr",
+ "lv",
+ "he-IT",
+ "tlh",
+ "ja",
+ "ja-Jpan",
+ "ja-Jpan-JP",
+ "de-1996",
+ "de-CH",
+ "sr",
+ "sr-Latn",
+ }
+ // Tags with extensions, no changes required.
+ benchExt = []string{
+ "x-a-b-c-d",
+ "x-aa-bbbb-cccccccc-d",
+ "en-x_cc-b-bbb-a-aaa",
+ "en-c_cc-b-bbb-a-aaa-x-x",
+ "en-u-co-phonebk",
+ "en-Cyrl-u-co-phonebk",
+ "en-US-u-co-phonebk-cu-xau",
+ "en-nedix-u-co-phonebk",
+ "en-t-t0-abcd",
+ "en-t-nl-latn",
+ "en-t-t0-abcd-x-a",
+ }
+ // Change, but no memory allocation required.
+ benchSimpleChange = []string{
+ "EN",
+ "i-klingon",
+ "en-latn",
+ "zh-cmn-Hans-CN",
+ "iw-NL",
+ }
+ // Change and memory allocation required.
+ benchChangeAlloc = []string{
+ "en-c_cc-b-bbb-a-aaa",
+ "en-u-cu-xua-co-phonebk",
+ "en-u-cu-xua-co-phonebk-a-cd",
+ "en-u-def-abc-cu-xua-co-phonebk",
+ "en-t-en-Cyrl-NL-1994",
+ "en-t-en-Cyrl-NL-1994-t0-abc-def",
+ }
+ // Tags that result in errors.
+ benchErr = []string{
+ // IllFormed
+ "x_A.-B-C_D",
+ "en-u-cu-co-phonebk",
+ "en-u-cu-xau-co",
+ "en-t-nl-abcd",
+ // Invalid
+ "xx",
+ "nl-Uuuu",
+ "nl-QB",
+ }
+ benchChange = append(benchSimpleChange, benchChangeAlloc...)
+ benchAll = append(append(append(benchBasic, benchExt...), benchChange...), benchErr...)
+)
+
+// doParse parses one tag per benchmark iteration, cycling through tag, so
+// that ns/op reflects the cost of a single Parse call.
+func doParse(b *testing.B, tag []string) {
+ for i := 0; i < b.N; i++ {
+ // Use the modulo instead of looping over all tags so that we get a somewhat
+ // meaningful ns/op.
+ Parse(tag[i%len(tag)])
+ }
+}
+
+// BenchmarkParse measures Parse over the full mix of tags, including errors.
+func BenchmarkParse(b *testing.B) {
+ doParse(b, benchAll)
+}
+
+// BenchmarkParseBasic measures Parse over valid tags needing no changes.
+func BenchmarkParseBasic(b *testing.B) {
+ doParse(b, benchBasic)
+}
+
+// BenchmarkParseError measures Parse over tags that result in errors.
+func BenchmarkParseError(b *testing.B) {
+ doParse(b, benchErr)
+}
+
+// BenchmarkParseSimpleChange measures Parse over tags that change without
+// requiring memory allocation.
+func BenchmarkParseSimpleChange(b *testing.B) {
+ doParse(b, benchSimpleChange)
+}
+
+// BenchmarkParseChangeAlloc measures Parse over tags whose normalization
+// requires memory allocation.
+func BenchmarkParseChangeAlloc(b *testing.B) {
+ doParse(b, benchChangeAlloc)
+}
diff --git a/internal/language/compact/parse.go b/internal/language/compact/parse.go
new file mode 100644
index 0000000..d50c8aa
--- /dev/null
+++ b/internal/language/compact/parse.go
@@ -0,0 +1,228 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+ "errors"
+ "strconv"
+ "strings"
+
+ "golang.org/x/text/internal/language"
+)
+
+// ValueError is returned by any of the parsing functions when the
+// input is well-formed but the respective subtag is not recognized
+// as a valid value. See Parse for how tags containing such values
+// are handled.
+type ValueError interface {
+ error
+
+ // Subtag returns the subtag for which the error occurred.
+ Subtag() string
+}
+
+// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
+// failed it returns an error and any part of the tag that could be parsed.
+// If parsing succeeded but an unknown value was found, it returns
+// ValueError. The Tag returned in this case is just stripped of the unknown
+// value. All other values are preserved. It accepts tags in the BCP 47 format
+// and extensions to this standard defined in
+// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// The resulting tag is canonicalized using the default canonicalization type.
+// It is shorthand for Default.Parse(s).
+func Parse(s string) (t Tag, err error) {
+ return Default.Parse(s)
+}
+
+// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
+// failed it returns an error and any part of the tag that could be parsed.
+// If parsing succeeded but an unknown value was found, it returns
+// ValueError. The Tag returned in this case is just stripped of the unknown
+// value. All other values are preserved. It accepts tags in the BCP 47 format
+// and extensions to this standard defined in
+// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// The resulting tag is canonicalized using the canonicalization type c.
+func (c CanonType) Parse(s string) (t Tag, err error) {
+ tt, err := language.Parse(s)
+ if err != nil {
+ return makeTag(tt), err
+ }
+ tt, changed := canonicalize(c, tt)
+ if changed {
+ // Canonicalization altered subtags; rebuild the cached string form.
+ tt.RemakeString()
+ }
+ return makeTag(tt), err
+}
+
+// Compose creates a Tag from individual parts, which may be of type Tag, Base,
+// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
+// Base, Script or Region or slice of type Variant or Extension is passed more
+// than once, the latter will overwrite the former. Variants and Extensions are
+// accumulated, but if two extensions of the same type are passed, the latter
+// will replace the former. For -u extensions, though, the key-type pairs are
+// added, where later values overwrite older ones. A Tag overwrites all former
+// values and typically only makes sense as the first argument. The resulting
+// tag is returned after canonicalizing using the Default CanonType. If one or
+// more errors are encountered, one of the errors is returned.
+// It is shorthand for Default.Compose(part...).
+func Compose(part ...interface{}) (t Tag, err error) {
+ return Default.Compose(part...)
+}
+
+// Compose creates a Tag from individual parts, which may be of type Tag, Base,
+// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
+// Base, Script or Region or slice of type Variant or Extension is passed more
+// than once, the latter will overwrite the former. Variants and Extensions are
+// accumulated, but if two extensions of the same type are passed, the latter
+// will replace the former. For -u extensions, though, the key-type pairs are
+// added, where later values overwrite older ones. A Tag overwrites all former
+// values and typically only makes sense as the first argument. The resulting
+// tag is returned after canonicalizing using CanonType c. If one or more errors
+// are encountered, one of the errors is returned.
+func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
+ var b language.Builder
+ if err = update(&b, part...); err != nil {
+ // On error the root (und) tag is returned alongside the error.
+ return und, err
+ }
+ b.Tag, _ = canonicalize(c, b.Tag)
+ return makeTag(b.Make()), err
+}
+
+// errInvalidArgument reports a zero-value Variant or Extension passed to Compose.
+var errInvalidArgument = errors.New("invalid Extension or Variant")
+
+// update applies each part to the builder b. Parts may be of type Tag, Base,
+// Script, Region, Variant, []Variant, Extension, []Extension or error. If
+// several parts are invalid, only the last error encountered is returned.
+func update(b *language.Builder, part ...interface{}) (err error) {
+ for _, x := range part {
+ switch v := x.(type) {
+ case Tag:
+ b.SetTag(v.tag())
+ case Base:
+ b.Tag.LangID = v.langID
+ case Script:
+ b.Tag.ScriptID = v.scriptID
+ case Region:
+ b.Tag.RegionID = v.regionID
+ case Variant:
+ if v.variant == "" {
+ // Zero-value Variant: record the error and skip it. Note that
+ // break exits only the switch; remaining parts are processed.
+ err = errInvalidArgument
+ break
+ }
+ b.AddVariant(v.variant)
+ case Extension:
+ if v.s == "" {
+ // Zero-value Extension: same handling as a zero Variant above.
+ err = errInvalidArgument
+ break
+ }
+ b.SetExt(v.s)
+ case []Variant:
+ // A slice replaces all previously accumulated variants.
+ b.ClearVariants()
+ for _, v := range v {
+ b.AddVariant(v.variant)
+ }
+ case []Extension:
+ // A slice replaces all previously accumulated extensions.
+ b.ClearExtensions()
+ for _, e := range v {
+ b.SetExt(e.s)
+ }
+ // TODO: support parsing of raw strings based on morphology or just extensions?
+ case error:
+ if v != nil {
+ err = v
+ }
+ }
+ }
+ return
+}
+
+// errInvalidWeight reports a malformed q-value in an Accept-Language header.
+var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
+
+// ParseAcceptLanguage parses the contents of an Accept-Language header as
+// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
+// a list of corresponding quality weights. It is more permissive than RFC 2616
+// and may return non-nil slices even if the input is not valid.
+// The Tags will be sorted by highest weight first and then by first occurrence.
+// Tags with a weight of zero will be dropped. An error will be returned if the
+// input could not be parsed.
+func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
+ var entry string
+ for s != "" {
+ // Entries are comma-separated; empty entries are skipped.
+ if entry, s = split(s, ','); entry == "" {
+ continue
+ }
+
+ // An entry is "<tag>[;q=<weight>]".
+ entry, weight := split(entry, ';')
+
+ // Scan the language.
+ // Note: err here shadows the named return; a fallback below may still
+ // accept entries that fail strict parsing.
+ t, err := Parse(entry)
+ if err != nil {
+ id, ok := acceptFallback[entry]
+ if !ok {
+ return nil, nil, err
+ }
+ t = makeTag(language.Tag{LangID: id})
+ }
+
+ // Scan the optional weight.
+ w := 1.0
+ if weight != "" {
+ weight = consume(weight, 'q')
+ weight = consume(weight, '=')
+ // consume returns the empty string when a token could not be
+ // consumed, resulting in an error for ParseFloat.
+ if w, err = strconv.ParseFloat(weight, 32); err != nil {
+ return nil, nil, errInvalidWeight
+ }
+ // Drop tags with a quality weight of 0.
+ if w <= 0 {
+ continue
+ }
+ }
+
+ tag = append(tag, t)
+ q = append(q, float32(w))
+ }
+ // Stable sort preserves first-occurrence order among equal weights.
+ sortStable(&tagSort{tag, q})
+ return tag, q, nil
+}
+
+// consume removes a leading token c from s and returns the remainder with
+// surrounding space trimmed, or the empty string if s does not start with c.
+func consume(s string, c byte) string {
+ if len(s) == 0 || s[0] != c {
+ return ""
+ }
+ return strings.TrimSpace(s[1:])
+}
+
+// split cuts s at the first occurrence of c, returning both halves with
+// surrounding space trimmed; tail is empty if c does not occur.
+func split(s string, c byte) (head, tail string) {
+ i := strings.IndexByte(s, c)
+ if i < 0 {
+ return strings.TrimSpace(s), ""
+ }
+ return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:])
+}
+
+// acceptFallback is a hack mapping to deal with a small number of cases that
+// occur in Accept-Language (with reasonable frequency).
+var acceptFallback = map[string]language.Language{
+ "english": _en,
+ "deutsch": _de,
+ "italian": _it,
+ "french": _fr,
+ "*": _mul, // defined in the spec to match all languages.
+}
+
+// tagSort sorts tags by descending quality weight, keeping the tag and
+// weight slices in sync. It is used by ParseAcceptLanguage.
+type tagSort struct {
+ tag []Tag
+ q []float32
+}
+
+// Len implements sort.Interface.
+func (s *tagSort) Len() int {
+ return len(s.q)
+}
+
+// Less orders by descending quality weight; a stable sort then preserves
+// first-occurrence order among equal weights.
+func (s *tagSort) Less(i, j int) bool {
+ return s.q[i] > s.q[j]
+}
+
+// Swap exchanges both the tags and their weights to keep the slices aligned.
+func (s *tagSort) Swap(i, j int) {
+ s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
+ s.q[i], s.q[j] = s.q[j], s.q[i]
+}
diff --git a/internal/language/compact/parse_test.go b/internal/language/compact/parse_test.go
new file mode 100644
index 0000000..2ff28bf
--- /dev/null
+++ b/internal/language/compact/parse_test.go
@@ -0,0 +1,390 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+ "strings"
+ "testing"
+
+ "golang.org/x/text/internal/language"
+)
+
+// equalTags compares language, script and region subtags only.
+func (t Tag) equalTags(a Tag) bool {
+ return t.lang() == a.lang() &&
+ t.script() == a.script() &&
+ t.region() == a.region()
+}
+
+var errSyntax = language.ErrSyntax
+
+type parseTest struct {
+ i int // the index of this test
+ in string
+ lang, script, region string
+ variants, ext string
+ extList []string // only used when more than one extension is present
+ invalid bool
+ rewrite bool // special rewrite not handled by parseTag
+ changed bool // string needed to be reformatted
+}
+
+func parseTests() []parseTest {
+ tests := []parseTest{
+ {in: "root", lang: "und"},
+ {in: "und", lang: "und"},
+ {in: "en", lang: "en"},
+
+ {in: "en-US-u-va-posix", lang: "en", region: "US", ext: "u-va-posix"},
+ {in: "ca-ES-valencia", lang: "ca", region: "ES", variants: "valencia"},
+ {in: "en-US-u-rg-gbzzzz", lang: "en", region: "US", ext: "u-rg-gbzzzz"},
+
+ {in: "xy", lang: "und", invalid: true},
+ {in: "en-ZY", lang: "en", invalid: true},
+ {in: "gsw", lang: "gsw"},
+ {in: "sr_Latn", lang: "sr", script: "Latn"},
+ {in: "af-Arab", lang: "af", script: "Arab"},
+ {in: "nl-BE", lang: "nl", region: "BE"},
+ {in: "es-419", lang: "es", region: "419"},
+ {in: "und-001", lang: "und", region: "001"},
+ {in: "de-latn-be", lang: "de", script: "Latn", region: "BE"},
+ // Variants
+ {in: "de-1901", lang: "de", variants: "1901"},
+ // Accept with unsuppressed script.
+ {in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"},
+ // Specialized.
+ {in: "sl-rozaj", lang: "sl", variants: "rozaj"},
+ {in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"},
+ {in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"},
+ {in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"},
+ {in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"},
+ // Maximum number of variants while adhering to prefix rules.
+ {in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"},
+
+ // Sorting.
+ {in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
+ {in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true},
+ {in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true},
+
+		// Duplicate variants are removed, but not an error.
+ {in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"},
+
+ // Variants that do not have correct prefixes. We still accept these.
+ {in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
+ {in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"},
+ {in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
+ {in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
+
+ // Invalid variant.
+ {in: "de-1902", lang: "de", variants: "", invalid: true},
+
+ {in: "EN_CYRL", lang: "en", script: "Cyrl"},
+ // private use and extensions
+ {in: "x-a-b-c-d", ext: "x-a-b-c-d"},
+ {in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
+ {in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
+ {in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
+ {in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
+ {in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
+ {in: "en-v-c", lang: "en", ext: "", invalid: true},
+ {in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true},
+ {in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true},
+ {in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true},
+ {in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true},
+ {in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true},
+ {in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true},
+ {in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true},
+ {in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true},
+ {in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true},
+ {in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true},
+ {in: "en-u-c", lang: "en", ext: "", invalid: true},
+ {in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
+ {in: "en-u-co-phonebk-ca", lang: "en", ext: "u-co-phonebk", invalid: true},
+ {in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
+ {in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
+ {in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
+ {in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
+ {in: "en-u-co-phonebook", lang: "en", ext: "", invalid: true},
+ {in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-cu-xau", invalid: true, changed: true},
+ {in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
+ {in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
+ {in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
+ {in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"},
+ {in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
+ {in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true},
+ {in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true},
+ {in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
+ // Invalid "u" extension. Drop invalid parts.
+ {in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk"}, invalid: true, changed: true},
+ {in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-cu-xau"}, invalid: true},
+ // We allow duplicate keys as the LDML spec does not explicitly prohibit it.
+ // TODO: Consider eliminating duplicates and returning an error.
+ {in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true},
+ {in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true},
+ {in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true},
+ {in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
+ // Not necessary to have changed here.
+ {in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true},
+ {in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
+ {in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
+ // invalid
+ {in: "", lang: "und", invalid: true},
+ {in: "-", lang: "und", invalid: true},
+ {in: "x", lang: "und", invalid: true},
+ {in: "x-", lang: "und", invalid: true},
+ {in: "x--", lang: "und", invalid: true},
+ {in: "a-a-b-c-d", lang: "und", invalid: true},
+ {in: "en-", lang: "en", invalid: true},
+ {in: "enne-", lang: "und", invalid: true},
+ {in: "en.", lang: "und", invalid: true},
+ {in: "en.-latn", lang: "und", invalid: true},
+ {in: "en.-en", lang: "en", invalid: true},
+ {in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
+ {in: "a-tooManyChars-c-d", lang: "und", invalid: true},
+ // TODO: check key-value validity
+ // { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
+ {in: "en-t-abcd", lang: "en", invalid: true},
+ {in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
+ // rewrites (more tests in TestGrandfathered)
+ {in: "zh-min-nan", lang: "nan"},
+ {in: "zh-yue", lang: "yue"},
+ {in: "zh-xiang", lang: "hsn", rewrite: true},
+ {in: "zh-guoyu", lang: "cmn", rewrite: true},
+ {in: "iw", lang: "iw"},
+ {in: "sgn-BE-FR", lang: "sfb", rewrite: true},
+ {in: "i-klingon", lang: "tlh", rewrite: true},
+ }
+ for i, tt := range tests {
+ tests[i].i = i
+ if tt.extList != nil {
+ tests[i].ext = strings.Join(tt.extList, "-")
+ }
+ if tt.ext != "" && tt.extList == nil {
+ tests[i].extList = []string{tt.ext}
+ }
+ }
+ return tests
+}
+
+// partChecks runs checks for each part by calling the function returned by f.
+func partChecks(t *testing.T, f func(*parseTest) (Tag, bool)) {
+ for i, tt := range parseTests() {
+ tag, skip := f(&tt)
+ if skip {
+ continue
+ }
+ if l, _ := language.ParseBase(tt.lang); l != tag.lang() {
+ t.Errorf("%d: lang was %q; want %q", i, tag.lang(), l)
+ }
+ if sc, _ := language.ParseScript(tt.script); sc != tag.script() {
+ t.Errorf("%d: script was %q; want %q", i, tag.script(), sc)
+ }
+ if r, _ := language.ParseRegion(tt.region); r != tag.region() {
+ t.Errorf("%d: region was %q; want %q", i, tag.region(), r)
+ }
+ v := tag.tag().Variants()
+ if v != "" {
+ v = v[1:]
+ }
+ if v != tt.variants {
+ t.Errorf("%d: variants was %q; want %q", i, v, tt.variants)
+ }
+ if e := strings.Join(tag.tag().Extensions(), "-"); e != tt.ext {
+ t.Errorf("%d: extensions were %q; want %q", i, e, tt.ext)
+ }
+ }
+}
+
+func TestParse(t *testing.T) {
+ partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
+ id, _ = Raw.Parse(tt.in)
+ return id, false
+ })
+}
+
+func TestErrors(t *testing.T) {
+ mkInvalid := func(s string) error {
+ return language.NewValueError([]byte(s))
+ }
+ tests := []struct {
+ in string
+ out error
+ }{
+ // invalid subtags.
+ {"ac", mkInvalid("ac")},
+ {"AC", mkInvalid("ac")},
+ {"aa-Uuuu", mkInvalid("Uuuu")},
+ {"aa-AB", mkInvalid("AB")},
+ // ill-formed wins over invalid.
+ {"ac-u", errSyntax},
+ {"ac-u-ca", errSyntax},
+ {"ac-u-ca-co-pinyin", errSyntax},
+ {"noob", errSyntax},
+ }
+ for _, tt := range tests {
+ _, err := Parse(tt.in)
+ if err != tt.out {
+ t.Errorf("%s: was %q; want %q", tt.in, err, tt.out)
+ }
+ }
+}
+
+func TestCompose1(t *testing.T) {
+ partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
+ l, _ := ParseBase(tt.lang)
+ s, _ := ParseScript(tt.script)
+ r, _ := ParseRegion(tt.region)
+ v := []Variant{}
+ for _, x := range strings.Split(tt.variants, "-") {
+ p, _ := ParseVariant(x)
+ v = append(v, p)
+ }
+ e := []Extension{}
+ for _, x := range tt.extList {
+ p, _ := ParseExtension(x)
+ e = append(e, p)
+ }
+ id, _ = Raw.Compose(l, s, r, v, e)
+ return id, false
+ })
+}
+
+func TestCompose2(t *testing.T) {
+ partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
+ l, _ := ParseBase(tt.lang)
+ s, _ := ParseScript(tt.script)
+ r, _ := ParseRegion(tt.region)
+ p := []interface{}{l, s, r, s, r, l}
+ for _, x := range strings.Split(tt.variants, "-") {
+ if x != "" {
+ v, _ := ParseVariant(x)
+ p = append(p, v)
+ }
+ }
+ for _, x := range tt.extList {
+ e, _ := ParseExtension(x)
+ p = append(p, e)
+ }
+ id, _ = Raw.Compose(p...)
+ return id, false
+ })
+}
+
+func TestCompose3(t *testing.T) {
+ partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
+ id, _ = Raw.Parse(tt.in)
+ id, _ = Raw.Compose(id)
+ return id, false
+ })
+}
+
+func mk(s string) Tag {
+ return Raw.Make(s)
+}
+
+func TestParseAcceptLanguage(t *testing.T) {
+ type res struct {
+ t Tag
+ q float32
+ }
+ en := []res{{mk("en"), 1.0}}
+ tests := []struct {
+ out []res
+ in string
+ ok bool
+ }{
+ {en, "en", true},
+ {en, " en", true},
+ {en, "en ", true},
+ {en, " en ", true},
+ {en, "en,", true},
+ {en, ",en", true},
+ {en, ",,,en,,,", true},
+ {en, ",en;q=1", true},
+
+ // We allow an empty input, contrary to spec.
+ {nil, "", true},
+ {[]res{{mk("aa"), 1}}, "aa;", true}, // allow unspecified weight
+
+ // errors
+ {nil, ";", false},
+ {nil, "$", false},
+ {nil, "e;", false},
+ {nil, "x;", false},
+ {nil, "x", false},
+ {nil, "ac", false}, // non-existing language
+ {nil, "aa;q", false},
+ {nil, "aa;q=", false},
+ {nil, "aa;q=.", false},
+
+ // odd fallbacks
+ {
+ []res{{mk("en"), 0.1}},
+ " english ;q=.1",
+ true,
+ },
+ {
+ []res{{mk("it"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}},
+ " italian, deutsch, french",
+ true,
+ },
+
+ // lists
+ {
+ []res{{mk("en"), 0.1}},
+ "en;q=.1",
+ true,
+ },
+ {
+ []res{{mk("mul"), 1.0}},
+ "*",
+ true,
+ },
+ {
+ []res{{mk("en"), 1.0}, {mk("de"), 1.0}},
+ "en,de",
+ true,
+ },
+ {
+ []res{{mk("en"), 1.0}, {mk("de"), .5}},
+ "en,de;q=0.5",
+ true,
+ },
+ {
+ []res{{mk("de"), 0.8}, {mk("en"), 0.5}},
+ " en ; q = 0.5 , , de;q=0.8",
+ true,
+ },
+ {
+ []res{{mk("en"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}, {mk("tlh"), 1.0}},
+ "en,de,fr,i-klingon",
+ true,
+ },
+ // sorting
+ {
+ []res{{mk("tlh"), 0.4}, {mk("de"), 0.2}, {mk("fr"), 0.2}, {mk("en"), 0.1}},
+ "en;q=0.1,de;q=0.2,fr;q=0.2,i-klingon;q=0.4",
+ true,
+ },
+ // dropping
+ {
+ []res{{mk("fr"), 0.2}, {mk("en"), 0.1}},
+ "en;q=0.1,de;q=0,fr;q=0.2,i-klingon;q=0.0",
+ true,
+ },
+ }
+ for i, tt := range tests {
+ tags, qs, e := ParseAcceptLanguage(tt.in)
+ if e == nil != tt.ok {
+ t.Errorf("%d:%s:err: was %v; want %v", i, tt.in, e == nil, tt.ok)
+ }
+ for j, tag := range tags {
+ if out := tt.out[j]; !tag.equalTags(out.t) || qs[j] != out.q {
+ t.Errorf("%d:%s: was %s, %1f; want %s, %1f", i, tt.in, tag, qs[j], out.t, out.q)
+ break
+ }
+ }
+ }
+}
diff --git a/internal/language/compact/tables.go b/internal/language/compact/tables.go
new file mode 100644
index 0000000..1825af6
--- /dev/null
+++ b/internal/language/compact/tables.go
@@ -0,0 +1,1305 @@
+// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
+
+package language
+
+import "golang.org/x/text/internal/language"
+
+// CLDRVersion is the CLDR version from which the tables in this package are derived.
+const CLDRVersion = "32"
+
+const (
+ _de = 269
+ _en = 313
+ _fr = 350
+ _it = 505
+ _mo = 784
+ _no = 879
+ _nb = 839
+ _pt = 960
+ _sh = 1031
+ _mul = 806
+ _und = 0
+)
+const (
+ _001 = 1
+ _419 = 31
+ _BR = 65
+ _CA = 73
+ _ES = 110
+ _GB = 123
+ _MD = 188
+ _PT = 238
+ _UK = 306
+ _US = 309
+ _ZZ = 357
+ _XA = 323
+ _XC = 325
+ _XK = 333
+)
+const (
+ _Latn = 87
+ _Hani = 54
+ _Hans = 56
+ _Hant = 57
+ _Qaaa = 139
+ _Qaai = 147
+ _Qabx = 188
+ _Zinh = 236
+ _Zyyy = 241
+ _Zzzz = 242
+)
+
+// NumCompactTags is the number of common tags. The maximum tag is
+// NumCompactTags-1.
+const NumCompactTags = 775
+const (
+ undIndex compactID = 0
+ afIndex compactID = 1
+ afNAIndex compactID = 2
+ afZAIndex compactID = 3
+ agqIndex compactID = 4
+ agqCMIndex compactID = 5
+ akIndex compactID = 6
+ akGHIndex compactID = 7
+ amIndex compactID = 8
+ amETIndex compactID = 9
+ arIndex compactID = 10
+ ar001Index compactID = 11
+ arAEIndex compactID = 12
+ arBHIndex compactID = 13
+ arDJIndex compactID = 14
+ arDZIndex compactID = 15
+ arEGIndex compactID = 16
+ arEHIndex compactID = 17
+ arERIndex compactID = 18
+ arILIndex compactID = 19
+ arIQIndex compactID = 20
+ arJOIndex compactID = 21
+ arKMIndex compactID = 22
+ arKWIndex compactID = 23
+ arLBIndex compactID = 24
+ arLYIndex compactID = 25
+ arMAIndex compactID = 26
+ arMRIndex compactID = 27
+ arOMIndex compactID = 28
+ arPSIndex compactID = 29
+ arQAIndex compactID = 30
+ arSAIndex compactID = 31
+ arSDIndex compactID = 32
+ arSOIndex compactID = 33
+ arSSIndex compactID = 34
+ arSYIndex compactID = 35
+ arTDIndex compactID = 36
+ arTNIndex compactID = 37
+ arYEIndex compactID = 38
+ arsIndex compactID = 39
+ asIndex compactID = 40
+ asINIndex compactID = 41
+ asaIndex compactID = 42
+ asaTZIndex compactID = 43
+ astIndex compactID = 44
+ astESIndex compactID = 45
+ azIndex compactID = 46
+ azCyrlIndex compactID = 47
+ azCyrlAZIndex compactID = 48
+ azLatnIndex compactID = 49
+ azLatnAZIndex compactID = 50
+ basIndex compactID = 51
+ basCMIndex compactID = 52
+ beIndex compactID = 53
+ beBYIndex compactID = 54
+ bemIndex compactID = 55
+ bemZMIndex compactID = 56
+ bezIndex compactID = 57
+ bezTZIndex compactID = 58
+ bgIndex compactID = 59
+ bgBGIndex compactID = 60
+ bhIndex compactID = 61
+ bmIndex compactID = 62
+ bmMLIndex compactID = 63
+ bnIndex compactID = 64
+ bnBDIndex compactID = 65
+ bnINIndex compactID = 66
+ boIndex compactID = 67
+ boCNIndex compactID = 68
+ boINIndex compactID = 69
+ brIndex compactID = 70
+ brFRIndex compactID = 71
+ brxIndex compactID = 72
+ brxINIndex compactID = 73
+ bsIndex compactID = 74
+ bsCyrlIndex compactID = 75
+ bsCyrlBAIndex compactID = 76
+ bsLatnIndex compactID = 77
+ bsLatnBAIndex compactID = 78
+ caIndex compactID = 79
+ caADIndex compactID = 80
+ caESIndex compactID = 81
+ caFRIndex compactID = 82
+ caITIndex compactID = 83
+ ccpIndex compactID = 84
+ ccpBDIndex compactID = 85
+ ccpINIndex compactID = 86
+ ceIndex compactID = 87
+ ceRUIndex compactID = 88
+ cggIndex compactID = 89
+ cggUGIndex compactID = 90
+ chrIndex compactID = 91
+ chrUSIndex compactID = 92
+ ckbIndex compactID = 93
+ ckbIQIndex compactID = 94
+ ckbIRIndex compactID = 95
+ csIndex compactID = 96
+ csCZIndex compactID = 97
+ cuIndex compactID = 98
+ cuRUIndex compactID = 99
+ cyIndex compactID = 100
+ cyGBIndex compactID = 101
+ daIndex compactID = 102
+ daDKIndex compactID = 103
+ daGLIndex compactID = 104
+ davIndex compactID = 105
+ davKEIndex compactID = 106
+ deIndex compactID = 107
+ deATIndex compactID = 108
+ deBEIndex compactID = 109
+ deCHIndex compactID = 110
+ deDEIndex compactID = 111
+ deITIndex compactID = 112
+ deLIIndex compactID = 113
+ deLUIndex compactID = 114
+ djeIndex compactID = 115
+ djeNEIndex compactID = 116
+ dsbIndex compactID = 117
+ dsbDEIndex compactID = 118
+ duaIndex compactID = 119
+ duaCMIndex compactID = 120
+ dvIndex compactID = 121
+ dyoIndex compactID = 122
+ dyoSNIndex compactID = 123
+ dzIndex compactID = 124
+ dzBTIndex compactID = 125
+ ebuIndex compactID = 126
+ ebuKEIndex compactID = 127
+ eeIndex compactID = 128
+ eeGHIndex compactID = 129
+ eeTGIndex compactID = 130
+ elIndex compactID = 131
+ elCYIndex compactID = 132
+ elGRIndex compactID = 133
+ enIndex compactID = 134
+ en001Index compactID = 135
+ en150Index compactID = 136
+ enAGIndex compactID = 137
+ enAIIndex compactID = 138
+ enASIndex compactID = 139
+ enATIndex compactID = 140
+ enAUIndex compactID = 141
+ enBBIndex compactID = 142
+ enBEIndex compactID = 143
+ enBIIndex compactID = 144
+ enBMIndex compactID = 145
+ enBSIndex compactID = 146
+ enBWIndex compactID = 147
+ enBZIndex compactID = 148
+ enCAIndex compactID = 149
+ enCCIndex compactID = 150
+ enCHIndex compactID = 151
+ enCKIndex compactID = 152
+ enCMIndex compactID = 153
+ enCXIndex compactID = 154
+ enCYIndex compactID = 155
+ enDEIndex compactID = 156
+ enDGIndex compactID = 157
+ enDKIndex compactID = 158
+ enDMIndex compactID = 159
+ enERIndex compactID = 160
+ enFIIndex compactID = 161
+ enFJIndex compactID = 162
+ enFKIndex compactID = 163
+ enFMIndex compactID = 164
+ enGBIndex compactID = 165
+ enGDIndex compactID = 166
+ enGGIndex compactID = 167
+ enGHIndex compactID = 168
+ enGIIndex compactID = 169
+ enGMIndex compactID = 170
+ enGUIndex compactID = 171
+ enGYIndex compactID = 172
+ enHKIndex compactID = 173
+ enIEIndex compactID = 174
+ enILIndex compactID = 175
+ enIMIndex compactID = 176
+ enINIndex compactID = 177
+ enIOIndex compactID = 178
+ enJEIndex compactID = 179
+ enJMIndex compactID = 180
+ enKEIndex compactID = 181
+ enKIIndex compactID = 182
+ enKNIndex compactID = 183
+ enKYIndex compactID = 184
+ enLCIndex compactID = 185
+ enLRIndex compactID = 186
+ enLSIndex compactID = 187
+ enMGIndex compactID = 188
+ enMHIndex compactID = 189
+ enMOIndex compactID = 190
+ enMPIndex compactID = 191
+ enMSIndex compactID = 192
+ enMTIndex compactID = 193
+ enMUIndex compactID = 194
+ enMWIndex compactID = 195
+ enMYIndex compactID = 196
+ enNAIndex compactID = 197
+ enNFIndex compactID = 198
+ enNGIndex compactID = 199
+ enNLIndex compactID = 200
+ enNRIndex compactID = 201
+ enNUIndex compactID = 202
+ enNZIndex compactID = 203
+ enPGIndex compactID = 204
+ enPHIndex compactID = 205
+ enPKIndex compactID = 206
+ enPNIndex compactID = 207
+ enPRIndex compactID = 208
+ enPWIndex compactID = 209
+ enRWIndex compactID = 210
+ enSBIndex compactID = 211
+ enSCIndex compactID = 212
+ enSDIndex compactID = 213
+ enSEIndex compactID = 214
+ enSGIndex compactID = 215
+ enSHIndex compactID = 216
+ enSIIndex compactID = 217
+ enSLIndex compactID = 218
+ enSSIndex compactID = 219
+ enSXIndex compactID = 220
+ enSZIndex compactID = 221
+ enTCIndex compactID = 222
+ enTKIndex compactID = 223
+ enTOIndex compactID = 224
+ enTTIndex compactID = 225
+ enTVIndex compactID = 226
+ enTZIndex compactID = 227
+ enUGIndex compactID = 228
+ enUMIndex compactID = 229
+ enUSIndex compactID = 230
+ enVCIndex compactID = 231
+ enVGIndex compactID = 232
+ enVIIndex compactID = 233
+ enVUIndex compactID = 234
+ enWSIndex compactID = 235
+ enZAIndex compactID = 236
+ enZMIndex compactID = 237
+ enZWIndex compactID = 238
+ eoIndex compactID = 239
+ eo001Index compactID = 240
+ esIndex compactID = 241
+ es419Index compactID = 242
+ esARIndex compactID = 243
+ esBOIndex compactID = 244
+ esBRIndex compactID = 245
+ esBZIndex compactID = 246
+ esCLIndex compactID = 247
+ esCOIndex compactID = 248
+ esCRIndex compactID = 249
+ esCUIndex compactID = 250
+ esDOIndex compactID = 251
+ esEAIndex compactID = 252
+ esECIndex compactID = 253
+ esESIndex compactID = 254
+ esGQIndex compactID = 255
+ esGTIndex compactID = 256
+ esHNIndex compactID = 257
+ esICIndex compactID = 258
+ esMXIndex compactID = 259
+ esNIIndex compactID = 260
+ esPAIndex compactID = 261
+ esPEIndex compactID = 262
+ esPHIndex compactID = 263
+ esPRIndex compactID = 264
+ esPYIndex compactID = 265
+ esSVIndex compactID = 266
+ esUSIndex compactID = 267
+ esUYIndex compactID = 268
+ esVEIndex compactID = 269
+ etIndex compactID = 270
+ etEEIndex compactID = 271
+ euIndex compactID = 272
+ euESIndex compactID = 273
+ ewoIndex compactID = 274
+ ewoCMIndex compactID = 275
+ faIndex compactID = 276
+ faAFIndex compactID = 277
+ faIRIndex compactID = 278
+ ffIndex compactID = 279
+ ffCMIndex compactID = 280
+ ffGNIndex compactID = 281
+ ffMRIndex compactID = 282
+ ffSNIndex compactID = 283
+ fiIndex compactID = 284
+ fiFIIndex compactID = 285
+ filIndex compactID = 286
+ filPHIndex compactID = 287
+ foIndex compactID = 288
+ foDKIndex compactID = 289
+ foFOIndex compactID = 290
+ frIndex compactID = 291
+ frBEIndex compactID = 292
+ frBFIndex compactID = 293
+ frBIIndex compactID = 294
+ frBJIndex compactID = 295
+ frBLIndex compactID = 296
+ frCAIndex compactID = 297
+ frCDIndex compactID = 298
+ frCFIndex compactID = 299
+ frCGIndex compactID = 300
+ frCHIndex compactID = 301
+ frCIIndex compactID = 302
+ frCMIndex compactID = 303
+ frDJIndex compactID = 304
+ frDZIndex compactID = 305
+ frFRIndex compactID = 306
+ frGAIndex compactID = 307
+ frGFIndex compactID = 308
+ frGNIndex compactID = 309
+ frGPIndex compactID = 310
+ frGQIndex compactID = 311
+ frHTIndex compactID = 312
+ frKMIndex compactID = 313
+ frLUIndex compactID = 314
+ frMAIndex compactID = 315
+ frMCIndex compactID = 316
+ frMFIndex compactID = 317
+ frMGIndex compactID = 318
+ frMLIndex compactID = 319
+ frMQIndex compactID = 320
+ frMRIndex compactID = 321
+ frMUIndex compactID = 322
+ frNCIndex compactID = 323
+ frNEIndex compactID = 324
+ frPFIndex compactID = 325
+ frPMIndex compactID = 326
+ frREIndex compactID = 327
+ frRWIndex compactID = 328
+ frSCIndex compactID = 329
+ frSNIndex compactID = 330
+ frSYIndex compactID = 331
+ frTDIndex compactID = 332
+ frTGIndex compactID = 333
+ frTNIndex compactID = 334
+ frVUIndex compactID = 335
+ frWFIndex compactID = 336
+ frYTIndex compactID = 337
+ furIndex compactID = 338
+ furITIndex compactID = 339
+ fyIndex compactID = 340
+ fyNLIndex compactID = 341
+ gaIndex compactID = 342
+ gaIEIndex compactID = 343
+ gdIndex compactID = 344
+ gdGBIndex compactID = 345
+ glIndex compactID = 346
+ glESIndex compactID = 347
+ gswIndex compactID = 348
+ gswCHIndex compactID = 349
+ gswFRIndex compactID = 350
+ gswLIIndex compactID = 351
+ guIndex compactID = 352
+ guINIndex compactID = 353
+ guwIndex compactID = 354
+ guzIndex compactID = 355
+ guzKEIndex compactID = 356
+ gvIndex compactID = 357
+ gvIMIndex compactID = 358
+ haIndex compactID = 359
+ haGHIndex compactID = 360
+ haNEIndex compactID = 361
+ haNGIndex compactID = 362
+ hawIndex compactID = 363
+ hawUSIndex compactID = 364
+ heIndex compactID = 365
+ heILIndex compactID = 366
+ hiIndex compactID = 367
+ hiINIndex compactID = 368
+ hrIndex compactID = 369
+ hrBAIndex compactID = 370
+ hrHRIndex compactID = 371
+ hsbIndex compactID = 372
+ hsbDEIndex compactID = 373
+ huIndex compactID = 374
+ huHUIndex compactID = 375
+ hyIndex compactID = 376
+ hyAMIndex compactID = 377
+ idIndex compactID = 378
+ idIDIndex compactID = 379
+ igIndex compactID = 380
+ igNGIndex compactID = 381
+ iiIndex compactID = 382
+ iiCNIndex compactID = 383
+ inIndex compactID = 384
+ ioIndex compactID = 385
+ isIndex compactID = 386
+ isISIndex compactID = 387
+ itIndex compactID = 388
+ itCHIndex compactID = 389
+ itITIndex compactID = 390
+ itSMIndex compactID = 391
+ itVAIndex compactID = 392
+ iuIndex compactID = 393
+ iwIndex compactID = 394
+ jaIndex compactID = 395
+ jaJPIndex compactID = 396
+ jboIndex compactID = 397
+ jgoIndex compactID = 398
+ jgoCMIndex compactID = 399
+ jiIndex compactID = 400
+ jmcIndex compactID = 401
+ jmcTZIndex compactID = 402
+ jvIndex compactID = 403
+ jwIndex compactID = 404
+ kaIndex compactID = 405
+ kaGEIndex compactID = 406
+ kabIndex compactID = 407
+ kabDZIndex compactID = 408
+ kajIndex compactID = 409
+ kamIndex compactID = 410
+ kamKEIndex compactID = 411
+ kcgIndex compactID = 412
+ kdeIndex compactID = 413
+ kdeTZIndex compactID = 414
+ keaIndex compactID = 415
+ keaCVIndex compactID = 416
+ khqIndex compactID = 417
+ khqMLIndex compactID = 418
+ kiIndex compactID = 419
+ kiKEIndex compactID = 420
+ kkIndex compactID = 421
+ kkKZIndex compactID = 422
+ kkjIndex compactID = 423
+ kkjCMIndex compactID = 424
+ klIndex compactID = 425
+ klGLIndex compactID = 426
+ klnIndex compactID = 427
+ klnKEIndex compactID = 428
+ kmIndex compactID = 429
+ kmKHIndex compactID = 430
+ knIndex compactID = 431
+ knINIndex compactID = 432
+ koIndex compactID = 433
+ koKPIndex compactID = 434
+ koKRIndex compactID = 435
+ kokIndex compactID = 436
+ kokINIndex compactID = 437
+ ksIndex compactID = 438
+ ksINIndex compactID = 439
+ ksbIndex compactID = 440
+ ksbTZIndex compactID = 441
+ ksfIndex compactID = 442
+ ksfCMIndex compactID = 443
+ kshIndex compactID = 444
+ kshDEIndex compactID = 445
+ kuIndex compactID = 446
+ kwIndex compactID = 447
+ kwGBIndex compactID = 448
+ kyIndex compactID = 449
+ kyKGIndex compactID = 450
+ lagIndex compactID = 451
+ lagTZIndex compactID = 452
+ lbIndex compactID = 453
+ lbLUIndex compactID = 454
+ lgIndex compactID = 455
+ lgUGIndex compactID = 456
+ lktIndex compactID = 457
+ lktUSIndex compactID = 458
+ lnIndex compactID = 459
+ lnAOIndex compactID = 460
+ lnCDIndex compactID = 461
+ lnCFIndex compactID = 462
+ lnCGIndex compactID = 463
+ loIndex compactID = 464
+ loLAIndex compactID = 465
+ lrcIndex compactID = 466
+ lrcIQIndex compactID = 467
+ lrcIRIndex compactID = 468
+ ltIndex compactID = 469
+ ltLTIndex compactID = 470
+ luIndex compactID = 471
+ luCDIndex compactID = 472
+ luoIndex compactID = 473
+ luoKEIndex compactID = 474
+ luyIndex compactID = 475
+ luyKEIndex compactID = 476
+ lvIndex compactID = 477
+ lvLVIndex compactID = 478
+ masIndex compactID = 479
+ masKEIndex compactID = 480
+ masTZIndex compactID = 481
+ merIndex compactID = 482
+ merKEIndex compactID = 483
+ mfeIndex compactID = 484
+ mfeMUIndex compactID = 485
+ mgIndex compactID = 486
+ mgMGIndex compactID = 487
+ mghIndex compactID = 488
+ mghMZIndex compactID = 489
+ mgoIndex compactID = 490
+ mgoCMIndex compactID = 491
+ mkIndex compactID = 492
+ mkMKIndex compactID = 493
+ mlIndex compactID = 494
+ mlINIndex compactID = 495
+ mnIndex compactID = 496
+ mnMNIndex compactID = 497
+ moIndex compactID = 498
+ mrIndex compactID = 499
+ mrINIndex compactID = 500
+ msIndex compactID = 501
+ msBNIndex compactID = 502
+ msMYIndex compactID = 503
+ msSGIndex compactID = 504
+ mtIndex compactID = 505
+ mtMTIndex compactID = 506
+ muaIndex compactID = 507
+ muaCMIndex compactID = 508
+ myIndex compactID = 509
+ myMMIndex compactID = 510
+ mznIndex compactID = 511
+ mznIRIndex compactID = 512
+ nahIndex compactID = 513
+ naqIndex compactID = 514
+ naqNAIndex compactID = 515
+ nbIndex compactID = 516
+ nbNOIndex compactID = 517
+ nbSJIndex compactID = 518
+ ndIndex compactID = 519
+ ndZWIndex compactID = 520
+ ndsIndex compactID = 521
+ ndsDEIndex compactID = 522
+ ndsNLIndex compactID = 523
+ neIndex compactID = 524
+ neINIndex compactID = 525
+ neNPIndex compactID = 526
+ nlIndex compactID = 527
+ nlAWIndex compactID = 528
+ nlBEIndex compactID = 529
+ nlBQIndex compactID = 530
+ nlCWIndex compactID = 531
+ nlNLIndex compactID = 532
+ nlSRIndex compactID = 533
+ nlSXIndex compactID = 534
+ nmgIndex compactID = 535
+ nmgCMIndex compactID = 536
+ nnIndex compactID = 537
+ nnNOIndex compactID = 538
+ nnhIndex compactID = 539
+ nnhCMIndex compactID = 540
+ noIndex compactID = 541
+ nqoIndex compactID = 542
+ nrIndex compactID = 543
+ nsoIndex compactID = 544
+ nusIndex compactID = 545
+ nusSSIndex compactID = 546
+ nyIndex compactID = 547
+ nynIndex compactID = 548
+ nynUGIndex compactID = 549
+ omIndex compactID = 550
+ omETIndex compactID = 551
+ omKEIndex compactID = 552
+ orIndex compactID = 553
+ orINIndex compactID = 554
+ osIndex compactID = 555
+ osGEIndex compactID = 556
+ osRUIndex compactID = 557
+ paIndex compactID = 558
+ paArabIndex compactID = 559
+ paArabPKIndex compactID = 560
+ paGuruIndex compactID = 561
+ paGuruINIndex compactID = 562
+ papIndex compactID = 563
+ plIndex compactID = 564
+ plPLIndex compactID = 565
+ prgIndex compactID = 566
+ prg001Index compactID = 567
+ psIndex compactID = 568
+ psAFIndex compactID = 569
+ ptIndex compactID = 570
+ ptAOIndex compactID = 571
+ ptBRIndex compactID = 572
+ ptCHIndex compactID = 573
+ ptCVIndex compactID = 574
+ ptGQIndex compactID = 575
+ ptGWIndex compactID = 576
+ ptLUIndex compactID = 577
+ ptMOIndex compactID = 578
+ ptMZIndex compactID = 579
+ ptPTIndex compactID = 580
+ ptSTIndex compactID = 581
+ ptTLIndex compactID = 582
+ quIndex compactID = 583
+ quBOIndex compactID = 584
+ quECIndex compactID = 585
+ quPEIndex compactID = 586
+ rmIndex compactID = 587
+ rmCHIndex compactID = 588
+ rnIndex compactID = 589
+ rnBIIndex compactID = 590
+ roIndex compactID = 591
+ roMDIndex compactID = 592
+ roROIndex compactID = 593
+ rofIndex compactID = 594
+ rofTZIndex compactID = 595
+ ruIndex compactID = 596
+ ruBYIndex compactID = 597
+ ruKGIndex compactID = 598
+ ruKZIndex compactID = 599
+ ruMDIndex compactID = 600
+ ruRUIndex compactID = 601
+ ruUAIndex compactID = 602
+ rwIndex compactID = 603
+ rwRWIndex compactID = 604
+ rwkIndex compactID = 605
+ rwkTZIndex compactID = 606
+ sahIndex compactID = 607
+ sahRUIndex compactID = 608
+ saqIndex compactID = 609
+ saqKEIndex compactID = 610
+ sbpIndex compactID = 611
+ sbpTZIndex compactID = 612
+ sdIndex compactID = 613
+ sdPKIndex compactID = 614
+ sdhIndex compactID = 615
+ seIndex compactID = 616
+ seFIIndex compactID = 617
+ seNOIndex compactID = 618
+ seSEIndex compactID = 619
+ sehIndex compactID = 620
+ sehMZIndex compactID = 621
+ sesIndex compactID = 622
+ sesMLIndex compactID = 623
+ sgIndex compactID = 624
+ sgCFIndex compactID = 625
+ shIndex compactID = 626
+ shiIndex compactID = 627
+ shiLatnIndex compactID = 628
+ shiLatnMAIndex compactID = 629
+ shiTfngIndex compactID = 630
+ shiTfngMAIndex compactID = 631
+ siIndex compactID = 632
+ siLKIndex compactID = 633
+ skIndex compactID = 634
+ skSKIndex compactID = 635
+ slIndex compactID = 636
+ slSIIndex compactID = 637
+ smaIndex compactID = 638
+ smiIndex compactID = 639
+ smjIndex compactID = 640
+ smnIndex compactID = 641
+ smnFIIndex compactID = 642
+ smsIndex compactID = 643
+ snIndex compactID = 644
+ snZWIndex compactID = 645
+ soIndex compactID = 646
+ soDJIndex compactID = 647
+ soETIndex compactID = 648
+ soKEIndex compactID = 649
+ soSOIndex compactID = 650
+ sqIndex compactID = 651
+ sqALIndex compactID = 652
+ sqMKIndex compactID = 653
+ sqXKIndex compactID = 654
+ srIndex compactID = 655
+ srCyrlIndex compactID = 656
+ srCyrlBAIndex compactID = 657
+ srCyrlMEIndex compactID = 658
+ srCyrlRSIndex compactID = 659
+ srCyrlXKIndex compactID = 660
+ srLatnIndex compactID = 661
+ srLatnBAIndex compactID = 662
+ srLatnMEIndex compactID = 663
+ srLatnRSIndex compactID = 664
+ srLatnXKIndex compactID = 665
+ ssIndex compactID = 666
+ ssyIndex compactID = 667
+ stIndex compactID = 668
+ svIndex compactID = 669
+ svAXIndex compactID = 670
+ svFIIndex compactID = 671
+ svSEIndex compactID = 672
+ swIndex compactID = 673
+ swCDIndex compactID = 674
+ swKEIndex compactID = 675
+ swTZIndex compactID = 676
+ swUGIndex compactID = 677
+ syrIndex compactID = 678
+ taIndex compactID = 679
+ taINIndex compactID = 680
+ taLKIndex compactID = 681
+ taMYIndex compactID = 682
+ taSGIndex compactID = 683
+ teIndex compactID = 684
+ teINIndex compactID = 685
+ teoIndex compactID = 686
+ teoKEIndex compactID = 687
+ teoUGIndex compactID = 688
+ tgIndex compactID = 689
+ tgTJIndex compactID = 690
+ thIndex compactID = 691
+ thTHIndex compactID = 692
+ tiIndex compactID = 693
+ tiERIndex compactID = 694
+ tiETIndex compactID = 695
+ tigIndex compactID = 696
+ tkIndex compactID = 697
+ tkTMIndex compactID = 698
+ tlIndex compactID = 699
+ tnIndex compactID = 700
+ toIndex compactID = 701
+ toTOIndex compactID = 702
+ trIndex compactID = 703
+ trCYIndex compactID = 704
+ trTRIndex compactID = 705
+ tsIndex compactID = 706
+ ttIndex compactID = 707
+ ttRUIndex compactID = 708
+ twqIndex compactID = 709
+ twqNEIndex compactID = 710
+ tzmIndex compactID = 711
+ tzmMAIndex compactID = 712
+ ugIndex compactID = 713
+ ugCNIndex compactID = 714
+ ukIndex compactID = 715
+ ukUAIndex compactID = 716
+ urIndex compactID = 717
+ urINIndex compactID = 718
+ urPKIndex compactID = 719
+ uzIndex compactID = 720
+ uzArabIndex compactID = 721
+ uzArabAFIndex compactID = 722
+ uzCyrlIndex compactID = 723
+ uzCyrlUZIndex compactID = 724
+ uzLatnIndex compactID = 725
+ uzLatnUZIndex compactID = 726
+ vaiIndex compactID = 727
+ vaiLatnIndex compactID = 728
+ vaiLatnLRIndex compactID = 729
+ vaiVaiiIndex compactID = 730
+ vaiVaiiLRIndex compactID = 731
+ veIndex compactID = 732
+ viIndex compactID = 733
+ viVNIndex compactID = 734
+ voIndex compactID = 735
+ vo001Index compactID = 736
+ vunIndex compactID = 737
+ vunTZIndex compactID = 738
+ waIndex compactID = 739
+ waeIndex compactID = 740
+ waeCHIndex compactID = 741
+ woIndex compactID = 742
+ woSNIndex compactID = 743
+ xhIndex compactID = 744
+ xogIndex compactID = 745
+ xogUGIndex compactID = 746
+ yavIndex compactID = 747
+ yavCMIndex compactID = 748
+ yiIndex compactID = 749
+ yi001Index compactID = 750
+ yoIndex compactID = 751
+ yoBJIndex compactID = 752
+ yoNGIndex compactID = 753
+ yueIndex compactID = 754
+ yueHansIndex compactID = 755
+ yueHansCNIndex compactID = 756
+ yueHantIndex compactID = 757
+ yueHantHKIndex compactID = 758
+ zghIndex compactID = 759
+ zghMAIndex compactID = 760
+ zhIndex compactID = 761
+ zhHansIndex compactID = 762
+ zhHansCNIndex compactID = 763
+ zhHansHKIndex compactID = 764
+ zhHansMOIndex compactID = 765
+ zhHansSGIndex compactID = 766
+ zhHantIndex compactID = 767
+ zhHantHKIndex compactID = 768
+ zhHantMOIndex compactID = 769
+ zhHantTWIndex compactID = 770
+ zuIndex compactID = 771
+ zuZAIndex compactID = 772
+ caESvalenciaIndex compactID = 773
+ enUSuvaposixIndex compactID = 774
+)
+
+// coreTags is the sorted list of packed core tag values (presumably
+// base language, script, and region packed into one uint32 — TODO
+// confirm exact bit layout against the generator). getCoreIndex in
+// compact.go binary-searches this slice with sort.Search, so it MUST
+// stay in ascending order. Machine-generated; do not edit by hand.
+var coreTags = []language.CompactCoreInfo{ // 773 elements
+	// Entry 0 - 1F
+	0x00000000, 0x01600000, 0x016000d2, 0x01600161,
+	0x01c00000, 0x01c00052, 0x02100000, 0x02100080,
+	0x02700000, 0x0270006f, 0x03a00000, 0x03a00001,
+	0x03a00023, 0x03a00039, 0x03a00062, 0x03a00067,
+	0x03a0006b, 0x03a0006c, 0x03a0006d, 0x03a00097,
+	0x03a0009b, 0x03a000a1, 0x03a000a8, 0x03a000ac,
+	0x03a000b0, 0x03a000b9, 0x03a000ba, 0x03a000c9,
+	0x03a000e1, 0x03a000ed, 0x03a000f3, 0x03a00108,
+	// Entry 20 - 3F
+	0x03a0010b, 0x03a00115, 0x03a00117, 0x03a0011c,
+	0x03a00120, 0x03a00128, 0x03a0015e, 0x04000000,
+	0x04300000, 0x04300099, 0x04400000, 0x0440012f,
+	0x04800000, 0x0480006e, 0x05800000, 0x0581f000,
+	0x0581f032, 0x05857000, 0x05857032, 0x05e00000,
+	0x05e00052, 0x07100000, 0x07100047, 0x07500000,
+	0x07500162, 0x07900000, 0x0790012f, 0x07e00000,
+	0x07e00038, 0x08200000, 0x0a000000, 0x0a0000c3,
+	// Entry 40 - 5F
+	0x0a500000, 0x0a500035, 0x0a500099, 0x0a900000,
+	0x0a900053, 0x0a900099, 0x0b200000, 0x0b200078,
+	0x0b500000, 0x0b500099, 0x0b700000, 0x0b71f000,
+	0x0b71f033, 0x0b757000, 0x0b757033, 0x0d700000,
+	0x0d700022, 0x0d70006e, 0x0d700078, 0x0d70009e,
+	0x0db00000, 0x0db00035, 0x0db00099, 0x0dc00000,
+	0x0dc00106, 0x0df00000, 0x0df00131, 0x0e500000,
+	0x0e500135, 0x0e900000, 0x0e90009b, 0x0e90009c,
+	// Entry 60 - 7F
+	0x0fa00000, 0x0fa0005e, 0x0fe00000, 0x0fe00106,
+	0x10000000, 0x1000007b, 0x10100000, 0x10100063,
+	0x10100082, 0x10800000, 0x108000a4, 0x10d00000,
+	0x10d0002e, 0x10d00036, 0x10d0004e, 0x10d00060,
+	0x10d0009e, 0x10d000b2, 0x10d000b7, 0x11700000,
+	0x117000d4, 0x11f00000, 0x11f00060, 0x12400000,
+	0x12400052, 0x12800000, 0x12b00000, 0x12b00114,
+	0x12d00000, 0x12d00043, 0x12f00000, 0x12f000a4,
+	// Entry 80 - 9F
+	0x13000000, 0x13000080, 0x13000122, 0x13600000,
+	0x1360005d, 0x13600087, 0x13900000, 0x13900001,
+	0x1390001a, 0x13900025, 0x13900026, 0x1390002d,
+	0x1390002e, 0x1390002f, 0x13900034, 0x13900036,
+	0x1390003a, 0x1390003d, 0x13900042, 0x13900046,
+	0x13900048, 0x13900049, 0x1390004a, 0x1390004e,
+	0x13900050, 0x13900052, 0x1390005c, 0x1390005d,
+	0x13900060, 0x13900061, 0x13900063, 0x13900064,
+	// Entry A0 - BF
+	0x1390006d, 0x13900072, 0x13900073, 0x13900074,
+	0x13900075, 0x1390007b, 0x1390007c, 0x1390007f,
+	0x13900080, 0x13900081, 0x13900083, 0x1390008a,
+	0x1390008c, 0x1390008d, 0x13900096, 0x13900097,
+	0x13900098, 0x13900099, 0x1390009a, 0x1390009f,
+	0x139000a0, 0x139000a4, 0x139000a7, 0x139000a9,
+	0x139000ad, 0x139000b1, 0x139000b4, 0x139000b5,
+	0x139000bf, 0x139000c0, 0x139000c6, 0x139000c7,
+	// Entry C0 - DF
+	0x139000ca, 0x139000cb, 0x139000cc, 0x139000ce,
+	0x139000d0, 0x139000d2, 0x139000d5, 0x139000d6,
+	0x139000d9, 0x139000dd, 0x139000df, 0x139000e0,
+	0x139000e6, 0x139000e7, 0x139000e8, 0x139000eb,
+	0x139000ec, 0x139000f0, 0x13900107, 0x13900109,
+	0x1390010a, 0x1390010b, 0x1390010c, 0x1390010d,
+	0x1390010e, 0x1390010f, 0x13900112, 0x13900117,
+	0x1390011b, 0x1390011d, 0x1390011f, 0x13900125,
+	// Entry E0 - FF
+	0x13900129, 0x1390012c, 0x1390012d, 0x1390012f,
+	0x13900131, 0x13900133, 0x13900135, 0x13900139,
+	0x1390013c, 0x1390013d, 0x1390013f, 0x13900142,
+	0x13900161, 0x13900162, 0x13900164, 0x13c00000,
+	0x13c00001, 0x13e00000, 0x13e0001f, 0x13e0002c,
+	0x13e0003f, 0x13e00041, 0x13e00048, 0x13e00051,
+	0x13e00054, 0x13e00056, 0x13e00059, 0x13e00065,
+	0x13e00068, 0x13e00069, 0x13e0006e, 0x13e00086,
+	// Entry 100 - 11F
+	0x13e00089, 0x13e0008f, 0x13e00094, 0x13e000cf,
+	0x13e000d8, 0x13e000e2, 0x13e000e4, 0x13e000e7,
+	0x13e000ec, 0x13e000f1, 0x13e0011a, 0x13e00135,
+	0x13e00136, 0x13e0013b, 0x14000000, 0x1400006a,
+	0x14500000, 0x1450006e, 0x14600000, 0x14600052,
+	0x14800000, 0x14800024, 0x1480009c, 0x14e00000,
+	0x14e00052, 0x14e00084, 0x14e000c9, 0x14e00114,
+	0x15100000, 0x15100072, 0x15300000, 0x153000e7,
+	// Entry 120 - 13F
+	0x15800000, 0x15800063, 0x15800076, 0x15e00000,
+	0x15e00036, 0x15e00037, 0x15e0003a, 0x15e0003b,
+	0x15e0003c, 0x15e00049, 0x15e0004b, 0x15e0004c,
+	0x15e0004d, 0x15e0004e, 0x15e0004f, 0x15e00052,
+	0x15e00062, 0x15e00067, 0x15e00078, 0x15e0007a,
+	0x15e0007e, 0x15e00084, 0x15e00085, 0x15e00086,
+	0x15e00091, 0x15e000a8, 0x15e000b7, 0x15e000ba,
+	0x15e000bb, 0x15e000be, 0x15e000bf, 0x15e000c3,
+	// Entry 140 - 15F
+	0x15e000c8, 0x15e000c9, 0x15e000cc, 0x15e000d3,
+	0x15e000d4, 0x15e000e5, 0x15e000ea, 0x15e00102,
+	0x15e00107, 0x15e0010a, 0x15e00114, 0x15e0011c,
+	0x15e00120, 0x15e00122, 0x15e00128, 0x15e0013f,
+	0x15e00140, 0x15e0015f, 0x16900000, 0x1690009e,
+	0x16d00000, 0x16d000d9, 0x16e00000, 0x16e00096,
+	0x17e00000, 0x17e0007b, 0x19000000, 0x1900006e,
+	0x1a300000, 0x1a30004e, 0x1a300078, 0x1a3000b2,
+	// Entry 160 - 17F
+	0x1a400000, 0x1a400099, 0x1a900000, 0x1ab00000,
+	0x1ab000a4, 0x1ac00000, 0x1ac00098, 0x1b400000,
+	0x1b400080, 0x1b4000d4, 0x1b4000d6, 0x1b800000,
+	0x1b800135, 0x1bc00000, 0x1bc00097, 0x1be00000,
+	0x1be00099, 0x1d100000, 0x1d100033, 0x1d100090,
+	0x1d200000, 0x1d200060, 0x1d500000, 0x1d500092,
+	0x1d700000, 0x1d700028, 0x1e100000, 0x1e100095,
+	0x1e700000, 0x1e7000d6, 0x1ea00000, 0x1ea00053,
+	// Entry 180 - 19F
+	0x1f300000, 0x1f500000, 0x1f800000, 0x1f80009d,
+	0x1f900000, 0x1f90004e, 0x1f90009e, 0x1f900113,
+	0x1f900138, 0x1fa00000, 0x1fb00000, 0x20000000,
+	0x200000a2, 0x20300000, 0x20700000, 0x20700052,
+	0x20800000, 0x20a00000, 0x20a0012f, 0x20e00000,
+	0x20f00000, 0x21000000, 0x2100007d, 0x21200000,
+	0x21200067, 0x21600000, 0x21700000, 0x217000a4,
+	0x21f00000, 0x22300000, 0x2230012f, 0x22700000,
+	// Entry 1A0 - 1BF
+	0x2270005a, 0x23400000, 0x234000c3, 0x23900000,
+	0x239000a4, 0x24200000, 0x242000ae, 0x24400000,
+	0x24400052, 0x24500000, 0x24500082, 0x24600000,
+	0x246000a4, 0x24a00000, 0x24a000a6, 0x25100000,
+	0x25100099, 0x25400000, 0x254000aa, 0x254000ab,
+	0x25600000, 0x25600099, 0x26a00000, 0x26a00099,
+	0x26b00000, 0x26b0012f, 0x26d00000, 0x26d00052,
+	0x26e00000, 0x26e00060, 0x27400000, 0x28100000,
+	// Entry 1C0 - 1DF
+	0x2810007b, 0x28a00000, 0x28a000a5, 0x29100000,
+	0x2910012f, 0x29500000, 0x295000b7, 0x2a300000,
+	0x2a300131, 0x2af00000, 0x2af00135, 0x2b500000,
+	0x2b50002a, 0x2b50004b, 0x2b50004c, 0x2b50004d,
+	0x2b800000, 0x2b8000af, 0x2bf00000, 0x2bf0009b,
+	0x2bf0009c, 0x2c000000, 0x2c0000b6, 0x2c200000,
+	0x2c20004b, 0x2c400000, 0x2c4000a4, 0x2c500000,
+	0x2c5000a4, 0x2c700000, 0x2c7000b8, 0x2d100000,
+	// Entry 1E0 - 1FF
+	0x2d1000a4, 0x2d10012f, 0x2e900000, 0x2e9000a4,
+	0x2ed00000, 0x2ed000cc, 0x2f100000, 0x2f1000bf,
+	0x2f200000, 0x2f2000d1, 0x2f400000, 0x2f400052,
+	0x2ff00000, 0x2ff000c2, 0x30400000, 0x30400099,
+	0x30b00000, 0x30b000c5, 0x31000000, 0x31b00000,
+	0x31b00099, 0x31f00000, 0x31f0003e, 0x31f000d0,
+	0x31f0010d, 0x32000000, 0x320000cb, 0x32500000,
+	0x32500052, 0x33100000, 0x331000c4, 0x33a00000,
+	// Entry 200 - 21F
+	0x33a0009c, 0x34100000, 0x34500000, 0x345000d2,
+	0x34700000, 0x347000da, 0x34700110, 0x34e00000,
+	0x34e00164, 0x35000000, 0x35000060, 0x350000d9,
+	0x35100000, 0x35100099, 0x351000db, 0x36700000,
+	0x36700030, 0x36700036, 0x36700040, 0x3670005b,
+	0x367000d9, 0x36700116, 0x3670011b, 0x36800000,
+	0x36800052, 0x36a00000, 0x36a000da, 0x36c00000,
+	0x36c00052, 0x36f00000, 0x37500000, 0x37600000,
+	// Entry 220 - 23F
+	0x37a00000, 0x38000000, 0x38000117, 0x38700000,
+	0x38900000, 0x38900131, 0x39000000, 0x3900006f,
+	0x390000a4, 0x39500000, 0x39500099, 0x39800000,
+	0x3980007d, 0x39800106, 0x39d00000, 0x39d05000,
+	0x39d050e8, 0x39d33000, 0x39d33099, 0x3a100000,
+	0x3b300000, 0x3b3000e9, 0x3bd00000, 0x3bd00001,
+	0x3be00000, 0x3be00024, 0x3c000000, 0x3c00002a,
+	0x3c000041, 0x3c00004e, 0x3c00005a, 0x3c000086,
+	// Entry 240 - 25F
+	0x3c00008b, 0x3c0000b7, 0x3c0000c6, 0x3c0000d1,
+	0x3c0000ee, 0x3c000118, 0x3c000126, 0x3c400000,
+	0x3c40003f, 0x3c400069, 0x3c4000e4, 0x3d400000,
+	0x3d40004e, 0x3d900000, 0x3d90003a, 0x3dc00000,
+	0x3dc000bc, 0x3dc00104, 0x3de00000, 0x3de0012f,
+	0x3e200000, 0x3e200047, 0x3e2000a5, 0x3e2000ae,
+	0x3e2000bc, 0x3e200106, 0x3e200130, 0x3e500000,
+	0x3e500107, 0x3e600000, 0x3e60012f, 0x3eb00000,
+	// Entry 260 - 27F
+	0x3eb00106, 0x3ec00000, 0x3ec000a4, 0x3f300000,
+	0x3f30012f, 0x3fa00000, 0x3fa000e8, 0x3fc00000,
+	0x3fd00000, 0x3fd00072, 0x3fd000da, 0x3fd0010c,
+	0x3ff00000, 0x3ff000d1, 0x40100000, 0x401000c3,
+	0x40200000, 0x4020004c, 0x40700000, 0x40800000,
+	0x40857000, 0x408570ba, 0x408dc000, 0x408dc0ba,
+	0x40c00000, 0x40c000b3, 0x41200000, 0x41200111,
+	0x41600000, 0x4160010f, 0x41c00000, 0x41d00000,
+	// Entry 280 - 29F
+	0x41e00000, 0x41f00000, 0x41f00072, 0x42200000,
+	0x42300000, 0x42300164, 0x42900000, 0x42900062,
+	0x4290006f, 0x429000a4, 0x42900115, 0x43100000,
+	0x43100027, 0x431000c2, 0x4310014d, 0x43200000,
+	0x4321f000, 0x4321f033, 0x4321f0bd, 0x4321f105,
+	0x4321f14d, 0x43257000, 0x43257033, 0x432570bd,
+	0x43257105, 0x4325714d, 0x43700000, 0x43a00000,
+	0x43b00000, 0x44400000, 0x44400031, 0x44400072,
+	// Entry 2A0 - 2BF
+	0x4440010c, 0x44500000, 0x4450004b, 0x445000a4,
+	0x4450012f, 0x44500131, 0x44e00000, 0x45000000,
+	0x45000099, 0x450000b3, 0x450000d0, 0x4500010d,
+	0x46100000, 0x46100099, 0x46400000, 0x464000a4,
+	0x46400131, 0x46700000, 0x46700124, 0x46b00000,
+	0x46b00123, 0x46f00000, 0x46f0006d, 0x46f0006f,
+	0x47100000, 0x47600000, 0x47600127, 0x47a00000,
+	0x48000000, 0x48200000, 0x48200129, 0x48a00000,
+	// Entry 2C0 - 2DF
+	0x48a0005d, 0x48a0012b, 0x48e00000, 0x49400000,
+	0x49400106, 0x4a400000, 0x4a4000d4, 0x4a900000,
+	0x4a9000ba, 0x4ac00000, 0x4ac00053, 0x4ae00000,
+	0x4ae00130, 0x4b400000, 0x4b400099, 0x4b4000e8,
+	0x4bc00000, 0x4bc05000, 0x4bc05024, 0x4bc1f000,
+	0x4bc1f137, 0x4bc57000, 0x4bc57137, 0x4be00000,
+	0x4be57000, 0x4be570b4, 0x4bee3000, 0x4bee30b4,
+	0x4c000000, 0x4c300000, 0x4c30013e, 0x4c900000,
+	// Entry 2E0 - 2FF
+	0x4c900001, 0x4cc00000, 0x4cc0012f, 0x4ce00000,
+	0x4cf00000, 0x4cf0004e, 0x4e500000, 0x4e500114,
+	0x4f200000, 0x4fb00000, 0x4fb00131, 0x50900000,
+	0x50900052, 0x51200000, 0x51200001, 0x51800000,
+	0x5180003b, 0x518000d6, 0x51f00000, 0x51f38000,
+	0x51f38053, 0x51f39000, 0x51f3908d, 0x52800000,
+	0x528000ba, 0x52900000, 0x52938000, 0x52938053,
+	0x5293808d, 0x529380c6, 0x5293810d, 0x52939000,
+	// Entry 300 - 31F
+	0x5293908d, 0x529390c6, 0x5293912e, 0x52f00000,
+	0x52f00161,
+} // Size: 3116 bytes
+
+// specialTagsStr is a space-separated list of the tags that receive a
+// compact index but are not core tags. It is split and parsed into
+// specialTags by init() in compact.go; compactID.tag() resolves
+// indices >= len(coreTags) into that slice.
+const specialTagsStr string = "ca-ES-valencia en-US-u-va-posix"
+
+// regionToGroups maps a region index to a bit mask of region groups
+// (presumably indexed by the internal regionID and matched against the
+// group field of regionIntelligibility — TODO confirm against the
+// generator). Machine-generated; do not edit by hand.
+var regionToGroups = []uint8{ // 357 elements
+	// Entry 0 - 3F
+	0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04,
+	0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00,
+	0x00, 0x04, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00,
+	0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x04,
+	// Entry 40 - 7F
+	0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+	0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x08,
+	0x00, 0x04, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+	// Entry 80 - BF
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00,
+	0x00, 0x04, 0x01, 0x00, 0x04, 0x02, 0x00, 0x04,
+	0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
+	0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x08, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00,
+	// Entry C0 - FF
+	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01,
+	0x04, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x05, 0x00, 0x00, 0x00,
+	0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	// Entry 100 - 13F
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+	0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x00, 0x04,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x01, 0x00, 0x05, 0x04, 0x00,
+	0x00, 0x04, 0x00, 0x04, 0x04, 0x05, 0x00, 0x00,
+	// Entry 140 - 17F
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00,
+} // Size: 381 bytes
+
+// paradigmLocales lists, per base language, its "paradigm" locales
+// (presumably [langID, scriptID, regionID] triples of representative
+// variants that are preferred in matching — TODO confirm against the
+// generator). Machine-generated; do not edit by hand.
+var paradigmLocales = [][3]uint16{ // 3 elements
+	0: [3]uint16{0x139, 0x0, 0x7b},
+	1: [3]uint16{0x13e, 0x0, 0x1f},
+	2: [3]uint16{0x3c0, 0x41, 0xee},
+} // Size: 42 bytes
+
+// mutualIntelligibility is an entry of matchLang: a pair of base
+// language IDs whose speakers can typically understand each other.
+type mutualIntelligibility struct {
+	want     uint16 // langID of the requested language
+	have     uint16 // langID of the available language
+	distance uint8  // match distance; see the confidence note on matchLang
+	oneway   bool   // true if intelligibility holds only from want to have
+}
+
+// scriptIntelligibility is an entry of matchScript: a language/script
+// pair whose readers can typically also read another script.
+type scriptIntelligibility struct {
+	wantLang   uint16 // langID of the requested language
+	haveLang   uint16 // langID of the available language
+	wantScript uint8  // scriptID of the requested script
+	haveScript uint8  // scriptID of the available script
+	distance   uint8  // match distance associated with this pair
+}
+
+// regionIntelligibility is an entry of matchRegion: it ties a language
+// (optionally restricted to a script) to a region group and distance.
+type regionIntelligibility struct {
+	lang     uint16 // langID this entry applies to
+	script   uint8  // scriptID restriction; 0 presumably means any — TODO confirm
+	group    uint8  // region group; high bit presumably flags negation — TODO confirm
+	distance uint8  // match distance applied when the group matches
+}
+
+// matchLang holds pairs of langIDs of base languages that are typically
+// mutually intelligible. Each pair is associated with a confidence and
+// whether the intelligibility goes one or both ways.
+//
+// Entries are ordered by increasing distance and, within one distance,
+// by increasing want (lookup code presumably relies on this ordering —
+// TODO confirm). Machine-generated; fix the generator and regenerate
+// rather than editing by hand.
+var matchLang = []mutualIntelligibility{ // 113 elements
+	0: {want: 0x1d1, have: 0xb7, distance: 0x4, oneway: false},
+	1: {want: 0x407, have: 0xb7, distance: 0x4, oneway: false},
+	2: {want: 0x407, have: 0x1d1, distance: 0x4, oneway: false},
+	3: {want: 0x407, have: 0x432, distance: 0x4, oneway: false},
+	4: {want: 0x43a, have: 0x1, distance: 0x4, oneway: false},
+	5: {want: 0x1a3, have: 0x10d, distance: 0x4, oneway: true},
+	6: {want: 0x295, have: 0x10d, distance: 0x4, oneway: true},
+	7: {want: 0x101, have: 0x36f, distance: 0x8, oneway: false},
+	8: {want: 0x101, have: 0x347, distance: 0x8, oneway: false},
+	9: {want: 0x5, have: 0x3e2, distance: 0xa, oneway: true},
+	10: {want: 0xd, have: 0x139, distance: 0xa, oneway: true},
+	11: {want: 0x16, have: 0x367, distance: 0xa, oneway: true},
+	12: {want: 0x21, have: 0x139, distance: 0xa, oneway: true},
+	13: {want: 0x56, have: 0x13e, distance: 0xa, oneway: true},
+	14: {want: 0x58, have: 0x3e2, distance: 0xa, oneway: true},
+	15: {want: 0x71, have: 0x3e2, distance: 0xa, oneway: true},
+	16: {want: 0x75, have: 0x139, distance: 0xa, oneway: true},
+	17: {want: 0x82, have: 0x1be, distance: 0xa, oneway: true},
+	18: {want: 0xa5, have: 0x139, distance: 0xa, oneway: true},
+	19: {want: 0xb2, have: 0x15e, distance: 0xa, oneway: true},
+	20: {want: 0xdd, have: 0x153, distance: 0xa, oneway: true},
+	21: {want: 0xe5, have: 0x139, distance: 0xa, oneway: true},
+	22: {want: 0xe9, have: 0x3a, distance: 0xa, oneway: true},
+	23: {want: 0xf0, have: 0x15e, distance: 0xa, oneway: true},
+	24: {want: 0xf9, have: 0x15e, distance: 0xa, oneway: true},
+	25: {want: 0x100, have: 0x139, distance: 0xa, oneway: true},
+	26: {want: 0x130, have: 0x139, distance: 0xa, oneway: true},
+	27: {want: 0x13c, have: 0x139, distance: 0xa, oneway: true},
+	28: {want: 0x140, have: 0x151, distance: 0xa, oneway: true},
+	29: {want: 0x145, have: 0x13e, distance: 0xa, oneway: true},
+	30: {want: 0x158, have: 0x101, distance: 0xa, oneway: true},
+	31: {want: 0x16d, have: 0x367, distance: 0xa, oneway: true},
+	32: {want: 0x16e, have: 0x139, distance: 0xa, oneway: true},
+	33: {want: 0x16f, have: 0x139, distance: 0xa, oneway: true},
+	34: {want: 0x17e, have: 0x139, distance: 0xa, oneway: true},
+	35: {want: 0x190, have: 0x13e, distance: 0xa, oneway: true},
+	36: {want: 0x194, have: 0x13e, distance: 0xa, oneway: true},
+	37: {want: 0x1a4, have: 0x1be, distance: 0xa, oneway: true},
+	38: {want: 0x1b4, have: 0x139, distance: 0xa, oneway: true},
+	39: {want: 0x1b8, have: 0x139, distance: 0xa, oneway: true},
+	40: {want: 0x1d4, have: 0x15e, distance: 0xa, oneway: true},
+	41: {want: 0x1d7, have: 0x3e2, distance: 0xa, oneway: true},
+	42: {want: 0x1d9, have: 0x139, distance: 0xa, oneway: true},
+	43: {want: 0x1e7, have: 0x139, distance: 0xa, oneway: true},
+	44: {want: 0x1f8, have: 0x139, distance: 0xa, oneway: true},
+	45: {want: 0x20e, have: 0x1e1, distance: 0xa, oneway: true},
+	46: {want: 0x210, have: 0x139, distance: 0xa, oneway: true},
+	47: {want: 0x22d, have: 0x15e, distance: 0xa, oneway: true},
+	48: {want: 0x242, have: 0x3e2, distance: 0xa, oneway: true},
+	49: {want: 0x24a, have: 0x139, distance: 0xa, oneway: true},
+	50: {want: 0x251, have: 0x139, distance: 0xa, oneway: true},
+	51: {want: 0x265, have: 0x139, distance: 0xa, oneway: true},
+	52: {want: 0x274, have: 0x48a, distance: 0xa, oneway: true},
+	53: {want: 0x28a, have: 0x3e2, distance: 0xa, oneway: true},
+	54: {want: 0x28e, have: 0x1f9, distance: 0xa, oneway: true},
+	55: {want: 0x2a3, have: 0x139, distance: 0xa, oneway: true},
+	56: {want: 0x2b5, have: 0x15e, distance: 0xa, oneway: true},
+	57: {want: 0x2b8, have: 0x139, distance: 0xa, oneway: true},
+	58: {want: 0x2be, have: 0x139, distance: 0xa, oneway: true},
+	59: {want: 0x2c3, have: 0x15e, distance: 0xa, oneway: true},
+	60: {want: 0x2ed, have: 0x139, distance: 0xa, oneway: true},
+	61: {want: 0x2f1, have: 0x15e, distance: 0xa, oneway: true},
+	62: {want: 0x2fa, have: 0x139, distance: 0xa, oneway: true},
+	63: {want: 0x2ff, have: 0x7e, distance: 0xa, oneway: true},
+	64: {want: 0x304, have: 0x139, distance: 0xa, oneway: true},
+	65: {want: 0x30b, have: 0x3e2, distance: 0xa, oneway: true},
+	66: {want: 0x31b, have: 0x1be, distance: 0xa, oneway: true},
+	67: {want: 0x31f, have: 0x1e1, distance: 0xa, oneway: true},
+	68: {want: 0x320, have: 0x139, distance: 0xa, oneway: true},
+	69: {want: 0x331, have: 0x139, distance: 0xa, oneway: true},
+	70: {want: 0x351, have: 0x139, distance: 0xa, oneway: true},
+	71: {want: 0x36a, have: 0x347, distance: 0xa, oneway: false},
+	72: {want: 0x36a, have: 0x36f, distance: 0xa, oneway: true},
+	73: {want: 0x37a, have: 0x139, distance: 0xa, oneway: true},
+	74: {want: 0x387, have: 0x139, distance: 0xa, oneway: true},
+	75: {want: 0x389, have: 0x139, distance: 0xa, oneway: true},
+	76: {want: 0x38b, have: 0x15e, distance: 0xa, oneway: true},
+	77: {want: 0x390, have: 0x139, distance: 0xa, oneway: true},
+	78: {want: 0x395, have: 0x139, distance: 0xa, oneway: true},
+	79: {want: 0x39d, have: 0x139, distance: 0xa, oneway: true},
+	80: {want: 0x3a5, have: 0x139, distance: 0xa, oneway: true},
+	81: {want: 0x3be, have: 0x139, distance: 0xa, oneway: true},
+	82: {want: 0x3c4, have: 0x13e, distance: 0xa, oneway: true},
+	83: {want: 0x3d4, have: 0x10d, distance: 0xa, oneway: true},
+	84: {want: 0x3d9, have: 0x139, distance: 0xa, oneway: true},
+	85: {want: 0x3e5, have: 0x15e, distance: 0xa, oneway: true},
+	86: {want: 0x3e9, have: 0x1be, distance: 0xa, oneway: true},
+	87: {want: 0x3fa, have: 0x139, distance: 0xa, oneway: true},
+	88: {want: 0x40c, have: 0x139, distance: 0xa, oneway: true},
+	89: {want: 0x423, have: 0x139, distance: 0xa, oneway: true},
+	90: {want: 0x429, have: 0x139, distance: 0xa, oneway: true},
+	91: {want: 0x431, have: 0x139, distance: 0xa, oneway: true},
+	92: {want: 0x43b, have: 0x139, distance: 0xa, oneway: true},
+	93: {want: 0x43e, have: 0x1e1, distance: 0xa, oneway: true},
+	94: {want: 0x445, have: 0x139, distance: 0xa, oneway: true},
+	95: {want: 0x450, have: 0x139, distance: 0xa, oneway: true},
+	96: {want: 0x461, have: 0x139, distance: 0xa, oneway: true},
+	97: {want: 0x467, have: 0x3e2, distance: 0xa, oneway: true},
+	98: {want: 0x46f, have: 0x139, distance: 0xa, oneway: true},
+	99: {want: 0x476, have: 0x3e2, distance: 0xa, oneway: true},
+	100: {want: 0x47a, have: 0x139, distance: 0xa, oneway: true}, // was 0x3883: out of sorted order and beyond the langID range used in these tables; regenerate to confirm
+	101: {want: 0x480, have: 0x139, distance: 0xa, oneway: true},
+	102: {want: 0x482, have: 0x139, distance: 0xa, oneway: true},
+	103: {want: 0x494, have: 0x3e2, distance: 0xa, oneway: true},
+	104: {want: 0x49d, have: 0x139, distance: 0xa, oneway: true},
+	105: {want: 0x4ac, have: 0x529, distance: 0xa, oneway: true},
+	106: {want: 0x4b4, have: 0x139, distance: 0xa, oneway: true},
+	107: {want: 0x4bc, have: 0x3e2, distance: 0xa, oneway: true},
+	108: {want: 0x4e5, have: 0x15e, distance: 0xa, oneway: true},
+	109: {want: 0x4f2, have: 0x139, distance: 0xa, oneway: true},
+	110: {want: 0x512, have: 0x139, distance: 0xa, oneway: true},
+	111: {want: 0x518, have: 0x139, distance: 0xa, oneway: true},
+	112: {want: 0x52f, have: 0x139, distance: 0xa, oneway: true},
+} // Size: 702 bytes
+
+// matchScript holds pairs of scriptIDs where readers of one script
+// can typically also read the other. Each is associated with a confidence.
+// Machine-generated; do not edit by hand.
+var matchScript = []scriptIntelligibility{ // 26 elements
+	0: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x57, haveScript: 0x1f, distance: 0x5},
+	1: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x1f, haveScript: 0x57, distance: 0x5},
+	2: {wantLang: 0x58, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
+	3: {wantLang: 0xa5, haveLang: 0x139, wantScript: 0xe, haveScript: 0x57, distance: 0xa},
+	4: {wantLang: 0x1d7, haveLang: 0x3e2, wantScript: 0x8, haveScript: 0x1f, distance: 0xa},
+	5: {wantLang: 0x210, haveLang: 0x139, wantScript: 0x2b, haveScript: 0x57, distance: 0xa},
+	6: {wantLang: 0x24a, haveLang: 0x139, wantScript: 0x4b, haveScript: 0x57, distance: 0xa},
+	7: {wantLang: 0x251, haveLang: 0x139, wantScript: 0x4f, haveScript: 0x57, distance: 0xa},
+	8: {wantLang: 0x2b8, haveLang: 0x139, wantScript: 0x54, haveScript: 0x57, distance: 0xa},
+	9: {wantLang: 0x304, haveLang: 0x139, wantScript: 0x6b, haveScript: 0x57, distance: 0xa},
+	10: {wantLang: 0x331, haveLang: 0x139, wantScript: 0x72, haveScript: 0x57, distance: 0xa},
+	11: {wantLang: 0x351, haveLang: 0x139, wantScript: 0x21, haveScript: 0x57, distance: 0xa},
+	12: {wantLang: 0x395, haveLang: 0x139, wantScript: 0x7d, haveScript: 0x57, distance: 0xa},
+	13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x33, haveScript: 0x57, distance: 0xa},
+	14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
+	15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
+	16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xca, haveScript: 0x57, distance: 0xa},
+	17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xd7, haveScript: 0x57, distance: 0xa},
+	18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xda, haveScript: 0x57, distance: 0xa},
+	19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x29, haveScript: 0x57, distance: 0xa},
+	20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
+	21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
+	22: {wantLang: 0x4bc, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
+	23: {wantLang: 0x512, haveLang: 0x139, wantScript: 0x3b, haveScript: 0x57, distance: 0xa},
+	24: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x38, haveScript: 0x39, distance: 0xf},
+	25: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x39, haveScript: 0x38, distance: 0x13},
+} // Size: 232 bytes
+
+// matchRegion holds entries that adjust the match distance for a
+// language when the candidate region falls into a given region group
+// (presumably resolved through regionToGroups — TODO confirm).
+// Machine-generated; do not edit by hand.
+var matchRegion = []regionIntelligibility{ // 15 elements
+	0: {lang: 0x3a, script: 0x0, group: 0x4, distance: 0x4},
+	1: {lang: 0x3a, script: 0x0, group: 0x84, distance: 0x4},
+	2: {lang: 0x139, script: 0x0, group: 0x1, distance: 0x4},
+	3: {lang: 0x139, script: 0x0, group: 0x81, distance: 0x4},
+	4: {lang: 0x13e, script: 0x0, group: 0x3, distance: 0x4},
+	5: {lang: 0x13e, script: 0x0, group: 0x83, distance: 0x4},
+	6: {lang: 0x3c0, script: 0x0, group: 0x3, distance: 0x4},
+	7: {lang: 0x3c0, script: 0x0, group: 0x83, distance: 0x4},
+	8: {lang: 0x529, script: 0x39, group: 0x2, distance: 0x4},
+	9: {lang: 0x529, script: 0x39, group: 0x82, distance: 0x4},
+	10: {lang: 0x3a, script: 0x0, group: 0x80, distance: 0x5},
+	11: {lang: 0x139, script: 0x0, group: 0x80, distance: 0x5},
+	12: {lang: 0x13e, script: 0x0, group: 0x80, distance: 0x5},
+	13: {lang: 0x3c0, script: 0x0, group: 0x80, distance: 0x5},
+	14: {lang: 0x529, script: 0x39, group: 0x80, distance: 0x5},
+} // Size: 114 bytes
+
+// Total table size 4618 bytes (4KiB); checksum: D161A896
diff --git a/internal/language/compact/tags.go b/internal/language/compact/tags.go
new file mode 100644
index 0000000..acc482a
--- /dev/null
+++ b/internal/language/compact/tags.go
@@ -0,0 +1,143 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+// TODO: Various sets of commonly used tags and regions.
+
+// MustParse is like Parse, but panics when s is not a valid BCP 47 tag.
+// It is intended for safe initialization of Tag values that are known to
+// be well-formed.
+func MustParse(s string) Tag {
+	tag, err := Parse(s)
+	if err != nil {
+		panic(err)
+	}
+	return tag
+}
+
+// MustParse is like Parse, but panics when s cannot be parsed as a BCP 47
+// tag under canonicalization type c. It is intended for safe
+// initialization of Tag values.
+func (c CanonType) MustParse(s string) Tag {
+	tag, err := c.Parse(s)
+	if err != nil {
+		panic(err)
+	}
+	return tag
+}
+
+// MustParseBase is like ParseBase, but panics when s is not a valid base
+// language. It is intended for safe initialization of Base values.
+func MustParseBase(s string) Base {
+	base, err := ParseBase(s)
+	if err != nil {
+		panic(err)
+	}
+	return base
+}
+
+// MustParseScript is like ParseScript, but panics when s is not a valid
+// script. It is intended for safe initialization of Script values.
+func MustParseScript(s string) Script {
+	script, err := ParseScript(s)
+	if err != nil {
+		panic(err)
+	}
+	return script
+}
+
+// MustParseRegion is like ParseRegion, but panics when s is not a valid
+// region. It is intended for safe initialization of Region values.
+func MustParseRegion(s string) Region {
+	region, err := ParseRegion(s)
+	if err != nil {
+		panic(err)
+	}
+	return region
+}
+
+var (
+	// und is the zero value of Tag: the undetermined (und) language.
+	und = Tag{}
+
+	// Und is the exported undetermined (und) language tag.
+	Und Tag = Tag{}
+
+	// Predefined tags for frequently used languages. Each pairs a compact
+	// language index with the matching locale index (the xxIndex constants
+	// are presumably defined in this package's generated tables — TODO
+	// confirm).
+	Afrikaans            Tag = Tag{language: afIndex, locale: afIndex}
+	Amharic              Tag = Tag{language: amIndex, locale: amIndex}
+	Arabic               Tag = Tag{language: arIndex, locale: arIndex}
+	ModernStandardArabic Tag = Tag{language: ar001Index, locale: ar001Index}
+	Azerbaijani          Tag = Tag{language: azIndex, locale: azIndex}
+	Bulgarian            Tag = Tag{language: bgIndex, locale: bgIndex}
+	Bengali              Tag = Tag{language: bnIndex, locale: bnIndex}
+	Catalan              Tag = Tag{language: caIndex, locale: caIndex}
+	Czech                Tag = Tag{language: csIndex, locale: csIndex}
+	Danish               Tag = Tag{language: daIndex, locale: daIndex}
+	German               Tag = Tag{language: deIndex, locale: deIndex}
+	Greek                Tag = Tag{language: elIndex, locale: elIndex}
+	English              Tag = Tag{language: enIndex, locale: enIndex}
+	AmericanEnglish      Tag = Tag{language: enUSIndex, locale: enUSIndex}
+	BritishEnglish       Tag = Tag{language: enGBIndex, locale: enGBIndex}
+	Spanish              Tag = Tag{language: esIndex, locale: esIndex}
+	EuropeanSpanish      Tag = Tag{language: esESIndex, locale: esESIndex}
+	LatinAmericanSpanish Tag = Tag{language: es419Index, locale: es419Index}
+	Estonian             Tag = Tag{language: etIndex, locale: etIndex}
+	Persian              Tag = Tag{language: faIndex, locale: faIndex}
+	Finnish              Tag = Tag{language: fiIndex, locale: fiIndex}
+	Filipino             Tag = Tag{language: filIndex, locale: filIndex}
+	French               Tag = Tag{language: frIndex, locale: frIndex}
+	CanadianFrench       Tag = Tag{language: frCAIndex, locale: frCAIndex}
+	Gujarati             Tag = Tag{language: guIndex, locale: guIndex}
+	Hebrew               Tag = Tag{language: heIndex, locale: heIndex}
+	Hindi                Tag = Tag{language: hiIndex, locale: hiIndex}
+	Croatian             Tag = Tag{language: hrIndex, locale: hrIndex}
+	Hungarian            Tag = Tag{language: huIndex, locale: huIndex}
+	Armenian             Tag = Tag{language: hyIndex, locale: hyIndex}
+	Indonesian           Tag = Tag{language: idIndex, locale: idIndex}
+	Icelandic            Tag = Tag{language: isIndex, locale: isIndex}
+	Italian              Tag = Tag{language: itIndex, locale: itIndex}
+	Japanese             Tag = Tag{language: jaIndex, locale: jaIndex}
+	Georgian             Tag = Tag{language: kaIndex, locale: kaIndex}
+	Kazakh               Tag = Tag{language: kkIndex, locale: kkIndex}
+	Khmer                Tag = Tag{language: kmIndex, locale: kmIndex}
+	Kannada              Tag = Tag{language: knIndex, locale: knIndex}
+	Korean               Tag = Tag{language: koIndex, locale: koIndex}
+	Kirghiz              Tag = Tag{language: kyIndex, locale: kyIndex}
+	Lao                  Tag = Tag{language: loIndex, locale: loIndex}
+	Lithuanian           Tag = Tag{language: ltIndex, locale: ltIndex}
+	Latvian              Tag = Tag{language: lvIndex, locale: lvIndex}
+	Macedonian           Tag = Tag{language: mkIndex, locale: mkIndex}
+	Malayalam            Tag = Tag{language: mlIndex, locale: mlIndex}
+	Mongolian            Tag = Tag{language: mnIndex, locale: mnIndex}
+	Marathi              Tag = Tag{language: mrIndex, locale: mrIndex}
+	Malay                Tag = Tag{language: msIndex, locale: msIndex}
+	Burmese              Tag = Tag{language: myIndex, locale: myIndex}
+	Nepali               Tag = Tag{language: neIndex, locale: neIndex}
+	Dutch                Tag = Tag{language: nlIndex, locale: nlIndex}
+	Norwegian            Tag = Tag{language: noIndex, locale: noIndex}
+	Punjabi              Tag = Tag{language: paIndex, locale: paIndex}
+	Polish               Tag = Tag{language: plIndex, locale: plIndex}
+	Portuguese           Tag = Tag{language: ptIndex, locale: ptIndex}
+	BrazilianPortuguese  Tag = Tag{language: ptBRIndex, locale: ptBRIndex}
+	EuropeanPortuguese   Tag = Tag{language: ptPTIndex, locale: ptPTIndex}
+	Romanian             Tag = Tag{language: roIndex, locale: roIndex}
+	Russian              Tag = Tag{language: ruIndex, locale: ruIndex}
+	Sinhala              Tag = Tag{language: siIndex, locale: siIndex}
+	Slovak               Tag = Tag{language: skIndex, locale: skIndex}
+	Slovenian            Tag = Tag{language: slIndex, locale: slIndex}
+	Albanian             Tag = Tag{language: sqIndex, locale: sqIndex}
+	Serbian              Tag = Tag{language: srIndex, locale: srIndex}
+	SerbianLatin         Tag = Tag{language: srLatnIndex, locale: srLatnIndex}
+	Swedish              Tag = Tag{language: svIndex, locale: svIndex}
+	Swahili              Tag = Tag{language: swIndex, locale: swIndex}
+	Tamil                Tag = Tag{language: taIndex, locale: taIndex}
+	Telugu               Tag = Tag{language: teIndex, locale: teIndex}
+	Thai                 Tag = Tag{language: thIndex, locale: thIndex}
+	Turkish              Tag = Tag{language: trIndex, locale: trIndex}
+	Ukrainian            Tag = Tag{language: ukIndex, locale: ukIndex}
+	Urdu                 Tag = Tag{language: urIndex, locale: urIndex}
+	Uzbek                Tag = Tag{language: uzIndex, locale: uzIndex}
+	Vietnamese           Tag = Tag{language: viIndex, locale: viIndex}
+	Chinese              Tag = Tag{language: zhIndex, locale: zhIndex}
+	SimplifiedChinese    Tag = Tag{language: zhHansIndex, locale: zhHansIndex}
+	TraditionalChinese   Tag = Tag{language: zhHantIndex, locale: zhHantIndex}
+	Zulu                 Tag = Tag{language: zuIndex, locale: zuIndex}
+)