language/internal: remove excess code
and make the minimal rewrite to make it build and pass tests
Removed higher-level functionality:
- canonicalization
- matching
- accept language
- coverage
- compact index
- compose
- higher level tag types
Added SuppressScript primitive to compensate for
Script removal.
Change-Id: I0e480ac6dcf78a2ab5d5a00aa0b480c8f1aed233
Reviewed-on: https://go-review.googlesource.com/95817
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/language/internal/coverage.go b/language/internal/coverage.go
index 101fd23..2ee499e 100644
--- a/language/internal/coverage.go
+++ b/language/internal/coverage.go
@@ -4,79 +4,21 @@
package language
-import (
- "fmt"
- "sort"
-)
-
-// The Coverage interface is used to define the level of coverage of an
-// internationalization service. Note that not all types are supported by all
-// services. As lists may be generated on the fly, it is recommended that users
-// of a Coverage cache the results.
-type Coverage interface {
- // Tags returns the list of supported tags.
- Tags() []Tag
-
- // BaseLanguages returns the list of supported base languages.
- BaseLanguages() []Base
-
- // Scripts returns the list of supported scripts.
- Scripts() []Script
-
- // Regions returns the list of supported regions.
- Regions() []Region
-}
-
-var (
- // Supported defines a Coverage that lists all supported subtags. Tags
- // always returns nil.
- Supported Coverage = allSubtags{}
-)
-
-// TODO:
-// - Support Variants, numbering systems.
-// - CLDR coverage levels.
-// - Set of common tags defined in this package.
-
-type allSubtags struct{}
-
-// Regions returns the list of supported regions. As all regions are in a
-// consecutive range, it simply returns a slice of numbers in increasing order.
-// The "undefined" region is not returned.
-func (s allSubtags) Regions() []Region {
- reg := make([]Region, numRegions)
- for i := range reg {
- reg[i] = Region{regionID(i + 1)}
- }
- return reg
-}
-
-// Scripts returns the list of supported scripts. As all scripts are in a
-// consecutive range, it simply returns a slice of numbers in increasing order.
-// The "undefined" script is not returned.
-func (s allSubtags) Scripts() []Script {
- scr := make([]Script, numScripts)
- for i := range scr {
- scr[i] = Script{scriptID(i + 1)}
- }
- return scr
-}
-
// BaseLanguages returns the list of all supported base languages. It generates
// the list by traversing the internal structures.
-func (s allSubtags) BaseLanguages() []Base {
- base := make([]Base, 0, numLanguages)
+func BaseLanguages() []langID {
+ base := make([]langID, 0, numLanguages)
for i := 0; i < langNoIndexOffset; i++ {
// We included "und" already for the value 0.
if i != nonCanonicalUnd {
- base = append(base, Base{langID(i)})
+ base = append(base, langID(i))
}
}
i := langNoIndexOffset
for _, v := range langNoIndex {
for k := 0; k < 8; k++ {
if v&1 == 1 {
- base = append(base, Base{langID(i)})
+ base = append(base, langID(i))
}
v >>= 1
i++
@@ -84,114 +26,3 @@
}
return base
}
-
-// Tags always returns nil.
-func (s allSubtags) Tags() []Tag {
- return nil
-}
-
-// coverage is used used by NewCoverage which is used as a convenient way for
-// creating Coverage implementations for partially defined data. Very often a
-// package will only need to define a subset of slices. coverage provides a
-// convenient way to do this. Moreover, packages using NewCoverage, instead of
-// their own implementation, will not break if later new slice types are added.
-type coverage struct {
- tags func() []Tag
- bases func() []Base
- scripts func() []Script
- regions func() []Region
-}
-
-func (s *coverage) Tags() []Tag {
- if s.tags == nil {
- return nil
- }
- return s.tags()
-}
-
-// bases implements sort.Interface and is used to sort base languages.
-type bases []Base
-
-func (b bases) Len() int {
- return len(b)
-}
-
-func (b bases) Swap(i, j int) {
- b[i], b[j] = b[j], b[i]
-}
-
-func (b bases) Less(i, j int) bool {
- return b[i].langID < b[j].langID
-}
-
-// BaseLanguages returns the result from calling s.bases if it is specified or
-// otherwise derives the set of supported base languages from tags.
-func (s *coverage) BaseLanguages() []Base {
- if s.bases == nil {
- tags := s.Tags()
- if len(tags) == 0 {
- return nil
- }
- a := make([]Base, len(tags))
- for i, t := range tags {
- a[i] = Base{langID(t.lang)}
- }
- sort.Sort(bases(a))
- k := 0
- for i := 1; i < len(a); i++ {
- if a[k] != a[i] {
- k++
- a[k] = a[i]
- }
- }
- return a[:k+1]
- }
- return s.bases()
-}
-
-func (s *coverage) Scripts() []Script {
- if s.scripts == nil {
- return nil
- }
- return s.scripts()
-}
-
-func (s *coverage) Regions() []Region {
- if s.regions == nil {
- return nil
- }
- return s.regions()
-}
-
-// NewCoverage returns a Coverage for the given lists. It is typically used by
-// packages providing internationalization services to define their level of
-// coverage. A list may be of type []T or func() []T, where T is either Tag,
-// Base, Script or Region. The returned Coverage derives the value for Bases
-// from Tags if no func or slice for []Base is specified. For other unspecified
-// types the returned Coverage will return nil for the respective methods.
-func NewCoverage(list ...interface{}) Coverage {
- s := &coverage{}
- for _, x := range list {
- switch v := x.(type) {
- case func() []Base:
- s.bases = v
- case func() []Script:
- s.scripts = v
- case func() []Region:
- s.regions = v
- case func() []Tag:
- s.tags = v
- case []Base:
- s.bases = func() []Base { return v }
- case []Script:
- s.scripts = func() []Script { return v }
- case []Region:
- s.regions = func() []Region { return v }
- case []Tag:
- s.tags = func() []Tag { return v }
- default:
- panic(fmt.Sprintf("language: unsupported set type %T", v))
- }
- }
- return s
-}
diff --git a/language/internal/coverage_test.go b/language/internal/coverage_test.go
deleted file mode 100644
index 8e08e5c..0000000
--- a/language/internal/coverage_test.go
+++ /dev/null
@@ -1,154 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package language
-
-import (
- "fmt"
- "reflect"
- "testing"
-)
-
-func TestSupported(t *testing.T) {
- // To prove the results are correct for a type, we test that the number of
- // results is identical to the number of results on record, that all results
- // are distinct and that all results are valid.
- tests := map[string]int{
- "BaseLanguages": numLanguages,
- "Scripts": numScripts,
- "Regions": numRegions,
- "Tags": 0,
- }
- sup := reflect.ValueOf(Supported)
- for name, num := range tests {
- v := sup.MethodByName(name).Call(nil)[0]
- if n := v.Len(); n != num {
- t.Errorf("len(%s()) was %d; want %d", name, n, num)
- }
- dup := make(map[string]bool)
- for i := 0; i < v.Len(); i++ {
- x := v.Index(i).Interface()
- // An invalid value will either cause a crash or result in a
- // duplicate when passed to Sprint.
- s := fmt.Sprint(x)
- if dup[s] {
- t.Errorf("%s: duplicate entry %q", name, s)
- }
- dup[s] = true
- }
- if len(dup) != v.Len() {
- t.Errorf("%s: # unique entries was %d; want %d", name, len(dup), v.Len())
- }
- }
-}
-
-func TestNewCoverage(t *testing.T) {
- bases := []Base{Base{0}, Base{3}, Base{7}}
- scripts := []Script{Script{11}, Script{17}, Script{23}}
- regions := []Region{Region{101}, Region{103}, Region{107}}
- tags := []Tag{Make("pt"), Make("en"), Make("en-GB"), Make("en-US"), Make("pt-PT")}
- fbases := func() []Base { return bases }
- fscripts := func() []Script { return scripts }
- fregions := func() []Region { return regions }
- ftags := func() []Tag { return tags }
-
- tests := []struct {
- desc string
- list []interface{}
- bases []Base
- scripts []Script
- regions []Region
- tags []Tag
- }{
- {
- desc: "empty",
- },
- {
- desc: "bases",
- list: []interface{}{bases},
- bases: bases,
- },
- {
- desc: "scripts",
- list: []interface{}{scripts},
- scripts: scripts,
- },
- {
- desc: "regions",
- list: []interface{}{regions},
- regions: regions,
- },
- {
- desc: "bases derives from tags",
- list: []interface{}{tags},
- bases: []Base{Base{_en}, Base{_pt}},
- tags: tags,
- },
- {
- desc: "tags and bases",
- list: []interface{}{tags, bases},
- bases: bases,
- tags: tags,
- },
- {
- desc: "fully specified",
- list: []interface{}{tags, bases, scripts, regions},
- bases: bases,
- scripts: scripts,
- regions: regions,
- tags: tags,
- },
- {
- desc: "bases func",
- list: []interface{}{fbases},
- bases: bases,
- },
- {
- desc: "scripts func",
- list: []interface{}{fscripts},
- scripts: scripts,
- },
- {
- desc: "regions func",
- list: []interface{}{fregions},
- regions: regions,
- },
- {
- desc: "tags func",
- list: []interface{}{ftags},
- bases: []Base{Base{_en}, Base{_pt}},
- tags: tags,
- },
- {
- desc: "tags and bases",
- list: []interface{}{ftags, fbases},
- bases: bases,
- tags: tags,
- },
- {
- desc: "fully specified",
- list: []interface{}{ftags, fbases, fscripts, fregions},
- bases: bases,
- scripts: scripts,
- regions: regions,
- tags: tags,
- },
- }
-
- for i, tt := range tests {
- l := NewCoverage(tt.list...)
- if a := l.BaseLanguages(); !reflect.DeepEqual(a, tt.bases) {
- t.Errorf("%d:%s: BaseLanguages was %v; want %v", i, tt.desc, a, tt.bases)
- }
- if a := l.Scripts(); !reflect.DeepEqual(a, tt.scripts) {
- t.Errorf("%d:%s: Scripts was %v; want %v", i, tt.desc, a, tt.scripts)
- }
- if a := l.Regions(); !reflect.DeepEqual(a, tt.regions) {
- t.Errorf("%d:%s: Regions was %v; want %v", i, tt.desc, a, tt.regions)
- }
- if a := l.Tags(); !reflect.DeepEqual(a, tt.tags) {
- t.Errorf("%d:%s: Tags was %v; want %v", i, tt.desc, a, tt.tags)
- }
- }
-}
diff --git a/language/internal/gen.go b/language/internal/gen.go
index 302f194..4d99097 100644
--- a/language/internal/gen.go
+++ b/language/internal/gen.go
@@ -1361,190 +1361,6 @@
b.writeSlice("likelyRegionGroup", likelyRegionGroup)
}
-type mutualIntelligibility struct {
- want, have uint16
- distance uint8
- oneway bool
-}
-
-type scriptIntelligibility struct {
- wantLang, haveLang uint16
- wantScript, haveScript uint8
- distance uint8
- // Always oneway
-}
-
-type regionIntelligibility struct {
- lang uint16 // compact language id
- script uint8 // 0 means any
- group uint8 // 0 means any; if bit 7 is set it means inverse
- distance uint8
- // Always twoway.
-}
-
-// writeMatchData writes tables with languages and scripts for which there is
-// mutual intelligibility. The data is based on CLDR's languageMatching data.
-// Note that we use a different algorithm than the one defined by CLDR and that
-// we slightly modify the data. For example, we convert scores to confidence levels.
-// We also drop all region-related data as we use a different algorithm to
-// determine region equivalence.
-func (b *builder) writeMatchData() {
- lm := b.supp.LanguageMatching.LanguageMatches
- cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new")
-
- regionHierarchy := map[string][]string{}
- for _, g := range b.supp.TerritoryContainment.Group {
- regions := strings.Split(g.Contains, " ")
- regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...)
- }
- regionToGroups := make([]uint8, len(b.region.s))
-
- idToIndex := map[string]uint8{}
- for i, mv := range lm[0].MatchVariable {
- if i > 6 {
- log.Fatalf("Too many groups: %d", i)
- }
- idToIndex[mv.Id] = uint8(i + 1)
- // TODO: also handle '-'
- for _, r := range strings.Split(mv.Value, "+") {
- todo := []string{r}
- for k := 0; k < len(todo); k++ {
- r := todo[k]
- regionToGroups[b.region.index(r)] |= 1 << uint8(i)
- todo = append(todo, regionHierarchy[r]...)
- }
- }
- }
- b.writeSlice("regionToGroups", regionToGroups)
-
- // maps language id to in- and out-of-group region.
- paradigmLocales := [][3]uint16{}
- locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ")
- for i := 0; i < len(locales); i += 2 {
- x := [3]uint16{}
- for j := 0; j < 2; j++ {
- pc := strings.SplitN(locales[i+j], "-", 2)
- x[0] = b.langIndex(pc[0])
- if len(pc) == 2 {
- x[1+j] = uint16(b.region.index(pc[1]))
- }
- }
- paradigmLocales = append(paradigmLocales, x)
- }
- b.writeSlice("paradigmLocales", paradigmLocales)
-
- b.writeType(mutualIntelligibility{})
- b.writeType(scriptIntelligibility{})
- b.writeType(regionIntelligibility{})
-
- matchLang := []mutualIntelligibility{}
- matchScript := []scriptIntelligibility{}
- matchRegion := []regionIntelligibility{}
- // Convert the languageMatch entries in lists keyed by desired language.
- for _, m := range lm[0].LanguageMatch {
- // Different versions of CLDR use different separators.
- desired := strings.Replace(m.Desired, "-", "_", -1)
- supported := strings.Replace(m.Supported, "-", "_", -1)
- d := strings.Split(desired, "_")
- s := strings.Split(supported, "_")
- if len(d) != len(s) {
- log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
- continue
- }
- distance, _ := strconv.ParseInt(m.Distance, 10, 8)
- switch len(d) {
- case 2:
- if desired == supported && desired == "*_*" {
- continue
- }
- // language-script pair.
- matchScript = append(matchScript, scriptIntelligibility{
- wantLang: uint16(b.langIndex(d[0])),
- haveLang: uint16(b.langIndex(s[0])),
- wantScript: uint8(b.script.index(d[1])),
- haveScript: uint8(b.script.index(s[1])),
- distance: uint8(distance),
- })
- if m.Oneway != "true" {
- matchScript = append(matchScript, scriptIntelligibility{
- wantLang: uint16(b.langIndex(s[0])),
- haveLang: uint16(b.langIndex(d[0])),
- wantScript: uint8(b.script.index(s[1])),
- haveScript: uint8(b.script.index(d[1])),
- distance: uint8(distance),
- })
- }
- case 1:
- if desired == supported && desired == "*" {
- continue
- }
- if distance == 1 {
- // nb == no is already handled by macro mapping. Check there
- // really is only this case.
- if d[0] != "no" || s[0] != "nb" {
- log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
- }
- continue
- }
- // TODO: consider dropping oneway field and just doubling the entry.
- matchLang = append(matchLang, mutualIntelligibility{
- want: uint16(b.langIndex(d[0])),
- have: uint16(b.langIndex(s[0])),
- distance: uint8(distance),
- oneway: m.Oneway == "true",
- })
- case 3:
- if desired == supported && desired == "*_*_*" {
- continue
- }
- if desired != supported {
- // This is now supported by CLDR, but only one case, which
- // should already be covered by paradigm locales. For instance,
- // test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in
- // testdata/CLDRLocaleMatcherTest.txt tests this.
- if supported != "en_*_GB" {
- log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
- }
- continue
- }
- ri := regionIntelligibility{
- lang: b.langIndex(d[0]),
- distance: uint8(distance),
- }
- if d[1] != "*" {
- ri.script = uint8(b.script.index(d[1]))
- }
- switch {
- case d[2] == "*":
- ri.group = 0x80 // not contained in anything
- case strings.HasPrefix(d[2], "$!"):
- ri.group = 0x80
- d[2] = "$" + d[2][len("$!"):]
- fallthrough
- case strings.HasPrefix(d[2], "$"):
- ri.group |= idToIndex[d[2]]
- }
- matchRegion = append(matchRegion, ri)
- default:
- log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
- }
- }
- sort.SliceStable(matchLang, func(i, j int) bool {
- return matchLang[i].distance < matchLang[j].distance
- })
- b.writeSlice("matchLang", matchLang)
-
- sort.SliceStable(matchScript, func(i, j int) bool {
- return matchScript[i].distance < matchScript[j].distance
- })
- b.writeSlice("matchScript", matchScript)
-
- sort.SliceStable(matchRegion, func(i, j int) bool {
- return matchRegion[i].distance < matchRegion[j].distance
- })
- b.writeSlice("matchRegion", matchRegion)
-}
-
func (b *builder) writeRegionInclusionData() {
var (
// mm holds for each group the set of groups with a distance of 1.
@@ -1706,7 +1522,6 @@
// TODO: b.writeLocale()
b.computeRegionGroups()
b.writeLikelyData()
- b.writeMatchData()
b.writeRegionInclusionData()
b.writeParents()
}
diff --git a/language/internal/gen_index.go b/language/internal/gen_index.go
deleted file mode 100644
index 5ca9bcc..0000000
--- a/language/internal/gen_index.go
+++ /dev/null
@@ -1,162 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-// This file generates derivative tables based on the language package itself.
-
-import (
- "bytes"
- "flag"
- "fmt"
- "io/ioutil"
- "log"
- "reflect"
- "sort"
- "strings"
-
- "golang.org/x/text/internal/gen"
- "golang.org/x/text/language"
- "golang.org/x/text/unicode/cldr"
-)
-
-var (
- test = flag.Bool("test", false,
- "test existing tables; can be used to compare web data with package data.")
-
- draft = flag.String("draft",
- "contributed",
- `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
-)
-
-func main() {
- gen.Init()
-
- // Read the CLDR zip file.
- r := gen.OpenCLDRCoreZip()
- defer r.Close()
-
- d := &cldr.Decoder{}
- data, err := d.DecodeZip(r)
- if err != nil {
- log.Fatalf("DecodeZip: %v", err)
- }
-
- w := gen.NewCodeWriter()
- defer func() {
- buf := &bytes.Buffer{}
-
- if _, err = w.WriteGo(buf, "language", ""); err != nil {
- log.Fatalf("Error formatting file index.go: %v", err)
- }
-
- // Since we're generating a table for our own package we need to rewrite
- // doing the equivalent of go fmt -r 'language.b -> b'. Using
- // bytes.Replace will do.
- out := bytes.Replace(buf.Bytes(), []byte("language."), nil, -1)
- if err := ioutil.WriteFile("index.go", out, 0600); err != nil {
- log.Fatalf("Could not create file index.go: %v", err)
- }
- }()
-
- m := map[language.Tag]bool{}
- for _, lang := range data.Locales() {
- // We include all locales unconditionally to be consistent with en_US.
- // We want en_US, even though it has no data associated with it.
-
- // TODO: put any of the languages for which no data exists at the end
- // of the index. This allows all components based on ICU to use that
- // as the cutoff point.
- // if x := data.RawLDML(lang); false ||
- // x.LocaleDisplayNames != nil ||
- // x.Characters != nil ||
- // x.Delimiters != nil ||
- // x.Measurement != nil ||
- // x.Dates != nil ||
- // x.Numbers != nil ||
- // x.Units != nil ||
- // x.ListPatterns != nil ||
- // x.Collations != nil ||
- // x.Segmentations != nil ||
- // x.Rbnf != nil ||
- // x.Annotations != nil ||
- // x.Metadata != nil {
-
- // TODO: support POSIX natively, albeit non-standard.
- tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
- m[tag] = true
- // }
- }
- // Include locales for plural rules, which uses a different structure.
- for _, plurals := range data.Supplemental().Plurals {
- for _, rules := range plurals.PluralRules {
- for _, lang := range strings.Split(rules.Locales, " ") {
- m[language.Make(lang)] = true
- }
- }
- }
-
- var core, special []language.Tag
-
- for t := range m {
- if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
- log.Fatalf("Unexpected extension %v in %v", x, t)
- }
- if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
- core = append(core, t)
- } else {
- special = append(special, t)
- }
- }
-
- w.WriteComment(`
- NumCompactTags is the number of common tags. The maximum tag is
- NumCompactTags-1.`)
- w.WriteConst("NumCompactTags", len(core)+len(special))
-
- sort.Sort(byAlpha(special))
- w.WriteVar("specialTags", special)
-
- // TODO: order by frequency?
- sort.Sort(byAlpha(core))
-
- // Size computations are just an estimate.
- w.Size += int(reflect.TypeOf(map[uint32]uint16{}).Size())
- w.Size += len(core) * 6 // size of uint32 and uint16
-
- fmt.Fprintln(w)
- fmt.Fprintln(w, "var coreTags = map[uint32]uint16{")
- fmt.Fprintln(w, "0x0: 0, // und")
- i := len(special) + 1 // Und and special tags already written.
- for _, t := range core {
- if t == language.Und {
- continue
- }
- fmt.Fprint(w.Hash, t, i)
- b, s, r := t.Raw()
- fmt.Fprintf(w, "0x%s%s%s: %d, // %s\n",
- getIndex(b, 3), // 3 is enough as it is guaranteed to be a compact number
- getIndex(s, 2),
- getIndex(r, 3),
- i, t)
- i++
- }
- fmt.Fprintln(w, "}")
-}
-
-// getIndex prints the subtag type and extracts its index of size nibble.
-// If the index is less than n nibbles, the result is prefixed with 0s.
-func getIndex(x interface{}, n int) string {
- s := fmt.Sprintf("%#v", x) // s is of form Type{typeID: 0x00}
- s = s[strings.Index(s, "0x")+2 : len(s)-1]
- return strings.Repeat("0", n-len(s)) + s
-}
-
-type byAlpha []language.Tag
-
-func (a byAlpha) Len() int { return len(a) }
-func (a byAlpha) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
-func (a byAlpha) Less(i, j int) bool { return a[i].String() < a[j].String() }
diff --git a/language/internal/go1_1.go b/language/internal/go1_1.go
deleted file mode 100644
index 380f4c0..0000000
--- a/language/internal/go1_1.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !go1.2
-
-package language
-
-import "sort"
-
-func sortStable(s sort.Interface) {
- ss := stableSort{
- s: s,
- pos: make([]int, s.Len()),
- }
- for i := range ss.pos {
- ss.pos[i] = i
- }
- sort.Sort(&ss)
-}
-
-type stableSort struct {
- s sort.Interface
- pos []int
-}
-
-func (s *stableSort) Len() int {
- return len(s.pos)
-}
-
-func (s *stableSort) Less(i, j int) bool {
- return s.s.Less(i, j) || !s.s.Less(j, i) && s.pos[i] < s.pos[j]
-}
-
-func (s *stableSort) Swap(i, j int) {
- s.s.Swap(i, j)
- s.pos[i], s.pos[j] = s.pos[j], s.pos[i]
-}
diff --git a/language/internal/go1_2.go b/language/internal/go1_2.go
deleted file mode 100644
index 38268c5..0000000
--- a/language/internal/go1_2.go
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build go1.2
-
-package language
-
-import "sort"
-
-var sortStable = sort.Stable
diff --git a/language/internal/httpexample_test.go b/language/internal/httpexample_test.go
deleted file mode 100644
index 40d0663..0000000
--- a/language/internal/httpexample_test.go
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package language_test
-
-import (
- "fmt"
- "net/http"
- "strings"
-
- "golang.org/x/text/language"
-)
-
-// matcher is a language.Matcher configured for all supported languages.
-var matcher = language.NewMatcher([]language.Tag{
- language.BritishEnglish,
- language.Norwegian,
- language.German,
-})
-
-// handler is a http.HandlerFunc.
-func handler(w http.ResponseWriter, r *http.Request) {
- t, q, err := language.ParseAcceptLanguage(r.Header.Get("Accept-Language"))
- // We ignore the error: the default language will be selected for t == nil.
- tag, _, _ := matcher.Match(t...)
- fmt.Printf("%5v (t: %6v; q: %3v; err: %v)\n", tag, t, q, err)
-}
-
-func ExampleParseAcceptLanguage() {
- for _, al := range []string{
- "nn;q=0.3, en-us;q=0.8, en,",
- "gsw, en;q=0.7, en-US;q=0.8",
- "gsw, nl, da",
- "invalid",
- } {
- // Create dummy request with Accept-Language set and pass it to handler.
- r, _ := http.NewRequest("GET", "example.com", strings.NewReader("Hello"))
- r.Header.Set("Accept-Language", al)
- handler(nil, r)
- }
-
- // Output:
- // en-GB (t: [ en en-US nn]; q: [ 1 0.8 0.3]; err: <nil>)
- // en-GB (t: [ gsw en-US en]; q: [ 1 0.8 0.7]; err: <nil>)
- // de (t: [ gsw nl da]; q: [ 1 1 1]; err: <nil>)
- // en-GB (t: []; q: []; err: language: tag is not well-formed)
-}
diff --git a/language/internal/index.go b/language/internal/index.go
deleted file mode 100644
index 5311e5c..0000000
--- a/language/internal/index.go
+++ /dev/null
@@ -1,783 +0,0 @@
-// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
-
-package language
-
-// NumCompactTags is the number of common tags. The maximum tag is
-// NumCompactTags-1.
-const NumCompactTags = 768
-
-var specialTags = []Tag{ // 2 elements
- 0: {lang: 0xd7, region: 0x6e, script: 0x0, pVariant: 0x5, pExt: 0xe, str: "ca-ES-valencia"},
- 1: {lang: 0x139, region: 0x135, script: 0x0, pVariant: 0x5, pExt: 0x5, str: "en-US-u-va-posix"},
-} // Size: 72 bytes
-
-var coreTags = map[uint32]uint16{
- 0x0: 0, // und
- 0x01600000: 3, // af
- 0x016000d2: 4, // af-NA
- 0x01600161: 5, // af-ZA
- 0x01c00000: 6, // agq
- 0x01c00052: 7, // agq-CM
- 0x02100000: 8, // ak
- 0x02100080: 9, // ak-GH
- 0x02700000: 10, // am
- 0x0270006f: 11, // am-ET
- 0x03a00000: 12, // ar
- 0x03a00001: 13, // ar-001
- 0x03a00023: 14, // ar-AE
- 0x03a00039: 15, // ar-BH
- 0x03a00062: 16, // ar-DJ
- 0x03a00067: 17, // ar-DZ
- 0x03a0006b: 18, // ar-EG
- 0x03a0006c: 19, // ar-EH
- 0x03a0006d: 20, // ar-ER
- 0x03a00097: 21, // ar-IL
- 0x03a0009b: 22, // ar-IQ
- 0x03a000a1: 23, // ar-JO
- 0x03a000a8: 24, // ar-KM
- 0x03a000ac: 25, // ar-KW
- 0x03a000b0: 26, // ar-LB
- 0x03a000b9: 27, // ar-LY
- 0x03a000ba: 28, // ar-MA
- 0x03a000c9: 29, // ar-MR
- 0x03a000e1: 30, // ar-OM
- 0x03a000ed: 31, // ar-PS
- 0x03a000f3: 32, // ar-QA
- 0x03a00108: 33, // ar-SA
- 0x03a0010b: 34, // ar-SD
- 0x03a00115: 35, // ar-SO
- 0x03a00117: 36, // ar-SS
- 0x03a0011c: 37, // ar-SY
- 0x03a00120: 38, // ar-TD
- 0x03a00128: 39, // ar-TN
- 0x03a0015e: 40, // ar-YE
- 0x04000000: 41, // ars
- 0x04300000: 42, // as
- 0x04300099: 43, // as-IN
- 0x04400000: 44, // asa
- 0x0440012f: 45, // asa-TZ
- 0x04800000: 46, // ast
- 0x0480006e: 47, // ast-ES
- 0x05800000: 48, // az
- 0x0581f000: 49, // az-Cyrl
- 0x0581f032: 50, // az-Cyrl-AZ
- 0x05857000: 51, // az-Latn
- 0x05857032: 52, // az-Latn-AZ
- 0x05e00000: 53, // bas
- 0x05e00052: 54, // bas-CM
- 0x07100000: 55, // be
- 0x07100047: 56, // be-BY
- 0x07500000: 57, // bem
- 0x07500162: 58, // bem-ZM
- 0x07900000: 59, // bez
- 0x0790012f: 60, // bez-TZ
- 0x07e00000: 61, // bg
- 0x07e00038: 62, // bg-BG
- 0x08200000: 63, // bh
- 0x0a000000: 64, // bm
- 0x0a0000c3: 65, // bm-ML
- 0x0a500000: 66, // bn
- 0x0a500035: 67, // bn-BD
- 0x0a500099: 68, // bn-IN
- 0x0a900000: 69, // bo
- 0x0a900053: 70, // bo-CN
- 0x0a900099: 71, // bo-IN
- 0x0b200000: 72, // br
- 0x0b200078: 73, // br-FR
- 0x0b500000: 74, // brx
- 0x0b500099: 75, // brx-IN
- 0x0b700000: 76, // bs
- 0x0b71f000: 77, // bs-Cyrl
- 0x0b71f033: 78, // bs-Cyrl-BA
- 0x0b757000: 79, // bs-Latn
- 0x0b757033: 80, // bs-Latn-BA
- 0x0d700000: 81, // ca
- 0x0d700022: 82, // ca-AD
- 0x0d70006e: 83, // ca-ES
- 0x0d700078: 84, // ca-FR
- 0x0d70009e: 85, // ca-IT
- 0x0db00000: 86, // ccp
- 0x0db00035: 87, // ccp-BD
- 0x0db00099: 88, // ccp-IN
- 0x0dc00000: 89, // ce
- 0x0dc00106: 90, // ce-RU
- 0x0df00000: 91, // cgg
- 0x0df00131: 92, // cgg-UG
- 0x0e500000: 93, // chr
- 0x0e500135: 94, // chr-US
- 0x0e900000: 95, // ckb
- 0x0e90009b: 96, // ckb-IQ
- 0x0e90009c: 97, // ckb-IR
- 0x0fa00000: 98, // cs
- 0x0fa0005e: 99, // cs-CZ
- 0x0fe00000: 100, // cu
- 0x0fe00106: 101, // cu-RU
- 0x10000000: 102, // cy
- 0x1000007b: 103, // cy-GB
- 0x10100000: 104, // da
- 0x10100063: 105, // da-DK
- 0x10100082: 106, // da-GL
- 0x10800000: 107, // dav
- 0x108000a4: 108, // dav-KE
- 0x10d00000: 109, // de
- 0x10d0002e: 110, // de-AT
- 0x10d00036: 111, // de-BE
- 0x10d0004e: 112, // de-CH
- 0x10d00060: 113, // de-DE
- 0x10d0009e: 114, // de-IT
- 0x10d000b2: 115, // de-LI
- 0x10d000b7: 116, // de-LU
- 0x11700000: 117, // dje
- 0x117000d4: 118, // dje-NE
- 0x11f00000: 119, // dsb
- 0x11f00060: 120, // dsb-DE
- 0x12400000: 121, // dua
- 0x12400052: 122, // dua-CM
- 0x12800000: 123, // dv
- 0x12b00000: 124, // dyo
- 0x12b00114: 125, // dyo-SN
- 0x12d00000: 126, // dz
- 0x12d00043: 127, // dz-BT
- 0x12f00000: 128, // ebu
- 0x12f000a4: 129, // ebu-KE
- 0x13000000: 130, // ee
- 0x13000080: 131, // ee-GH
- 0x13000122: 132, // ee-TG
- 0x13600000: 133, // el
- 0x1360005d: 134, // el-CY
- 0x13600087: 135, // el-GR
- 0x13900000: 136, // en
- 0x13900001: 137, // en-001
- 0x1390001a: 138, // en-150
- 0x13900025: 139, // en-AG
- 0x13900026: 140, // en-AI
- 0x1390002d: 141, // en-AS
- 0x1390002e: 142, // en-AT
- 0x1390002f: 143, // en-AU
- 0x13900034: 144, // en-BB
- 0x13900036: 145, // en-BE
- 0x1390003a: 146, // en-BI
- 0x1390003d: 147, // en-BM
- 0x13900042: 148, // en-BS
- 0x13900046: 149, // en-BW
- 0x13900048: 150, // en-BZ
- 0x13900049: 151, // en-CA
- 0x1390004a: 152, // en-CC
- 0x1390004e: 153, // en-CH
- 0x13900050: 154, // en-CK
- 0x13900052: 155, // en-CM
- 0x1390005c: 156, // en-CX
- 0x1390005d: 157, // en-CY
- 0x13900060: 158, // en-DE
- 0x13900061: 159, // en-DG
- 0x13900063: 160, // en-DK
- 0x13900064: 161, // en-DM
- 0x1390006d: 162, // en-ER
- 0x13900072: 163, // en-FI
- 0x13900073: 164, // en-FJ
- 0x13900074: 165, // en-FK
- 0x13900075: 166, // en-FM
- 0x1390007b: 167, // en-GB
- 0x1390007c: 168, // en-GD
- 0x1390007f: 169, // en-GG
- 0x13900080: 170, // en-GH
- 0x13900081: 171, // en-GI
- 0x13900083: 172, // en-GM
- 0x1390008a: 173, // en-GU
- 0x1390008c: 174, // en-GY
- 0x1390008d: 175, // en-HK
- 0x13900096: 176, // en-IE
- 0x13900097: 177, // en-IL
- 0x13900098: 178, // en-IM
- 0x13900099: 179, // en-IN
- 0x1390009a: 180, // en-IO
- 0x1390009f: 181, // en-JE
- 0x139000a0: 182, // en-JM
- 0x139000a4: 183, // en-KE
- 0x139000a7: 184, // en-KI
- 0x139000a9: 185, // en-KN
- 0x139000ad: 186, // en-KY
- 0x139000b1: 187, // en-LC
- 0x139000b4: 188, // en-LR
- 0x139000b5: 189, // en-LS
- 0x139000bf: 190, // en-MG
- 0x139000c0: 191, // en-MH
- 0x139000c6: 192, // en-MO
- 0x139000c7: 193, // en-MP
- 0x139000ca: 194, // en-MS
- 0x139000cb: 195, // en-MT
- 0x139000cc: 196, // en-MU
- 0x139000ce: 197, // en-MW
- 0x139000d0: 198, // en-MY
- 0x139000d2: 199, // en-NA
- 0x139000d5: 200, // en-NF
- 0x139000d6: 201, // en-NG
- 0x139000d9: 202, // en-NL
- 0x139000dd: 203, // en-NR
- 0x139000df: 204, // en-NU
- 0x139000e0: 205, // en-NZ
- 0x139000e6: 206, // en-PG
- 0x139000e7: 207, // en-PH
- 0x139000e8: 208, // en-PK
- 0x139000eb: 209, // en-PN
- 0x139000ec: 210, // en-PR
- 0x139000f0: 211, // en-PW
- 0x13900107: 212, // en-RW
- 0x13900109: 213, // en-SB
- 0x1390010a: 214, // en-SC
- 0x1390010b: 215, // en-SD
- 0x1390010c: 216, // en-SE
- 0x1390010d: 217, // en-SG
- 0x1390010e: 218, // en-SH
- 0x1390010f: 219, // en-SI
- 0x13900112: 220, // en-SL
- 0x13900117: 221, // en-SS
- 0x1390011b: 222, // en-SX
- 0x1390011d: 223, // en-SZ
- 0x1390011f: 224, // en-TC
- 0x13900125: 225, // en-TK
- 0x13900129: 226, // en-TO
- 0x1390012c: 227, // en-TT
- 0x1390012d: 228, // en-TV
- 0x1390012f: 229, // en-TZ
- 0x13900131: 230, // en-UG
- 0x13900133: 231, // en-UM
- 0x13900135: 232, // en-US
- 0x13900139: 233, // en-VC
- 0x1390013c: 234, // en-VG
- 0x1390013d: 235, // en-VI
- 0x1390013f: 236, // en-VU
- 0x13900142: 237, // en-WS
- 0x13900161: 238, // en-ZA
- 0x13900162: 239, // en-ZM
- 0x13900164: 240, // en-ZW
- 0x13c00000: 241, // eo
- 0x13c00001: 242, // eo-001
- 0x13e00000: 243, // es
- 0x13e0001f: 244, // es-419
- 0x13e0002c: 245, // es-AR
- 0x13e0003f: 246, // es-BO
- 0x13e00041: 247, // es-BR
- 0x13e00048: 248, // es-BZ
- 0x13e00051: 249, // es-CL
- 0x13e00054: 250, // es-CO
- 0x13e00056: 251, // es-CR
- 0x13e00059: 252, // es-CU
- 0x13e00065: 253, // es-DO
- 0x13e00068: 254, // es-EA
- 0x13e00069: 255, // es-EC
- 0x13e0006e: 256, // es-ES
- 0x13e00086: 257, // es-GQ
- 0x13e00089: 258, // es-GT
- 0x13e0008f: 259, // es-HN
- 0x13e00094: 260, // es-IC
- 0x13e000cf: 261, // es-MX
- 0x13e000d8: 262, // es-NI
- 0x13e000e2: 263, // es-PA
- 0x13e000e4: 264, // es-PE
- 0x13e000e7: 265, // es-PH
- 0x13e000ec: 266, // es-PR
- 0x13e000f1: 267, // es-PY
- 0x13e0011a: 268, // es-SV
- 0x13e00135: 269, // es-US
- 0x13e00136: 270, // es-UY
- 0x13e0013b: 271, // es-VE
- 0x14000000: 272, // et
- 0x1400006a: 273, // et-EE
- 0x14500000: 274, // eu
- 0x1450006e: 275, // eu-ES
- 0x14600000: 276, // ewo
- 0x14600052: 277, // ewo-CM
- 0x14800000: 278, // fa
- 0x14800024: 279, // fa-AF
- 0x1480009c: 280, // fa-IR
- 0x14e00000: 281, // ff
- 0x14e00052: 282, // ff-CM
- 0x14e00084: 283, // ff-GN
- 0x14e000c9: 284, // ff-MR
- 0x14e00114: 285, // ff-SN
- 0x15100000: 286, // fi
- 0x15100072: 287, // fi-FI
- 0x15300000: 288, // fil
- 0x153000e7: 289, // fil-PH
- 0x15800000: 290, // fo
- 0x15800063: 291, // fo-DK
- 0x15800076: 292, // fo-FO
- 0x15e00000: 293, // fr
- 0x15e00036: 294, // fr-BE
- 0x15e00037: 295, // fr-BF
- 0x15e0003a: 296, // fr-BI
- 0x15e0003b: 297, // fr-BJ
- 0x15e0003c: 298, // fr-BL
- 0x15e00049: 299, // fr-CA
- 0x15e0004b: 300, // fr-CD
- 0x15e0004c: 301, // fr-CF
- 0x15e0004d: 302, // fr-CG
- 0x15e0004e: 303, // fr-CH
- 0x15e0004f: 304, // fr-CI
- 0x15e00052: 305, // fr-CM
- 0x15e00062: 306, // fr-DJ
- 0x15e00067: 307, // fr-DZ
- 0x15e00078: 308, // fr-FR
- 0x15e0007a: 309, // fr-GA
- 0x15e0007e: 310, // fr-GF
- 0x15e00084: 311, // fr-GN
- 0x15e00085: 312, // fr-GP
- 0x15e00086: 313, // fr-GQ
- 0x15e00091: 314, // fr-HT
- 0x15e000a8: 315, // fr-KM
- 0x15e000b7: 316, // fr-LU
- 0x15e000ba: 317, // fr-MA
- 0x15e000bb: 318, // fr-MC
- 0x15e000be: 319, // fr-MF
- 0x15e000bf: 320, // fr-MG
- 0x15e000c3: 321, // fr-ML
- 0x15e000c8: 322, // fr-MQ
- 0x15e000c9: 323, // fr-MR
- 0x15e000cc: 324, // fr-MU
- 0x15e000d3: 325, // fr-NC
- 0x15e000d4: 326, // fr-NE
- 0x15e000e5: 327, // fr-PF
- 0x15e000ea: 328, // fr-PM
- 0x15e00102: 329, // fr-RE
- 0x15e00107: 330, // fr-RW
- 0x15e0010a: 331, // fr-SC
- 0x15e00114: 332, // fr-SN
- 0x15e0011c: 333, // fr-SY
- 0x15e00120: 334, // fr-TD
- 0x15e00122: 335, // fr-TG
- 0x15e00128: 336, // fr-TN
- 0x15e0013f: 337, // fr-VU
- 0x15e00140: 338, // fr-WF
- 0x15e0015f: 339, // fr-YT
- 0x16900000: 340, // fur
- 0x1690009e: 341, // fur-IT
- 0x16d00000: 342, // fy
- 0x16d000d9: 343, // fy-NL
- 0x16e00000: 344, // ga
- 0x16e00096: 345, // ga-IE
- 0x17e00000: 346, // gd
- 0x17e0007b: 347, // gd-GB
- 0x19000000: 348, // gl
- 0x1900006e: 349, // gl-ES
- 0x1a300000: 350, // gsw
- 0x1a30004e: 351, // gsw-CH
- 0x1a300078: 352, // gsw-FR
- 0x1a3000b2: 353, // gsw-LI
- 0x1a400000: 354, // gu
- 0x1a400099: 355, // gu-IN
- 0x1a900000: 356, // guw
- 0x1ab00000: 357, // guz
- 0x1ab000a4: 358, // guz-KE
- 0x1ac00000: 359, // gv
- 0x1ac00098: 360, // gv-IM
- 0x1b400000: 361, // ha
- 0x1b400080: 362, // ha-GH
- 0x1b4000d4: 363, // ha-NE
- 0x1b4000d6: 364, // ha-NG
- 0x1b800000: 365, // haw
- 0x1b800135: 366, // haw-US
- 0x1bc00000: 367, // he
- 0x1bc00097: 368, // he-IL
- 0x1be00000: 369, // hi
- 0x1be00099: 370, // hi-IN
- 0x1d100000: 371, // hr
- 0x1d100033: 372, // hr-BA
- 0x1d100090: 373, // hr-HR
- 0x1d200000: 374, // hsb
- 0x1d200060: 375, // hsb-DE
- 0x1d500000: 376, // hu
- 0x1d500092: 377, // hu-HU
- 0x1d700000: 378, // hy
- 0x1d700028: 379, // hy-AM
- 0x1e100000: 380, // id
- 0x1e100095: 381, // id-ID
- 0x1e700000: 382, // ig
- 0x1e7000d6: 383, // ig-NG
- 0x1ea00000: 384, // ii
- 0x1ea00053: 385, // ii-CN
- 0x1f500000: 386, // io
- 0x1f800000: 387, // is
- 0x1f80009d: 388, // is-IS
- 0x1f900000: 389, // it
- 0x1f90004e: 390, // it-CH
- 0x1f90009e: 391, // it-IT
- 0x1f900113: 392, // it-SM
- 0x1f900138: 393, // it-VA
- 0x1fa00000: 394, // iu
- 0x20000000: 395, // ja
- 0x200000a2: 396, // ja-JP
- 0x20300000: 397, // jbo
- 0x20700000: 398, // jgo
- 0x20700052: 399, // jgo-CM
- 0x20a00000: 400, // jmc
- 0x20a0012f: 401, // jmc-TZ
- 0x20e00000: 402, // jv
- 0x21000000: 403, // ka
- 0x2100007d: 404, // ka-GE
- 0x21200000: 405, // kab
- 0x21200067: 406, // kab-DZ
- 0x21600000: 407, // kaj
- 0x21700000: 408, // kam
- 0x217000a4: 409, // kam-KE
- 0x21f00000: 410, // kcg
- 0x22300000: 411, // kde
- 0x2230012f: 412, // kde-TZ
- 0x22700000: 413, // kea
- 0x2270005a: 414, // kea-CV
- 0x23400000: 415, // khq
- 0x234000c3: 416, // khq-ML
- 0x23900000: 417, // ki
- 0x239000a4: 418, // ki-KE
- 0x24200000: 419, // kk
- 0x242000ae: 420, // kk-KZ
- 0x24400000: 421, // kkj
- 0x24400052: 422, // kkj-CM
- 0x24500000: 423, // kl
- 0x24500082: 424, // kl-GL
- 0x24600000: 425, // kln
- 0x246000a4: 426, // kln-KE
- 0x24a00000: 427, // km
- 0x24a000a6: 428, // km-KH
- 0x25100000: 429, // kn
- 0x25100099: 430, // kn-IN
- 0x25400000: 431, // ko
- 0x254000aa: 432, // ko-KP
- 0x254000ab: 433, // ko-KR
- 0x25600000: 434, // kok
- 0x25600099: 435, // kok-IN
- 0x26a00000: 436, // ks
- 0x26a00099: 437, // ks-IN
- 0x26b00000: 438, // ksb
- 0x26b0012f: 439, // ksb-TZ
- 0x26d00000: 440, // ksf
- 0x26d00052: 441, // ksf-CM
- 0x26e00000: 442, // ksh
- 0x26e00060: 443, // ksh-DE
- 0x27400000: 444, // ku
- 0x28100000: 445, // kw
- 0x2810007b: 446, // kw-GB
- 0x28a00000: 447, // ky
- 0x28a000a5: 448, // ky-KG
- 0x29100000: 449, // lag
- 0x2910012f: 450, // lag-TZ
- 0x29500000: 451, // lb
- 0x295000b7: 452, // lb-LU
- 0x2a300000: 453, // lg
- 0x2a300131: 454, // lg-UG
- 0x2af00000: 455, // lkt
- 0x2af00135: 456, // lkt-US
- 0x2b500000: 457, // ln
- 0x2b50002a: 458, // ln-AO
- 0x2b50004b: 459, // ln-CD
- 0x2b50004c: 460, // ln-CF
- 0x2b50004d: 461, // ln-CG
- 0x2b800000: 462, // lo
- 0x2b8000af: 463, // lo-LA
- 0x2bf00000: 464, // lrc
- 0x2bf0009b: 465, // lrc-IQ
- 0x2bf0009c: 466, // lrc-IR
- 0x2c000000: 467, // lt
- 0x2c0000b6: 468, // lt-LT
- 0x2c200000: 469, // lu
- 0x2c20004b: 470, // lu-CD
- 0x2c400000: 471, // luo
- 0x2c4000a4: 472, // luo-KE
- 0x2c500000: 473, // luy
- 0x2c5000a4: 474, // luy-KE
- 0x2c700000: 475, // lv
- 0x2c7000b8: 476, // lv-LV
- 0x2d100000: 477, // mas
- 0x2d1000a4: 478, // mas-KE
- 0x2d10012f: 479, // mas-TZ
- 0x2e900000: 480, // mer
- 0x2e9000a4: 481, // mer-KE
- 0x2ed00000: 482, // mfe
- 0x2ed000cc: 483, // mfe-MU
- 0x2f100000: 484, // mg
- 0x2f1000bf: 485, // mg-MG
- 0x2f200000: 486, // mgh
- 0x2f2000d1: 487, // mgh-MZ
- 0x2f400000: 488, // mgo
- 0x2f400052: 489, // mgo-CM
- 0x2ff00000: 490, // mk
- 0x2ff000c2: 491, // mk-MK
- 0x30400000: 492, // ml
- 0x30400099: 493, // ml-IN
- 0x30b00000: 494, // mn
- 0x30b000c5: 495, // mn-MN
- 0x31b00000: 496, // mr
- 0x31b00099: 497, // mr-IN
- 0x31f00000: 498, // ms
- 0x31f0003e: 499, // ms-BN
- 0x31f000d0: 500, // ms-MY
- 0x31f0010d: 501, // ms-SG
- 0x32000000: 502, // mt
- 0x320000cb: 503, // mt-MT
- 0x32500000: 504, // mua
- 0x32500052: 505, // mua-CM
- 0x33100000: 506, // my
- 0x331000c4: 507, // my-MM
- 0x33a00000: 508, // mzn
- 0x33a0009c: 509, // mzn-IR
- 0x34100000: 510, // nah
- 0x34500000: 511, // naq
- 0x345000d2: 512, // naq-NA
- 0x34700000: 513, // nb
- 0x347000da: 514, // nb-NO
- 0x34700110: 515, // nb-SJ
- 0x34e00000: 516, // nd
- 0x34e00164: 517, // nd-ZW
- 0x35000000: 518, // nds
- 0x35000060: 519, // nds-DE
- 0x350000d9: 520, // nds-NL
- 0x35100000: 521, // ne
- 0x35100099: 522, // ne-IN
- 0x351000db: 523, // ne-NP
- 0x36700000: 524, // nl
- 0x36700030: 525, // nl-AW
- 0x36700036: 526, // nl-BE
- 0x36700040: 527, // nl-BQ
- 0x3670005b: 528, // nl-CW
- 0x367000d9: 529, // nl-NL
- 0x36700116: 530, // nl-SR
- 0x3670011b: 531, // nl-SX
- 0x36800000: 532, // nmg
- 0x36800052: 533, // nmg-CM
- 0x36a00000: 534, // nn
- 0x36a000da: 535, // nn-NO
- 0x36c00000: 536, // nnh
- 0x36c00052: 537, // nnh-CM
- 0x36f00000: 538, // no
- 0x37500000: 539, // nqo
- 0x37600000: 540, // nr
- 0x37a00000: 541, // nso
- 0x38000000: 542, // nus
- 0x38000117: 543, // nus-SS
- 0x38700000: 544, // ny
- 0x38900000: 545, // nyn
- 0x38900131: 546, // nyn-UG
- 0x39000000: 547, // om
- 0x3900006f: 548, // om-ET
- 0x390000a4: 549, // om-KE
- 0x39500000: 550, // or
- 0x39500099: 551, // or-IN
- 0x39800000: 552, // os
- 0x3980007d: 553, // os-GE
- 0x39800106: 554, // os-RU
- 0x39d00000: 555, // pa
- 0x39d05000: 556, // pa-Arab
- 0x39d050e8: 557, // pa-Arab-PK
- 0x39d33000: 558, // pa-Guru
- 0x39d33099: 559, // pa-Guru-IN
- 0x3a100000: 560, // pap
- 0x3b300000: 561, // pl
- 0x3b3000e9: 562, // pl-PL
- 0x3bd00000: 563, // prg
- 0x3bd00001: 564, // prg-001
- 0x3be00000: 565, // ps
- 0x3be00024: 566, // ps-AF
- 0x3c000000: 567, // pt
- 0x3c00002a: 568, // pt-AO
- 0x3c000041: 569, // pt-BR
- 0x3c00004e: 570, // pt-CH
- 0x3c00005a: 571, // pt-CV
- 0x3c000086: 572, // pt-GQ
- 0x3c00008b: 573, // pt-GW
- 0x3c0000b7: 574, // pt-LU
- 0x3c0000c6: 575, // pt-MO
- 0x3c0000d1: 576, // pt-MZ
- 0x3c0000ee: 577, // pt-PT
- 0x3c000118: 578, // pt-ST
- 0x3c000126: 579, // pt-TL
- 0x3c400000: 580, // qu
- 0x3c40003f: 581, // qu-BO
- 0x3c400069: 582, // qu-EC
- 0x3c4000e4: 583, // qu-PE
- 0x3d400000: 584, // rm
- 0x3d40004e: 585, // rm-CH
- 0x3d900000: 586, // rn
- 0x3d90003a: 587, // rn-BI
- 0x3dc00000: 588, // ro
- 0x3dc000bc: 589, // ro-MD
- 0x3dc00104: 590, // ro-RO
- 0x3de00000: 591, // rof
- 0x3de0012f: 592, // rof-TZ
- 0x3e200000: 593, // ru
- 0x3e200047: 594, // ru-BY
- 0x3e2000a5: 595, // ru-KG
- 0x3e2000ae: 596, // ru-KZ
- 0x3e2000bc: 597, // ru-MD
- 0x3e200106: 598, // ru-RU
- 0x3e200130: 599, // ru-UA
- 0x3e500000: 600, // rw
- 0x3e500107: 601, // rw-RW
- 0x3e600000: 602, // rwk
- 0x3e60012f: 603, // rwk-TZ
- 0x3eb00000: 604, // sah
- 0x3eb00106: 605, // sah-RU
- 0x3ec00000: 606, // saq
- 0x3ec000a4: 607, // saq-KE
- 0x3f300000: 608, // sbp
- 0x3f30012f: 609, // sbp-TZ
- 0x3fa00000: 610, // sd
- 0x3fa000e8: 611, // sd-PK
- 0x3fc00000: 612, // sdh
- 0x3fd00000: 613, // se
- 0x3fd00072: 614, // se-FI
- 0x3fd000da: 615, // se-NO
- 0x3fd0010c: 616, // se-SE
- 0x3ff00000: 617, // seh
- 0x3ff000d1: 618, // seh-MZ
- 0x40100000: 619, // ses
- 0x401000c3: 620, // ses-ML
- 0x40200000: 621, // sg
- 0x4020004c: 622, // sg-CF
- 0x40800000: 623, // shi
- 0x40857000: 624, // shi-Latn
- 0x408570ba: 625, // shi-Latn-MA
- 0x408dc000: 626, // shi-Tfng
- 0x408dc0ba: 627, // shi-Tfng-MA
- 0x40c00000: 628, // si
- 0x40c000b3: 629, // si-LK
- 0x41200000: 630, // sk
- 0x41200111: 631, // sk-SK
- 0x41600000: 632, // sl
- 0x4160010f: 633, // sl-SI
- 0x41c00000: 634, // sma
- 0x41d00000: 635, // smi
- 0x41e00000: 636, // smj
- 0x41f00000: 637, // smn
- 0x41f00072: 638, // smn-FI
- 0x42200000: 639, // sms
- 0x42300000: 640, // sn
- 0x42300164: 641, // sn-ZW
- 0x42900000: 642, // so
- 0x42900062: 643, // so-DJ
- 0x4290006f: 644, // so-ET
- 0x429000a4: 645, // so-KE
- 0x42900115: 646, // so-SO
- 0x43100000: 647, // sq
- 0x43100027: 648, // sq-AL
- 0x431000c2: 649, // sq-MK
- 0x4310014d: 650, // sq-XK
- 0x43200000: 651, // sr
- 0x4321f000: 652, // sr-Cyrl
- 0x4321f033: 653, // sr-Cyrl-BA
- 0x4321f0bd: 654, // sr-Cyrl-ME
- 0x4321f105: 655, // sr-Cyrl-RS
- 0x4321f14d: 656, // sr-Cyrl-XK
- 0x43257000: 657, // sr-Latn
- 0x43257033: 658, // sr-Latn-BA
- 0x432570bd: 659, // sr-Latn-ME
- 0x43257105: 660, // sr-Latn-RS
- 0x4325714d: 661, // sr-Latn-XK
- 0x43700000: 662, // ss
- 0x43a00000: 663, // ssy
- 0x43b00000: 664, // st
- 0x44400000: 665, // sv
- 0x44400031: 666, // sv-AX
- 0x44400072: 667, // sv-FI
- 0x4440010c: 668, // sv-SE
- 0x44500000: 669, // sw
- 0x4450004b: 670, // sw-CD
- 0x445000a4: 671, // sw-KE
- 0x4450012f: 672, // sw-TZ
- 0x44500131: 673, // sw-UG
- 0x44e00000: 674, // syr
- 0x45000000: 675, // ta
- 0x45000099: 676, // ta-IN
- 0x450000b3: 677, // ta-LK
- 0x450000d0: 678, // ta-MY
- 0x4500010d: 679, // ta-SG
- 0x46100000: 680, // te
- 0x46100099: 681, // te-IN
- 0x46400000: 682, // teo
- 0x464000a4: 683, // teo-KE
- 0x46400131: 684, // teo-UG
- 0x46700000: 685, // tg
- 0x46700124: 686, // tg-TJ
- 0x46b00000: 687, // th
- 0x46b00123: 688, // th-TH
- 0x46f00000: 689, // ti
- 0x46f0006d: 690, // ti-ER
- 0x46f0006f: 691, // ti-ET
- 0x47100000: 692, // tig
- 0x47600000: 693, // tk
- 0x47600127: 694, // tk-TM
- 0x48000000: 695, // tn
- 0x48200000: 696, // to
- 0x48200129: 697, // to-TO
- 0x48a00000: 698, // tr
- 0x48a0005d: 699, // tr-CY
- 0x48a0012b: 700, // tr-TR
- 0x48e00000: 701, // ts
- 0x49400000: 702, // tt
- 0x49400106: 703, // tt-RU
- 0x4a400000: 704, // twq
- 0x4a4000d4: 705, // twq-NE
- 0x4a900000: 706, // tzm
- 0x4a9000ba: 707, // tzm-MA
- 0x4ac00000: 708, // ug
- 0x4ac00053: 709, // ug-CN
- 0x4ae00000: 710, // uk
- 0x4ae00130: 711, // uk-UA
- 0x4b400000: 712, // ur
- 0x4b400099: 713, // ur-IN
- 0x4b4000e8: 714, // ur-PK
- 0x4bc00000: 715, // uz
- 0x4bc05000: 716, // uz-Arab
- 0x4bc05024: 717, // uz-Arab-AF
- 0x4bc1f000: 718, // uz-Cyrl
- 0x4bc1f137: 719, // uz-Cyrl-UZ
- 0x4bc57000: 720, // uz-Latn
- 0x4bc57137: 721, // uz-Latn-UZ
- 0x4be00000: 722, // vai
- 0x4be57000: 723, // vai-Latn
- 0x4be570b4: 724, // vai-Latn-LR
- 0x4bee3000: 725, // vai-Vaii
- 0x4bee30b4: 726, // vai-Vaii-LR
- 0x4c000000: 727, // ve
- 0x4c300000: 728, // vi
- 0x4c30013e: 729, // vi-VN
- 0x4c900000: 730, // vo
- 0x4c900001: 731, // vo-001
- 0x4cc00000: 732, // vun
- 0x4cc0012f: 733, // vun-TZ
- 0x4ce00000: 734, // wa
- 0x4cf00000: 735, // wae
- 0x4cf0004e: 736, // wae-CH
- 0x4e500000: 737, // wo
- 0x4e500114: 738, // wo-SN
- 0x4f200000: 739, // xh
- 0x4fb00000: 740, // xog
- 0x4fb00131: 741, // xog-UG
- 0x50900000: 742, // yav
- 0x50900052: 743, // yav-CM
- 0x51200000: 744, // yi
- 0x51200001: 745, // yi-001
- 0x51800000: 746, // yo
- 0x5180003b: 747, // yo-BJ
- 0x518000d6: 748, // yo-NG
- 0x51f00000: 749, // yue
- 0x51f38000: 750, // yue-Hans
- 0x51f38053: 751, // yue-Hans-CN
- 0x51f39000: 752, // yue-Hant
- 0x51f3908d: 753, // yue-Hant-HK
- 0x52800000: 754, // zgh
- 0x528000ba: 755, // zgh-MA
- 0x52900000: 756, // zh
- 0x52938000: 757, // zh-Hans
- 0x52938053: 758, // zh-Hans-CN
- 0x5293808d: 759, // zh-Hans-HK
- 0x529380c6: 760, // zh-Hans-MO
- 0x5293810d: 761, // zh-Hans-SG
- 0x52939000: 762, // zh-Hant
- 0x5293908d: 763, // zh-Hant-HK
- 0x529390c6: 764, // zh-Hant-MO
- 0x5293912e: 765, // zh-Hant-TW
- 0x52f00000: 766, // zu
- 0x52f00161: 767, // zu-ZA
-}
-
-// Total table size 4676 bytes (4KiB); checksum: 17BE3673
diff --git a/language/internal/language.go b/language/internal/language.go
index f81f976..40f0170 100644
--- a/language/internal/language.go
+++ b/language/internal/language.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_common.go -output tables.go
-//go:generate go run gen_index.go
package language
@@ -55,20 +54,15 @@
// Make is a convenience wrapper for Parse that omits the error.
// In case of an error, a sensible default is returned.
func Make(s string) Tag {
- return Default.Make(s)
-}
-
-// Make is a convenience wrapper for c.Parse that omits the error.
-// In case of an error, a sensible default is returned.
-func (c CanonType) Make(s string) Tag {
- t, _ := c.Parse(s)
+ t, _ := Parse(s)
return t
}
// Raw returns the raw base language, script and region, without making an
// attempt to infer their values.
-func (t Tag) Raw() (b Base, s Script, r Region) {
- return Base{t.lang}, Script{t.script}, Region{t.region}
+// TODO: consider removing
+func (t Tag) Raw() (b langID, s scriptID, r regionID) {
+ return t.lang, t.script, t.region
}
// equalTags compares language, script and region subtags only.
@@ -89,156 +83,6 @@
return t.str != "" && t.pVariant == 0
}
-// CanonType can be used to enable or disable various types of canonicalization.
-type CanonType int
-
-const (
- // Replace deprecated base languages with their preferred replacements.
- DeprecatedBase CanonType = 1 << iota
- // Replace deprecated scripts with their preferred replacements.
- DeprecatedScript
- // Replace deprecated regions with their preferred replacements.
- DeprecatedRegion
- // Remove redundant scripts.
- SuppressScript
- // Normalize legacy encodings. This includes legacy languages defined in
- // CLDR as well as bibliographic codes defined in ISO-639.
- Legacy
- // Map the dominant language of a macro language group to the macro language
- // subtag. For example cmn -> zh.
- Macro
- // The CLDR flag should be used if full compatibility with CLDR is required.
- // There are a few cases where language.Tag may differ from CLDR. To follow all
- // of CLDR's suggestions, use All|CLDR.
- CLDR
-
- // Raw can be used to Compose or Parse without Canonicalization.
- Raw CanonType = 0
-
- // Replace all deprecated tags with their preferred replacements.
- Deprecated = DeprecatedBase | DeprecatedScript | DeprecatedRegion
-
- // All canonicalizations recommended by BCP 47.
- BCP47 = Deprecated | SuppressScript
-
- // All canonicalizations.
- All = BCP47 | Legacy | Macro
-
- // Default is the canonicalization used by Parse, Make and Compose. To
- // preserve as much information as possible, canonicalizations that remove
- // potentially valuable information are not included. The Matcher is
- // designed to recognize similar tags that would be the same if
- // they were canonicalized using All.
- Default = Deprecated | Legacy
-
- canonLang = DeprecatedBase | Legacy | Macro
-
- // TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
-)
-
-// canonicalize returns the canonicalized equivalent of the tag and
-// whether there was any change.
-func (t Tag) canonicalize(c CanonType) (Tag, bool) {
- if c == Raw {
- return t, false
- }
- changed := false
- if c&SuppressScript != 0 {
- if t.lang < langNoIndexOffset && uint8(t.script) == suppressScript[t.lang] {
- t.script = 0
- changed = true
- }
- }
- if c&canonLang != 0 {
- for {
- if l, aliasType := normLang(t.lang); l != t.lang {
- switch aliasType {
- case langLegacy:
- if c&Legacy != 0 {
- if t.lang == _sh && t.script == 0 {
- t.script = _Latn
- }
- t.lang = l
- changed = true
- }
- case langMacro:
- if c&Macro != 0 {
- // We deviate here from CLDR. The mapping "nb" -> "no"
- // qualifies as a typical Macro language mapping. However,
- // for legacy reasons, CLDR maps "no", the macro language
- // code for Norwegian, to the dominant variant "nb". This
- // change is currently under consideration for CLDR as well.
- // See http://unicode.org/cldr/trac/ticket/2698 and also
- // http://unicode.org/cldr/trac/ticket/1790 for some of the
- // practical implications. TODO: this check could be removed
- // if CLDR adopts this change.
- if c&CLDR == 0 || t.lang != _nb {
- changed = true
- t.lang = l
- }
- }
- case langDeprecated:
- if c&DeprecatedBase != 0 {
- if t.lang == _mo && t.region == 0 {
- t.region = _MD
- }
- t.lang = l
- changed = true
- // Other canonicalization types may still apply.
- continue
- }
- }
- } else if c&Legacy != 0 && t.lang == _no && c&CLDR != 0 {
- t.lang = _nb
- changed = true
- }
- break
- }
- }
- if c&DeprecatedScript != 0 {
- if t.script == _Qaai {
- changed = true
- t.script = _Zinh
- }
- }
- if c&DeprecatedRegion != 0 {
- if r := normRegion(t.region); r != 0 {
- changed = true
- t.region = r
- }
- }
- return t, changed
-}
-
-// Canonicalize returns the canonicalized equivalent of the tag.
-func (c CanonType) Canonicalize(t Tag) (Tag, error) {
- t, changed := t.canonicalize(c)
- if changed {
- t.remakeString()
- }
- return t, nil
-}
-
-// Confidence indicates the level of certainty for a given return value.
-// For example, Serbian may be written in Cyrillic or Latin script.
-// The confidence level indicates whether a value was explicitly specified,
-// whether it is typically the only possible value, or whether there is
-// an ambiguity.
-type Confidence int
-
-const (
- No Confidence = iota // full confidence that there was no match
- Low // most likely value picked out of a set of alternatives
- High // value is generally assumed to be the correct match
- Exact // exact match or explicitly specified value
-)
-
-var confName = []string{"No", "Low", "High", "Exact"}
-
-func (c Confidence) String() string {
- return confName[c]
-}
-
// remakeString is used to update t.str in case lang, script or region changed.
// It is assumed that pExt and pVariant still point to the start of the
// respective parts.
@@ -314,87 +158,11 @@
// UnmarshalText implements encoding.TextUnmarshaler.
func (t *Tag) UnmarshalText(text []byte) error {
- tag, err := Raw.Parse(string(text))
+ tag, err := Parse(string(text))
*t = tag
return err
}
-// Base returns the base language of the language tag. If the base language is
-// unspecified, an attempt will be made to infer it from the context.
-// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
-func (t Tag) Base() (Base, Confidence) {
- if t.lang != 0 {
- return Base{t.lang}, Exact
- }
- c := High
- if t.script == 0 && !(Region{t.region}).IsCountry() {
- c = Low
- }
- if tag, err := addTags(t); err == nil && tag.lang != 0 {
- return Base{tag.lang}, c
- }
- return Base{0}, No
-}
-
-// Script infers the script for the language tag. If it was not explicitly given, it will infer
-// a most likely candidate.
-// If more than one script is commonly used for a language, the most likely one
-// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
-// for Serbian.
-// If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
-// as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
-// common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
-// See http://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
-// unknown value in CLDR. (Zzzz, Exact) is returned if Zzzz was explicitly specified.
-// Note that an inferred script is never guaranteed to be the correct one. Latin is
-// almost exclusively used for Afrikaans, but Arabic has been used for some texts
-// in the past. Also, the script that is commonly used may change over time.
-// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
-func (t Tag) Script() (Script, Confidence) {
- if t.script != 0 {
- return Script{t.script}, Exact
- }
- sc, c := scriptID(_Zzzz), No
- if t.lang < langNoIndexOffset {
- if scr := scriptID(suppressScript[t.lang]); scr != 0 {
- // Note: it is not always the case that a language with a suppress
- // script value is only written in one script (e.g. kk, ms, pa).
- if t.region == 0 {
- return Script{scriptID(scr)}, High
- }
- sc, c = scr, High
- }
- }
- if tag, err := addTags(t); err == nil {
- if tag.script != sc {
- sc, c = tag.script, Low
- }
- } else {
- t, _ = (Deprecated | Macro).Canonicalize(t)
- if tag, err := addTags(t); err == nil && tag.script != sc {
- sc, c = tag.script, Low
- }
- }
- return Script{sc}, c
-}
-
-// Region returns the region for the language tag. If it was not explicitly given, it will
-// infer a most likely candidate from the context.
-// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
-func (t Tag) Region() (Region, Confidence) {
- if t.region != 0 {
- return Region{t.region}, Exact
- }
- if t, err := addTags(t); err == nil {
- return Region{t.region}, Low // TODO: differentiate between high and low.
- }
- t, _ = (Deprecated | Macro).Canonicalize(t)
- if tag, err := addTags(t); err == nil {
- return Region{tag.region}, Low
- }
- return Region{_ZZ}, No // TODO: return world instead of undetermined?
-}
-
// Variant returns the variants specified explicitly for this language tag.
// or nil if no variant was specified.
func (t Tag) Variants() []Variant {
@@ -414,7 +182,8 @@
func (t Tag) Parent() Tag {
if t.str != "" {
// Strip the variants and extensions.
- t, _ = Raw.Compose(t.Raw())
+ b, s, r := t.Raw()
+ t = Tag{lang: b, script: s, region: r}
if t.region == 0 && t.script != 0 && t.lang != 0 {
base, _ := addTags(Tag{lang: t.lang})
if base.script == t.script {
@@ -702,129 +471,49 @@
}
}
-// CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
-// for which data exists in the text repository. The index will change over time
-// and should not be stored in persistent storage. If t does not match a compact
-// index, exact will be false and the compact index will be returned for the
-// first match after repeatedly taking the Parent of t.
-func CompactIndex(t Tag) (index int, exact bool) {
- // TODO: perhaps give more frequent tags a lower index.
- // TODO: we could make the indexes stable. This will excluded some
- // possibilities for optimization, so don't do this quite yet.
- exact = true
-
- b, s, r := t.Raw()
- switch {
- case len(t.str) > 0:
- if strings.HasPrefix(t.str, "x-") {
- // We have no entries for user-defined tags.
- return 0, false
- }
- if uint16(t.pVariant) != t.pExt {
- if int(t.pExt) < len(t.str) {
- exact = false
- t, _ = Raw.Compose(b, s, r, t.Variants())
- }
- } else if _, ok := t.Extension('u'); ok {
- // TODO: va may mean something else. Consider not considering it.
- // Strip all but the 'va' entry.
- old := t
- variant := t.TypeForKey("va")
- t, _ = Raw.Compose(b, s, r)
- if variant != "" {
- t, _ = t.SetTypeForKey("va", variant)
- }
- exact = old == t
- }
- if len(t.str) > 0 {
- // We have some variants.
- for i, s := range specialTags {
- if s == t {
- return i + 1, exact
- }
- }
- exact = false
- }
- }
- for ; t != Und; t = t.Parent() {
- // No variants specified: just compare core components.
- // The key has the form lllssrrr, where l, s, and r are nibbles for
- // respectively the langID, scriptID, and regionID.
- key := uint32(b.langID) << (8 + 12)
- key |= uint32(s.scriptID) << 12
- key |= uint32(r.regionID)
- if x, ok := coreTags[key]; ok {
- return int(x), exact
- }
- exact = false
- }
- return int(0), exact
-}
-
-// Base is an ISO 639 language code, used for encoding the base language
-// of a language tag.
-type Base struct {
- langID
-}
-
// ParseBase parses a 2- or 3-letter ISO 639 code.
// It returns a ValueError if s is a well-formed but unknown language identifier
// or another error if another error occurred.
-func ParseBase(s string) (Base, error) {
+func ParseBase(s string) (langID, error) {
if n := len(s); n < 2 || 3 < n {
- return Base{}, errSyntax
+ return 0, errSyntax
}
var buf [3]byte
- l, err := getLangID(buf[:copy(buf[:], s)])
- return Base{l}, err
-}
-
-// Script is a 4-letter ISO 15924 code for representing scripts.
-// It is idiomatically represented in title case.
-type Script struct {
- scriptID
+ return getLangID(buf[:copy(buf[:], s)])
}
// ParseScript parses a 4-letter ISO 15924 code.
// It returns a ValueError if s is a well-formed but unknown script identifier
// or another error if another error occurred.
-func ParseScript(s string) (Script, error) {
+func ParseScript(s string) (scriptID, error) {
if len(s) != 4 {
- return Script{}, errSyntax
+ return 0, errSyntax
}
var buf [4]byte
- sc, err := getScriptID(script, buf[:copy(buf[:], s)])
- return Script{sc}, err
-}
-
-// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
-type Region struct {
- regionID
+ return getScriptID(script, buf[:copy(buf[:], s)])
}
// EncodeM49 returns the Region for the given UN M.49 code.
// It returns an error if r is not a valid code.
-func EncodeM49(r int) (Region, error) {
- rid, err := getRegionM49(r)
- return Region{rid}, err
+func EncodeM49(r int) (regionID, error) {
+ return getRegionM49(r)
}
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
// It returns a ValueError if s is a well-formed but unknown region identifier
// or another error if another error occurred.
-func ParseRegion(s string) (Region, error) {
+func ParseRegion(s string) (regionID, error) {
if n := len(s); n < 2 || 3 < n {
- return Region{}, errSyntax
+ return 0, errSyntax
}
var buf [3]byte
- r, err := getRegionID(buf[:copy(buf[:], s)])
- return Region{r}, err
+ return getRegionID(buf[:copy(buf[:], s)])
}
// IsCountry returns whether this region is a country or autonomous area. This
// includes non-standard definitions from CLDR.
-func (r Region) IsCountry() bool {
- if r.regionID == 0 || r.IsGroup() || r.IsPrivateUse() && r.regionID != _XK {
+func (r regionID) IsCountry() bool {
+ if r == 0 || r.IsGroup() || r.IsPrivateUse() && r != _XK {
return false
}
return true
@@ -832,20 +521,16 @@
// IsGroup returns whether this region defines a collection of regions. This
// includes non-standard definitions from CLDR.
-func (r Region) IsGroup() bool {
- if r.regionID == 0 {
+func (r regionID) IsGroup() bool {
+ if r == 0 {
return false
}
- return int(regionInclusion[r.regionID]) < len(regionContainment)
+ return int(regionInclusion[r]) < len(regionContainment)
}
// Contains returns whether Region c is contained by Region r. It returns true
// if c == r.
-func (r Region) Contains(c Region) bool {
- return r.regionID.contains(c.regionID)
-}
-
-func (r regionID) contains(c regionID) bool {
+func (r regionID) Contains(c regionID) bool {
if r == c {
return true
}
@@ -876,14 +561,14 @@
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
// region will already be canonicalized it was obtained from a Tag that was
// obtained using any of the default methods.
-func (r Region) TLD() (Region, error) {
+func (r regionID) TLD() (regionID, error) {
// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
// difference between ISO 3166-1 and IANA ccTLD.
- if r.regionID == _GB {
- r = Region{_UK}
+ if r == _GB {
+ r = _UK
}
if (r.typ() & ccTLD) == 0 {
- return Region{}, errNoTLD
+ return 0, errNoTLD
}
return r, nil
}
@@ -891,9 +576,9 @@
// Canonicalize returns the region or a possible replacement if the region is
// deprecated. It will not return a replacement for deprecated regions that
// are split into multiple regions.
-func (r Region) Canonicalize() Region {
- if cr := normRegion(r.regionID); cr != 0 {
- return Region{cr}
+func (r regionID) Canonicalize() regionID {
+ if cr := normRegion(r); cr != 0 {
+ return cr
}
return r
}
diff --git a/language/internal/language_test.go b/language/internal/language_test.go
index 14bc8a7..e2cd965 100644
--- a/language/internal/language_test.go
+++ b/language/internal/language_test.go
@@ -41,10 +41,6 @@
if tag != t1 {
t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v)", i, s, t1, tag)
}
- t2, _ := Compose(tag)
- if tag != t2 {
- t.Errorf("%d:%s: equality test 2 failed\n got: %#v\nwant: %#v", i, s, t2, tag)
- }
}
}
@@ -77,35 +73,6 @@
}
}
-func TestCompactIndex(t *testing.T) {
- tests := []struct {
- tag string
- index int
- ok bool
- }{
- // TODO: these values will change with each CLDR update. This issue
- // will be solved if we decide to fix the indexes.
- {"und", 0, true},
- {"ca-ES-valencia", 1, true},
- {"ca-ES-valencia-u-va-posix", 1, false},
- {"ca-ES-valencia-u-co-phonebk", 1, false},
- {"ca-ES-valencia-u-co-phonebk-va-posix", 1, false},
- {"x-klingon", 0, false},
- {"en-US", 232, true},
- {"en-US-u-va-posix", 2, true},
- {"en", 136, true},
- {"en-u-co-phonebk", 136, false},
- {"en-001", 137, true},
- {"sh", 0, false}, // We don't normalize.
- }
- for _, tt := range tests {
- x, ok := CompactIndex(Raw.MustParse(tt.tag))
- if x != tt.index || ok != tt.ok {
- t.Errorf("%s: got %d, %v; want %d %v", tt.tag, x, ok, tt.index, tt.ok)
- }
- }
-}
-
func TestMarshal(t *testing.T) {
testCases := []string{
// TODO: these values will change with each CLDR update. This issue
@@ -139,33 +106,6 @@
}
}
-func TestBase(t *testing.T) {
- tests := []struct {
- loc, lang string
- conf Confidence
- }{
- {"und", "en", Low},
- {"x-abc", "und", No},
- {"en", "en", Exact},
- {"und-Cyrl", "ru", High},
- // If a region is not included, the official language should be English.
- {"und-US", "en", High},
- // TODO: not-explicitly listed scripts should probably be und, No
- // Modify addTags to return info on how the match was derived.
- // {"und-Aghb", "und", No},
- }
- for i, tt := range tests {
- loc, _ := Parse(tt.loc)
- lang, conf := loc.Base()
- if lang.String() != tt.lang {
- t.Errorf("%d: language was %s; want %s", i, lang, tt.lang)
- }
- if conf != tt.conf {
- t.Errorf("%d: confidence was %d; want %d", i, conf, tt.conf)
- }
- }
-}
-
func TestParseBase(t *testing.T) {
tests := []struct {
in string
@@ -187,42 +127,12 @@
if x.String() != tt.out || err == nil != tt.ok {
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
}
- if y, _, _ := Raw.Make(tt.out).Raw(); x != y {
+ if y, _, _ := Make(tt.out).Raw(); x != y {
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
}
}
}
-func TestScript(t *testing.T) {
- tests := []struct {
- loc, scr string
- conf Confidence
- }{
- {"und", "Latn", Low},
- {"en-Latn", "Latn", Exact},
- {"en", "Latn", High},
- {"sr", "Cyrl", Low},
- {"kk", "Cyrl", High},
- {"kk-CN", "Arab", Low},
- {"cmn", "Hans", Low},
- {"ru", "Cyrl", High},
- {"ru-RU", "Cyrl", High},
- {"yue", "Hant", Low},
- {"x-abc", "Zzzz", Low},
- {"und-zyyy", "Zyyy", Exact},
- }
- for i, tt := range tests {
- loc, _ := Parse(tt.loc)
- sc, conf := loc.Script()
- if sc.String() != tt.scr {
- t.Errorf("%d:%s: script was %s; want %s", i, tt.loc, sc, tt.scr)
- }
- if conf != tt.conf {
- t.Errorf("%d:%s: confidence was %d; want %d", i, tt.loc, conf, tt.conf)
- }
- }
-}
-
func TestParseScript(t *testing.T) {
tests := []struct {
in string
@@ -243,39 +153,13 @@
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
}
if err == nil {
- if _, y, _ := Raw.Make("und-" + tt.out).Raw(); x != y {
+ if _, y, _ := Make("und-" + tt.out).Raw(); x != y {
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
}
}
}
}
-func TestRegion(t *testing.T) {
- tests := []struct {
- loc, reg string
- conf Confidence
- }{
- {"und", "US", Low},
- {"en", "US", Low},
- {"zh-Hant", "TW", Low},
- {"en-US", "US", Exact},
- {"cmn", "CN", Low},
- {"ru", "RU", Low},
- {"yue", "HK", Low},
- {"x-abc", "ZZ", Low},
- }
- for i, tt := range tests {
- loc, _ := Raw.Parse(tt.loc)
- reg, conf := loc.Region()
- if reg.String() != tt.reg {
- t.Errorf("%d:%s: region was %s; want %s", i, tt.loc, reg, tt.reg)
- }
- if conf != tt.conf {
- t.Errorf("%d:%s: confidence was %d; want %d", i, tt.loc, conf, tt.conf)
- }
- }
-}
-
func TestEncodeM49(t *testing.T) {
tests := []struct {
m49 int
@@ -320,7 +204,7 @@
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, r, err == nil, tt.out, tt.ok)
}
if err == nil {
- if _, _, y := Raw.Make("und-" + tt.out).Raw(); r != y {
+ if _, _, y := Make("und-" + tt.out).Raw(); r != y {
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, r, y)
}
}
@@ -347,7 +231,7 @@
}
for i, tt := range tests {
reg, _ := getRegionID([]byte(tt.reg))
- r := Region{reg}
+ r := reg
if r.IsCountry() != tt.country {
t.Errorf("%d: IsCountry(%s) was %v; want %v", i, tt.reg, r.IsCountry(), tt.country)
}
@@ -374,7 +258,7 @@
}
for i, tt := range tests {
reg, _ := getRegionID([]byte(tt.reg))
- r := Region{reg}
+ r := reg
if r.IsGroup() != tt.group {
t.Errorf("%d: IsGroup(%s) was %v; want %v", i, tt.reg, r.IsGroup(), tt.group)
}
@@ -409,8 +293,8 @@
for i, tt := range tests {
enc, _ := getRegionID([]byte(tt.enclosing))
con, _ := getRegionID([]byte(tt.contained))
- r := Region{enc}
- if got := r.Contains(Region{con}); got != tt.contains {
+ r := enc
+ if got := r.Contains(con); got != tt.contains {
t.Errorf("%d: %s.Contains(%s) was %v; want %v", i, tt.enclosing, tt.contained, got, tt.contains)
}
}
@@ -509,7 +393,7 @@
}
r := MustParseRegion(tt.in)
- var want Region
+ var want regionID
if tt.out != "ZZ" {
want = MustParseRegion(tt.out)
}
@@ -523,63 +407,6 @@
}
}
-func TestCanonicalize(t *testing.T) {
- // TODO: do a full test using CLDR data in a separate regression test.
- tests := []struct {
- in, out string
- option CanonType
- }{
- {"en-Latn", "en", SuppressScript},
- {"sr-Cyrl", "sr-Cyrl", SuppressScript},
- {"sh", "sr-Latn", Legacy},
- {"sh-HR", "sr-Latn-HR", Legacy},
- {"sh-Cyrl-HR", "sr-Cyrl-HR", Legacy},
- {"tl", "fil", Legacy},
- {"no", "no", Legacy},
- {"no", "nb", Legacy | CLDR},
- {"cmn", "cmn", Legacy},
- {"cmn", "zh", Macro},
- {"cmn-u-co-stroke", "zh-u-co-stroke", Macro},
- {"yue", "yue", Macro},
- {"nb", "no", Macro},
- {"nb", "nb", Macro | CLDR},
- {"no", "no", Macro},
- {"no", "no", Macro | CLDR},
- {"iw", "he", DeprecatedBase},
- {"iw", "he", Deprecated | CLDR},
- {"mo", "ro-MD", Deprecated}, // Adopted by CLDR as of version 25.
- {"alb", "sq", Legacy}, // bibliographic
- {"dut", "nl", Legacy}, // bibliographic
- // As of CLDR 25, mo is no longer considered a legacy mapping.
- {"mo", "mo", Legacy | CLDR},
- {"und-AN", "und-AN", Deprecated},
- {"und-YD", "und-YE", DeprecatedRegion},
- {"und-YD", "und-YD", DeprecatedBase},
- {"und-Qaai", "und-Zinh", DeprecatedScript},
- {"und-Qaai", "und-Qaai", DeprecatedBase},
- {"drh", "mn", All}, // drh -> khk -> mn
- }
- for i, tt := range tests {
- in, _ := Raw.Parse(tt.in)
- in, _ = tt.option.Canonicalize(in)
- if in.String() != tt.out {
- t.Errorf("%d:%s: was %s; want %s", i, tt.in, in.String(), tt.out)
- }
- if int(in.pVariant) > int(in.pExt) || int(in.pExt) > len(in.str) {
- t.Errorf("%d:%s:offsets %d <= %d <= %d must be true", i, tt.in, in.pVariant, in.pExt, len(in.str))
- }
- }
- // Test idempotence.
- for _, base := range Supported.BaseLanguages() {
- tag, _ := Raw.Compose(base)
- got, _ := All.Canonicalize(tag)
- want, _ := All.Canonicalize(got)
- if got != want {
- t.Errorf("idem(%s): got %s; want %s", tag, got, want)
- }
- }
-}
-
func TestTypeForKey(t *testing.T) {
tests := []struct{ key, in, out string }{
{"co", "en", ""},
@@ -802,8 +629,8 @@
{"pt-TL", "pt-PT"},
}
for _, tt := range tests {
- tag := Raw.MustParse(tt.in)
- if p := Raw.MustParse(tt.out); p != tag.Parent() {
+ tag := MustParse(tt.in)
+ if p := MustParse(tt.out); p != tag.Parent() {
t.Errorf("%s: was %v; want %v", tt.in, tag.Parent(), p)
}
}
diff --git a/language/internal/lookup.go b/language/internal/lookup.go
index 1d80ac3..6c55f8d 100644
--- a/language/internal/lookup.go
+++ b/language/internal/lookup.go
@@ -177,6 +177,15 @@
return langPrivateStart <= b && b <= langPrivateEnd
}
+// SuppressScript returns the script marked as SuppressScript for b in the IANA
+// language tag repository, or 0 if there is no such script.
+func (b langID) SuppressScript() scriptID {
+ if b < langNoIndexOffset { // suppressScript is only indexed by IDs below langNoIndexOffset
+ return scriptID(suppressScript[b])
+ }
+ return 0 // b >= langNoIndexOffset: the table is not consulted
+}
+
type regionID uint16
// getRegionID returns the region id for s if s is a valid 2-letter region code
diff --git a/language/internal/lookup_test.go b/language/internal/lookup_test.go
index 9833830..6d7cf73 100644
--- a/language/internal/lookup_test.go
+++ b/language/internal/lookup_test.go
@@ -121,8 +121,8 @@
{"en_us_posix", "en-US-u-va-posix"},
{"en-us-posix", "en-US-u-va-posix"},
} {
- got := Raw.Make(tt.in)
- want := Raw.MustParse(tt.out)
+ got := Make(tt.in)
+ want := MustParse(tt.out)
if got != want {
t.Errorf("%s: got %q; want %q", tt.in, got, want)
}
diff --git a/language/internal/match.go b/language/internal/match.go
index 15b74d1..5cbd63f 100644
--- a/language/internal/match.go
+++ b/language/internal/match.go
@@ -6,112 +6,6 @@
import "errors"
-// A MatchOption configures a Matcher.
-type MatchOption func(*matcher)
-
-// PreferSameScript will, in the absence of a match, result in the first
-// preferred tag with the same script as a supported tag to match this supported
-// tag. The default is currently true, but this may change in the future.
-func PreferSameScript(preferSame bool) MatchOption {
- return func(m *matcher) { m.preferSameScript = preferSame }
-}
-
-// TODO(v1.0.0): consider making Matcher a concrete type, instead of interface.
-// There doesn't seem to be too much need for multiple types.
-// Making it a concrete type allows MatchStrings to be a method, which will
-// improve its discoverability.
-
-// MatchStrings parses and matches the given strings until one of them matches
-// the language in the Matcher. A string may be an Accept-Language header as
-// handled by ParseAcceptLanguage. The default language is returned if no
-// other language matched.
-func MatchStrings(m Matcher, lang ...string) (tag Tag, index int) {
- for _, accept := range lang {
- desired, _, err := ParseAcceptLanguage(accept)
- if err != nil {
- continue
- }
- if tag, index, conf := m.Match(desired...); conf != No {
- return tag, index
- }
- }
- tag, index, _ = m.Match()
- return
-}
-
-// Matcher is the interface that wraps the Match method.
-//
-// Match returns the best match for any of the given tags, along with
-// a unique index associated with the returned tag and a confidence
-// score.
-type Matcher interface {
- Match(t ...Tag) (tag Tag, index int, c Confidence)
-}
-
-// Comprehends reports the confidence score for a speaker of a given language
-// to being able to comprehend the written form of an alternative language.
-func Comprehends(speaker, alternative Tag) Confidence {
- _, _, c := NewMatcher([]Tag{alternative}).Match(speaker)
- return c
-}
-
-// NewMatcher returns a Matcher that matches an ordered list of preferred tags
-// against a list of supported tags based on written intelligibility, closeness
-// of dialect, equivalence of subtags and various other rules. It is initialized
-// with the list of supported tags. The first element is used as the default
-// value in case no match is found.
-//
-// Its Match method matches the first of the given Tags to reach a certain
-// confidence threshold. The tags passed to Match should therefore be specified
-// in order of preference. Extensions are ignored for matching.
-//
-// The index returned by the Match method corresponds to the index of the
-// matched tag in t, but is augmented with the Unicode extension ('u')of the
-// corresponding preferred tag. This allows user locale options to be passed
-// transparently.
-func NewMatcher(t []Tag, options ...MatchOption) Matcher {
- return newMatcher(t, options)
-}
-
-func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
- match, w, c := m.getBest(want...)
- if match != nil {
- t, index = match.tag, match.index
- } else {
- // TODO: this should be an option
- t = m.default_.tag
- if m.preferSameScript {
- outer:
- for _, w := range want {
- script, _ := w.Script()
- if script.scriptID == 0 {
- // Don't do anything if there is no script, such as with
- // private subtags.
- continue
- }
- for i, h := range m.supported {
- if script.scriptID == h.maxScript {
- t, index = h.tag, i
- break outer
- }
- }
- }
- }
- // TODO: select first language tag based on script.
- }
- if w.region != 0 && t.region != 0 && t.region.contains(w.region) {
- t, _ = Raw.Compose(t, Region{w.region})
- }
- // Copy options from the user-provided tag into the result tag. This is hard
- // to do after the fact, so we do it here.
- // TODO: add in alternative variants to -u-va-.
- // TODO: add preferred region to -u-rg-.
- if e := w.Extensions(); len(e) > 0 {
- t, _ = Raw.Compose(t, e)
- }
- return t, index, c
-}
-
type scriptRegionFlags uint8
const (
@@ -133,7 +27,7 @@
}
func (t *Tag) setUndefinedRegion(id regionID) {
- if t.region == 0 || t.region.contains(id) {
+ if t.region == 0 || t.region.Contains(id) {
t.region = id
}
}
@@ -216,7 +110,7 @@
// defined, including the ones where the region was not
// defined. This allows for proper disambiguation within
// regions.
- if x.flags&scriptInFrom == 0 && t.region.contains(regionID(x.region)) {
+ if x.flags&scriptInFrom == 0 && t.region.Contains(regionID(x.region)) {
tt.region = regionID(x.region)
tt.setUndefinedScript(scriptID(x.script))
goodScript = goodScript && tt.script == scriptID(x.script)
@@ -326,556 +220,6 @@
return t, nil
}
-// Tag Matching
-// CLDR defines an algorithm for finding the best match between two sets of language
-// tags. The basic algorithm defines how to score a possible match and then find
-// the match with the best score
-// (see http://www.unicode.org/reports/tr35/#LanguageMatching).
-// Using scoring has several disadvantages. The scoring obfuscates the importance of
-// the various factors considered, making the algorithm harder to understand. Using
-// scoring also requires the full score to be computed for each pair of tags.
-//
-// We will use a different algorithm which aims to have the following properties:
-// - clarity on the precedence of the various selection factors, and
-// - improved performance by allowing early termination of a comparison.
-//
-// Matching algorithm (overview)
-// Input:
-// - supported: a set of supported tags
-// - default: the default tag to return in case there is no match
-// - desired: list of desired tags, ordered by preference, starting with
-// the most-preferred.
-//
-// Algorithm:
-// 1) Set the best match to the lowest confidence level
-// 2) For each tag in "desired":
-// a) For each tag in "supported":
-// 1) compute the match between the two tags.
-// 2) if the match is better than the previous best match, replace it
-// with the new match. (see next section)
-// b) if the current best match is Exact and pin is true the result will be
-// frozen to the language found thusfar, although better matches may
-// still be found for the same language.
-// 3) If the best match so far is below a certain threshold, return "default".
-//
-// Ranking:
-// We use two phases to determine whether one pair of tags are a better match
-// than another pair of tags. First, we determine a rough confidence level. If the
-// levels are different, the one with the highest confidence wins.
-// Second, if the rough confidence levels are identical, we use a set of tie-breaker
-// rules.
-//
-// The confidence level of matching a pair of tags is determined by finding the
-// lowest confidence level of any matches of the corresponding subtags (the
-// result is deemed as good as its weakest link).
-// We define the following levels:
-// Exact - An exact match of a subtag, before adding likely subtags.
-// MaxExact - An exact match of a subtag, after adding likely subtags.
-// [See Note 2].
-// High - High level of mutual intelligibility between different subtag
-// variants.
-// Low - Low level of mutual intelligibility between different subtag
-// variants.
-// No - No mutual intelligibility.
-//
-// The following levels can occur for each type of subtag:
-// Base: Exact, MaxExact, High, Low, No
-// Script: Exact, MaxExact [see Note 3], Low, No
-// Region: Exact, MaxExact, High
-// Variant: Exact, High
-// Private: Exact, No
-//
-// Any result with a confidence level of Low or higher is deemed a possible match.
-// Once a desired tag matches any of the supported tags with a level of MaxExact
-// or higher, the next desired tag is not considered (see Step 2.b).
-// Note that CLDR provides languageMatching data that defines close equivalence
-// classes for base languages, scripts and regions.
-//
-// Tie-breaking
-// If we get the same confidence level for two matches, we apply a sequence of
-// tie-breaking rules. The first that succeeds defines the result. The rules are
-// applied in the following order.
-// 1) Original language was defined and was identical.
-// 2) Original region was defined and was identical.
-// 3) Distance between two maximized regions was the smallest.
-// 4) Original script was defined and was identical.
-// 5) Distance from want tag to have tag using the parent relation [see Note 5.]
-// If there is still no winner after these rules are applied, the first match
-// found wins.
-//
-// Notes:
-// [2] In practice, as matching of Exact is done in a separate phase from
-// matching the other levels, we reuse the Exact level to mean MaxExact in
-// the second phase. As a consequence, we only need the levels defined by
-// the Confidence type. The MaxExact confidence level is mapped to High in
-// the public API.
-// [3] We do not differentiate between maximized script values that were derived
-// from suppressScript versus most likely tag data. We determined that in
-// ranking the two, one ranks just after the other. Moreover, the two cannot
-// occur concurrently. As a consequence, they are identical for practical
-// purposes.
-// [4] In case of deprecated, macro-equivalents and legacy mappings, we assign
-// the MaxExact level to allow iw vs he to still be a closer match than
-// en-AU vs en-US, for example.
-// [5] In CLDR a locale inherits fields that are unspecified for this locale
-// from its parent. Therefore, if a locale is a parent of another locale,
-// it is a strong measure for closeness, especially when no other tie
-// breaker rule applies. One could also argue it is inconsistent, for
-// example, when pt-AO matches pt (which CLDR equates with pt-BR), even
-// though its parent is pt-PT according to the inheritance rules.
-//
-// Implementation Details:
-// There are several performance considerations worth pointing out. Most notably,
-// we preprocess as much as possible (within reason) at the time of creation of a
-// matcher. This includes:
-// - creating a per-language map, which includes data for the raw base language
-// and its canonicalized variant (if applicable),
-// - expanding entries for the equivalence classes defined in CLDR's
-// languageMatch data.
-// The per-language map ensures that typically only a very small number of tags
-// need to be considered. The pre-expansion of canonicalized subtags and
-// equivalence classes reduces the amount of map lookups that need to be done at
-// runtime.
-
-// matcher keeps a set of supported language tags, indexed by language.
-type matcher struct {
- default_ *haveTag
- supported []*haveTag
- index map[langID]*matchHeader
- passSettings bool
- preferSameScript bool
-}
-
-// matchHeader has the lists of tags for exact matches and matches based on
-// maximized and canonicalized tags for a given language.
-type matchHeader struct {
- haveTags []*haveTag
- original bool
-}
-
-// haveTag holds a supported Tag and its maximized script and region. The maximized
-// or canonicalized language is not stored as it is not needed during matching.
-type haveTag struct {
- tag Tag
-
- // index of this tag in the original list of supported tags.
- index int
-
- // conf is the maximum confidence that can result from matching this haveTag.
- // When conf < Exact this means it was inserted after applying a CLDR equivalence rule.
- conf Confidence
-
- // Maximized region and script.
- maxRegion regionID
- maxScript scriptID
-
- // altScript may be checked as an alternative match to maxScript. If altScript
- // matches, the confidence level for this match is Low. Theoretically there
- // could be multiple alternative scripts. This does not occur in practice.
- altScript scriptID
-
- // nextMax is the index of the next haveTag with the same maximized tags.
- nextMax uint16
-}
-
-func makeHaveTag(tag Tag, index int) (haveTag, langID) {
- max := tag
- if tag.lang != 0 || tag.region != 0 || tag.script != 0 {
- max, _ = max.canonicalize(All)
- max, _ = addTags(max)
- max.remakeString()
- }
- return haveTag{tag, index, Exact, max.region, max.script, altScript(max.lang, max.script), 0}, max.lang
-}
-
-// altScript returns an alternative script that may match the given script with
-// a low confidence. At the moment, the langMatch data allows for at most one
-// script to map to another and we rely on this to keep the code simple.
-func altScript(l langID, s scriptID) scriptID {
- for _, alt := range matchScript {
- // TODO: also match cases where language is not the same.
- if (langID(alt.wantLang) == l || langID(alt.haveLang) == l) &&
- scriptID(alt.haveScript) == s {
- return scriptID(alt.wantScript)
- }
- }
- return 0
-}
-
-// addIfNew adds a haveTag to the list of tags only if it is a unique tag.
-// Tags that have the same maximized values are linked by index.
-func (h *matchHeader) addIfNew(n haveTag, exact bool) {
- h.original = h.original || exact
- // Don't add new exact matches.
- for _, v := range h.haveTags {
- if v.tag.equalsRest(n.tag) {
- return
- }
- }
- // Allow duplicate maximized tags, but create a linked list to allow quickly
- // comparing the equivalents and bail out.
- for i, v := range h.haveTags {
- if v.maxScript == n.maxScript &&
- v.maxRegion == n.maxRegion &&
- v.tag.variantOrPrivateTagStr() == n.tag.variantOrPrivateTagStr() {
- for h.haveTags[i].nextMax != 0 {
- i = int(h.haveTags[i].nextMax)
- }
- h.haveTags[i].nextMax = uint16(len(h.haveTags))
- break
- }
- }
- h.haveTags = append(h.haveTags, &n)
-}
-
-// header returns the matchHeader for the given language. It creates one if
-// it doesn't already exist.
-func (m *matcher) header(l langID) *matchHeader {
- if h := m.index[l]; h != nil {
- return h
- }
- h := &matchHeader{}
- m.index[l] = h
- return h
-}
-
-func toConf(d uint8) Confidence {
- if d <= 10 {
- return High
- }
- if d < 30 {
- return Low
- }
- return No
-}
-
-// newMatcher builds an index for the given supported tags and returns it as
-// a matcher. It also expands the index by considering various equivalence classes
-// for a given tag.
-func newMatcher(supported []Tag, options []MatchOption) *matcher {
- m := &matcher{
- index: make(map[langID]*matchHeader),
- preferSameScript: true,
- }
- for _, o := range options {
- o(m)
- }
- if len(supported) == 0 {
- m.default_ = &haveTag{}
- return m
- }
- // Add supported languages to the index. Add exact matches first to give
- // them precedence.
- for i, tag := range supported {
- pair, _ := makeHaveTag(tag, i)
- m.header(tag.lang).addIfNew(pair, true)
- m.supported = append(m.supported, &pair)
- }
- m.default_ = m.header(supported[0].lang).haveTags[0]
- // Keep these in two different loops to support the case that two equivalent
- // languages are distinguished, such as iw and he.
- for i, tag := range supported {
- pair, max := makeHaveTag(tag, i)
- if max != tag.lang {
- m.header(max).addIfNew(pair, true)
- }
- }
-
- // update is used to add indexes in the map for equivalent languages.
- // update will only add entries to original indexes, thus not computing any
- // transitive relations.
- update := func(want, have uint16, conf Confidence) {
- if hh := m.index[langID(have)]; hh != nil {
- if !hh.original {
- return
- }
- hw := m.header(langID(want))
- for _, ht := range hh.haveTags {
- v := *ht
- if conf < v.conf {
- v.conf = conf
- }
- v.nextMax = 0 // this value needs to be recomputed
- if v.altScript != 0 {
- v.altScript = altScript(langID(want), v.maxScript)
- }
- hw.addIfNew(v, conf == Exact && hh.original)
- }
- }
- }
-
- // Add entries for languages with mutual intelligibility as defined by CLDR's
- // languageMatch data.
- for _, ml := range matchLang {
- update(ml.want, ml.have, toConf(ml.distance))
- if !ml.oneway {
- update(ml.have, ml.want, toConf(ml.distance))
- }
- }
-
- // Add entries for possible canonicalizations. This is an optimization to
- // ensure that only one map lookup needs to be done at runtime per desired tag.
- // First we match deprecated equivalents. If they are perfect equivalents
- // (their canonicalization simply substitutes a different language code, but
- // nothing else), the match confidence is Exact, otherwise it is High.
- for i, lm := range langAliasMap {
- // If deprecated codes match and there is no fiddling with the script or
- // or region, we consider it an exact match.
- conf := Exact
- if langAliasTypes[i] != langMacro {
- if !isExactEquivalent(langID(lm.from)) {
- conf = High
- }
- update(lm.to, lm.from, conf)
- }
- update(lm.from, lm.to, conf)
- }
- return m
-}
-
-// getBest gets the best matching tag in m for any of the given tags, taking into
-// account the order of preference of the given tags.
-func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
- best := bestMatch{}
- for i, w := range want {
- var max Tag
- // Check for exact match first.
- h := m.index[w.lang]
- if w.lang != 0 {
- if h == nil {
- continue
- }
- // Base language is defined.
- max, _ = w.canonicalize(Legacy | Deprecated | Macro)
- // A region that is added through canonicalization is stronger than
- // a maximized region: set it in the original (e.g. mo -> ro-MD).
- if w.region != max.region {
- w.region = max.region
- }
- // TODO: should we do the same for scripts?
- // See test case: en, sr, nl ; sh ; sr
- max, _ = addTags(max)
- } else {
- // Base language is not defined.
- if h != nil {
- for i := range h.haveTags {
- have := h.haveTags[i]
- if have.tag.equalsRest(w) {
- return have, w, Exact
- }
- }
- }
- if w.script == 0 && w.region == 0 {
- // We skip all tags matching und for approximate matching, including
- // private tags.
- continue
- }
- max, _ = addTags(w)
- if h = m.index[max.lang]; h == nil {
- continue
- }
- }
- pin := true
- for _, t := range want[i+1:] {
- if w.lang == t.lang {
- pin = false
- break
- }
- }
- // Check for match based on maximized tag.
- for i := range h.haveTags {
- have := h.haveTags[i]
- best.update(have, w, max.script, max.region, pin)
- if best.conf == Exact {
- for have.nextMax != 0 {
- have = h.haveTags[have.nextMax]
- best.update(have, w, max.script, max.region, pin)
- }
- return best.have, best.want, best.conf
- }
- }
- }
- if best.conf <= No {
- if len(want) != 0 {
- return nil, want[0], No
- }
- return nil, Tag{}, No
- }
- return best.have, best.want, best.conf
-}
-
-// bestMatch accumulates the best match so far.
-type bestMatch struct {
- have *haveTag
- want Tag
- conf Confidence
- pinnedRegion regionID
- pinLanguage bool
- sameRegionGroup bool
- // Cached results from applying tie-breaking rules.
- origLang bool
- origReg bool
- paradigmReg bool
- regGroupDist uint8
- origScript bool
-}
-
-// update updates the existing best match if the new pair is considered to be a
-// better match. To determine if the given pair is a better match, it first
-// computes the rough confidence level. If this surpasses the current match, it
-// will replace it and update the tie-breaker rule cache. If there is a tie, it
-// proceeds with applying a series of tie-breaker rules. If there is no
-// conclusive winner after applying the tie-breaker rules, it leaves the current
-// match as the preferred match.
-//
-// If pin is true and have and tag are a strong match, it will henceforth only
-// consider matches for this language. This corresponds to the nothing that most
-// users have a strong preference for the first defined language. A user can
-// still prefer a second language over a dialect of the preferred language by
-// explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should
-// be false.
-func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion regionID, pin bool) {
- // Bail if the maximum attainable confidence is below that of the current best match.
- c := have.conf
- if c < m.conf {
- return
- }
- // Don't change the language once we already have found an exact match.
- if m.pinLanguage && tag.lang != m.want.lang {
- return
- }
- // Pin the region group if we are comparing tags for the same language.
- if tag.lang == m.want.lang && m.sameRegionGroup {
- _, sameGroup := regionGroupDist(m.pinnedRegion, have.maxRegion, have.maxScript, m.want.lang)
- if !sameGroup {
- return
- }
- }
- if c == Exact && have.maxScript == maxScript {
- // If there is another language and then another entry of this language,
- // don't pin anything, otherwise pin the language.
- m.pinLanguage = pin
- }
- if have.tag.equalsRest(tag) {
- } else if have.maxScript != maxScript {
- // There is usually very little comprehension between different scripts.
- // In a few cases there may still be Low comprehension. This possibility
- // is pre-computed and stored in have.altScript.
- if Low < m.conf || have.altScript != maxScript {
- return
- }
- c = Low
- } else if have.maxRegion != maxRegion {
- if High < c {
- // There is usually a small difference between languages across regions.
- c = High
- }
- }
-
- // We store the results of the computations of the tie-breaker rules along
- // with the best match. There is no need to do the checks once we determine
- // we have a winner, but we do still need to do the tie-breaker computations.
- // We use "beaten" to keep track if we still need to do the checks.
- beaten := false // true if the new pair defeats the current one.
- if c != m.conf {
- if c < m.conf {
- return
- }
- beaten = true
- }
-
- // Tie-breaker rules:
- // We prefer if the pre-maximized language was specified and identical.
- origLang := have.tag.lang == tag.lang && tag.lang != 0
- if !beaten && m.origLang != origLang {
- if m.origLang {
- return
- }
- beaten = true
- }
-
- // We prefer if the pre-maximized region was specified and identical.
- origReg := have.tag.region == tag.region && tag.region != 0
- if !beaten && m.origReg != origReg {
- if m.origReg {
- return
- }
- beaten = true
- }
-
- regGroupDist, sameGroup := regionGroupDist(have.maxRegion, maxRegion, maxScript, tag.lang)
- if !beaten && m.regGroupDist != regGroupDist {
- if regGroupDist > m.regGroupDist {
- return
- }
- beaten = true
- }
-
- paradigmReg := isParadigmLocale(tag.lang, have.maxRegion)
- if !beaten && m.paradigmReg != paradigmReg {
- if !paradigmReg {
- return
- }
- beaten = true
- }
-
- // Next we prefer if the pre-maximized script was specified and identical.
- origScript := have.tag.script == tag.script && tag.script != 0
- if !beaten && m.origScript != origScript {
- if m.origScript {
- return
- }
- beaten = true
- }
-
- // Update m to the newly found best match.
- if beaten {
- m.have = have
- m.want = tag
- m.conf = c
- m.pinnedRegion = maxRegion
- m.sameRegionGroup = sameGroup
- m.origLang = origLang
- m.origReg = origReg
- m.paradigmReg = paradigmReg
- m.origScript = origScript
- m.regGroupDist = regGroupDist
- }
-}
-
-func isParadigmLocale(lang langID, r regionID) bool {
- for _, e := range paradigmLocales {
- if langID(e[0]) == lang && (r == regionID(e[1]) || r == regionID(e[2])) {
- return true
- }
- }
- return false
-}
-
-// regionGroupDist computes the distance between two regions based on their
-// CLDR grouping.
-func regionGroupDist(a, b regionID, script scriptID, lang langID) (dist uint8, same bool) {
- const defaultDistance = 4
-
- aGroup := uint(regionToGroups[a]) << 1
- bGroup := uint(regionToGroups[b]) << 1
- for _, ri := range matchRegion {
- if langID(ri.lang) == lang && (ri.script == 0 || scriptID(ri.script) == script) {
- group := uint(1 << (ri.group &^ 0x80))
- if 0x80&ri.group == 0 {
- if aGroup&bGroup&group != 0 { // Both regions are in the group.
- return ri.distance, ri.distance == defaultDistance
- }
- } else {
- if (aGroup|bGroup)&group == 0 { // Both regions are not in the group.
- return ri.distance, ri.distance == defaultDistance
- }
- }
- }
- }
- return defaultDistance, true
-}
-
func (t Tag) variants() string {
if t.pVariant == 0 {
return ""
@@ -890,44 +234,3 @@
}
return t.str[t.pVariant:]
}
-
-// equalsRest compares everything except the language.
-func (a Tag) equalsRest(b Tag) bool {
- // TODO: don't include extensions in this comparison. To do this efficiently,
- // though, we should handle private tags separately.
- return a.script == b.script && a.region == b.region && a.variantOrPrivateTagStr() == b.variantOrPrivateTagStr()
-}
-
-// isExactEquivalent returns true if canonicalizing the language will not alter
-// the script or region of a tag.
-func isExactEquivalent(l langID) bool {
- for _, o := range notEquivalent {
- if o == l {
- return false
- }
- }
- return true
-}
-
-var notEquivalent []langID
-
-func init() {
- // Create a list of all languages for which canonicalization may alter the
- // script or region.
- for _, lm := range langAliasMap {
- tag := Tag{lang: langID(lm.from)}
- if tag, _ = tag.canonicalize(All); tag.script != 0 || tag.region != 0 {
- notEquivalent = append(notEquivalent, langID(lm.from))
- }
- }
- // Maximize undefined regions of paradigm locales.
- for i, v := range paradigmLocales {
- max, _ := addTags(Tag{lang: langID(v[0])})
- if v[1] == 0 {
- paradigmLocales[i][1] = uint16(max.region)
- }
- if v[2] == 0 {
- paradigmLocales[i][2] = uint16(max.region)
- }
- }
-}
diff --git a/language/internal/match_test.go b/language/internal/match_test.go
index 5481af5..e1a5bd7 100644
--- a/language/internal/match_test.go
+++ b/language/internal/match_test.go
@@ -5,149 +5,12 @@
package language
import (
- "bytes"
"flag"
- "fmt"
- "os"
- "path"
- "path/filepath"
- "strings"
"testing"
-
- "golang.org/x/text/internal/testtext"
- "golang.org/x/text/internal/ucd"
)
var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
-func TestCompliance(t *testing.T) {
- filepath.Walk("../testdata", func(file string, info os.FileInfo, err error) error {
- if info.IsDir() {
- return nil
- }
- r, err := os.Open(file)
- if err != nil {
- t.Fatal(err)
- }
- ucd.Parse(r, func(p *ucd.Parser) {
- name := strings.Replace(path.Join(p.String(0), p.String(1)), " ", "", -1)
- if skip[name] {
- return
- }
- t.Run(info.Name()+"/"+name, func(t *testing.T) {
- supported := makeTagList(p.String(0))
- desired := makeTagList(p.String(1))
- gotCombined, index, conf := NewMatcher(supported).Match(desired...)
-
- gotMatch := supported[index]
- wantMatch := mk(p.String(2))
- if gotMatch != wantMatch {
- t.Fatalf("match: got %q; want %q (%v)", gotMatch, wantMatch, conf)
- }
- wantCombined, err := Raw.Parse(p.String(3))
- if err == nil && gotCombined != wantCombined {
- t.Errorf("combined: got %q; want %q (%v)", gotCombined, wantCombined, conf)
- }
- })
- })
- return nil
- })
-}
-
-var skip = map[string]bool{
- // TODO: bugs
- // Honor the wildcard match. This may only be useful to select non-exact
- // stuff.
- "mul,af/nl": true, // match: got "af"; want "mul"
-
- // TODO: include other extensions.
- // combined: got "en-GB-u-ca-buddhist-nu-arab"; want "en-GB-fonipa-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-arab"
- "und,en-GB-u-sd-gbsct/en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin": true,
-
- // Inconsistencies with Mark Davis' implementation where it is not clear
- // which is better.
-
- // Inconsistencies in combined. I think the Go approach is more appropriate.
- // We could use -u-rg- and -u-va- as alternative.
- "und,fr/fr-BE-fonipa": true, // combined: got "fr"; want "fr-BE-fonipa"
- "und,fr-CA/fr-BE-fonipa": true, // combined: got "fr-CA"; want "fr-BE-fonipa"
- "und,fr-fonupa/fr-BE-fonipa": true, // combined: got "fr-fonupa"; want "fr-BE-fonipa"
- "und,no/nn-BE-fonipa": true, // combined: got "no"; want "no-BE-fonipa"
- "50,und,fr-CA-fonupa/fr-BE-fonipa": true, // combined: got "fr-CA-fonupa"; want "fr-BE-fonipa"
-
- // The initial number is a threshold. As we don't use scoring, we will not
- // implement this.
- "50,und,fr-Cyrl-CA-fonupa/fr-BE-fonipa": true,
- // match: got "und"; want "fr-Cyrl-CA-fonupa"
- // combined: got "und"; want "fr-Cyrl-BE-fonipa"
-
- // Other interesting cases to test:
- // - Should same language or same script have the preference if there is
- // usually no understanding of the other script?
- // - More specific region in desired may replace enclosing supported.
-}
-
-func makeTagList(s string) (tags []Tag) {
- for _, s := range strings.Split(s, ",") {
- tags = append(tags, mk(strings.TrimSpace(s)))
- }
- return tags
-}
-
-func TestMatchStrings(t *testing.T) {
- testCases := []struct {
- supported string
- desired string // strings separted by |
- tag string
- index int
- }{{
- supported: "en",
- desired: "",
- tag: "en",
- index: 0,
- }, {
- supported: "en",
- desired: "nl",
- tag: "en",
- index: 0,
- }, {
- supported: "en,nl",
- desired: "nl",
- tag: "nl",
- index: 1,
- }, {
- supported: "en,nl",
- desired: "nl|en",
- tag: "nl",
- index: 1,
- }, {
- supported: "en-GB,nl",
- desired: "en ; q=0.1,nl",
- tag: "nl",
- index: 1,
- }, {
- supported: "en-GB,nl",
- desired: "en;q=0.005 | dk; q=0.1,nl ",
- tag: "en-GB",
- index: 0,
- }, {
- // do not match faulty tags with und
- supported: "en,und",
- desired: "|en",
- tag: "en",
- index: 0,
- }}
- for _, tc := range testCases {
- t.Run(path.Join(tc.supported, tc.desired), func(t *testing.T) {
- m := NewMatcher(makeTagList(tc.supported))
- tag, index := MatchStrings(m, strings.Split(tc.desired, "|")...)
- if tag.String() != tc.tag || index != tc.index {
- t.Errorf("got %v, %d; want %v, %d", tag, index, tc.tag, tc.index)
- }
- })
- }
-}
-
func TestAddLikelySubtags(t *testing.T) {
tests := []struct{ in, out string }{
{"aa", "aa-Latn-ET"},
@@ -296,210 +159,3 @@
}
}
}
-
-func TestRegionGroups(t *testing.T) {
- testCases := []struct {
- a, b string
- distance uint8
- }{
- {"zh-TW", "zh-HK", 5},
- {"zh-MO", "zh-HK", 4},
- {"es-ES", "es-AR", 5},
- {"es-ES", "es", 4},
- {"es-419", "es-MX", 4},
- {"es-AR", "es-MX", 4},
- {"es-ES", "es-MX", 5},
- {"es-PT", "es-MX", 5},
- }
- for _, tc := range testCases {
- a := MustParse(tc.a)
- aScript, _ := a.Script()
- b := MustParse(tc.b)
- bScript, _ := b.Script()
-
- if aScript != bScript {
- t.Errorf("scripts differ: %q vs %q", aScript, bScript)
- continue
- }
- d, _ := regionGroupDist(a.region, b.region, aScript.scriptID, a.lang)
- if d != tc.distance {
- t.Errorf("got %q; want %q", d, tc.distance)
- }
- }
-}
-
-func TestIsParadigmLocale(t *testing.T) {
- testCases := map[string]bool{
- "en-US": true,
- "en-GB": true,
- "en-VI": false,
- "es-GB": false,
- "es-ES": true,
- "es-419": true,
- }
- for str, want := range testCases {
- tag := Make(str)
- got := isParadigmLocale(tag.lang, tag.region)
- if got != want {
- t.Errorf("isPL(%q) = %v; want %v", str, got, want)
- }
- }
-}
-
-// Implementation of String methods for various types for debugging purposes.
-
-func (m *matcher) String() string {
- w := &bytes.Buffer{}
- fmt.Fprintln(w, "Default:", m.default_)
- for tag, h := range m.index {
- fmt.Fprintf(w, " %s: %v\n", tag, h)
- }
- return w.String()
-}
-
-func (h *matchHeader) String() string {
- w := &bytes.Buffer{}
- fmt.Fprint(w, "haveTag: ")
- for _, h := range h.haveTags {
- fmt.Fprintf(w, "%v, ", h)
- }
- return w.String()
-}
-
-func (t haveTag) String() string {
- return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript)
-}
-
-func TestBestMatchAlloc(t *testing.T) {
- m := NewMatcher(makeTagList("en sr nl"))
- // Go allocates when creating a list of tags from a single tag!
- list := []Tag{English}
- avg := testtext.AllocsPerRun(1, func() {
- m.Match(list...)
- })
- if avg > 0 {
- t.Errorf("got %f; want 0", avg)
- }
-}
-
-var benchHave = []Tag{
- mk("en"),
- mk("en-GB"),
- mk("za"),
- mk("zh-Hant"),
- mk("zh-Hans-CN"),
- mk("zh"),
- mk("zh-HK"),
- mk("ar-MK"),
- mk("en-CA"),
- mk("fr-CA"),
- mk("fr-US"),
- mk("fr-CH"),
- mk("fr"),
- mk("lt"),
- mk("lv"),
- mk("iw"),
- mk("iw-NL"),
- mk("he"),
- mk("he-IT"),
- mk("tlh"),
- mk("ja"),
- mk("ja-Jpan"),
- mk("ja-Jpan-JP"),
- mk("de"),
- mk("de-CH"),
- mk("de-AT"),
- mk("de-DE"),
- mk("sr"),
- mk("sr-Latn"),
- mk("sr-Cyrl"),
- mk("sr-ME"),
-}
-
-var benchWant = [][]Tag{
- []Tag{
- mk("en"),
- },
- []Tag{
- mk("en-AU"),
- mk("de-HK"),
- mk("nl"),
- mk("fy"),
- mk("lv"),
- },
- []Tag{
- mk("en-AU"),
- mk("de-HK"),
- mk("nl"),
- mk("fy"),
- },
- []Tag{
- mk("ja-Hant"),
- mk("da-HK"),
- mk("nl"),
- mk("zh-TW"),
- },
- []Tag{
- mk("ja-Hant"),
- mk("da-HK"),
- mk("nl"),
- mk("hr"),
- },
-}
-
-func BenchmarkMatch(b *testing.B) {
- m := newMatcher(benchHave, nil)
- for i := 0; i < b.N; i++ {
- for _, want := range benchWant {
- m.getBest(want...)
- }
- }
-}
-
-func BenchmarkMatchExact(b *testing.B) {
- want := mk("en")
- m := newMatcher(benchHave, nil)
- for i := 0; i < b.N; i++ {
- m.getBest(want)
- }
-}
-
-func BenchmarkMatchAltLanguagePresent(b *testing.B) {
- want := mk("hr")
- m := newMatcher(benchHave, nil)
- for i := 0; i < b.N; i++ {
- m.getBest(want)
- }
-}
-
-func BenchmarkMatchAltLanguageNotPresent(b *testing.B) {
- want := mk("nn")
- m := newMatcher(benchHave, nil)
- for i := 0; i < b.N; i++ {
- m.getBest(want)
- }
-}
-
-func BenchmarkMatchAltScriptPresent(b *testing.B) {
- want := mk("zh-Hant-CN")
- m := newMatcher(benchHave, nil)
- for i := 0; i < b.N; i++ {
- m.getBest(want)
- }
-}
-
-func BenchmarkMatchAltScriptNotPresent(b *testing.B) {
- want := mk("fr-Cyrl")
- m := newMatcher(benchHave, nil)
- for i := 0; i < b.N; i++ {
- m.getBest(want)
- }
-}
-
-func BenchmarkMatchLimitedExact(b *testing.B) {
- want := []Tag{mk("he-NL"), mk("iw-NL")}
- m := newMatcher(benchHave, nil)
- for i := 0; i < b.N; i++ {
- m.getBest(want...)
- }
-}
diff --git a/language/internal/parse.go b/language/internal/parse.go
index fca2d30..d449c09 100644
--- a/language/internal/parse.go
+++ b/language/internal/parse.go
@@ -9,8 +9,6 @@
"errors"
"fmt"
"sort"
- "strconv"
- "strings"
"golang.org/x/text/internal/tag"
)
@@ -224,20 +222,7 @@
// value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
-// The resulting tag is canonicalized using the default canonicalization type.
func Parse(s string) (t Tag, err error) {
- return Default.Parse(s)
-}
-
-// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
-// failed it returns an error and any part of the tag that could be parsed.
-// If parsing succeeded but an unknown value was found, it returns
-// ValueError. The Tag returned in this case is just stripped of the unknown
-// value. All other values are preserved. It accepts tags in the BCP 47 format
-// and extensions to this standard defined in
-// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
-// The resulting tag is canonicalized using the the canonicalization type c.
-func (c CanonType) Parse(s string) (t Tag, err error) {
// TODO: consider supporting old-style locale key-value pairs.
if s == "" {
return und, errSyntax
@@ -258,12 +243,7 @@
}
}
scan := makeScannerString(s)
- t, err = parse(&scan, s)
- t, changed := t.canonicalize(c)
- if changed {
- t.remakeString()
- }
- return t, err
+ return parse(&scan, s)
}
func parse(scan *scanner, s string) (t Tag, err error) {
@@ -558,181 +538,6 @@
return end
}
-// Compose creates a Tag from individual parts, which may be of type Tag, Base,
-// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
-// Base, Script or Region or slice of type Variant or Extension is passed more
-// than once, the latter will overwrite the former. Variants and Extensions are
-// accumulated, but if two extensions of the same type are passed, the latter
-// will replace the former. A Tag overwrites all former values and typically
-// only makes sense as the first argument. The resulting tag is returned after
-// canonicalizing using the Default CanonType. If one or more errors are
-// encountered, one of the errors is returned.
-func Compose(part ...interface{}) (t Tag, err error) {
- return Default.Compose(part...)
-}
-
-// Compose creates a Tag from individual parts, which may be of type Tag, Base,
-// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
-// Base, Script or Region or slice of type Variant or Extension is passed more
-// than once, the latter will overwrite the former. Variants and Extensions are
-// accumulated, but if two extensions of the same type are passed, the latter
-// will replace the former. A Tag overwrites all former values and typically
-// only makes sense as the first argument. The resulting tag is returned after
-// canonicalizing using CanonType c. If one or more errors are encountered,
-// one of the errors is returned.
-func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
- var b builder
- if err = b.update(part...); err != nil {
- return und, err
- }
- t, _ = b.tag.canonicalize(c)
-
- if len(b.ext) > 0 || len(b.variant) > 0 {
- sort.Sort(sortVariant(b.variant))
- sort.Strings(b.ext)
- if b.private != "" {
- b.ext = append(b.ext, b.private)
- }
- n := maxCoreSize + tokenLen(b.variant...) + tokenLen(b.ext...)
- buf := make([]byte, n)
- p := t.genCoreBytes(buf)
- t.pVariant = byte(p)
- p += appendTokens(buf[p:], b.variant...)
- t.pExt = uint16(p)
- p += appendTokens(buf[p:], b.ext...)
- t.str = string(buf[:p])
- } else if b.private != "" {
- t.str = b.private
- t.remakeString()
- }
- return
-}
-
-type builder struct {
- tag Tag
-
- private string // the x extension
- ext []string
- variant []string
-
- err error
-}
-
-func (b *builder) addExt(e string) {
- if e == "" {
- } else if e[0] == 'x' {
- b.private = e
- } else {
- b.ext = append(b.ext, e)
- }
-}
-
-var errInvalidArgument = errors.New("invalid Extension or Variant")
-
-func (b *builder) update(part ...interface{}) (err error) {
- replace := func(l *[]string, s string, eq func(a, b string) bool) bool {
- if s == "" {
- b.err = errInvalidArgument
- return true
- }
- for i, v := range *l {
- if eq(v, s) {
- (*l)[i] = s
- return true
- }
- }
- return false
- }
- for _, x := range part {
- switch v := x.(type) {
- case Tag:
- b.tag.lang = v.lang
- b.tag.region = v.region
- b.tag.script = v.script
- if v.str != "" {
- b.variant = nil
- for x, s := "", v.str[v.pVariant:v.pExt]; s != ""; {
- x, s = nextToken(s)
- b.variant = append(b.variant, x)
- }
- b.ext, b.private = nil, ""
- for i, e := int(v.pExt), ""; i < len(v.str); {
- i, e = getExtension(v.str, i)
- b.addExt(e)
- }
- }
- case Base:
- b.tag.lang = v.langID
- case Script:
- b.tag.script = v.scriptID
- case Region:
- b.tag.region = v.regionID
- case Variant:
- if !replace(&b.variant, v.variant, func(a, b string) bool { return a == b }) {
- b.variant = append(b.variant, v.variant)
- }
- case Extension:
- if !replace(&b.ext, v.s, func(a, b string) bool { return a[0] == b[0] }) {
- b.addExt(v.s)
- }
- case []Variant:
- b.variant = nil
- for _, x := range v {
- b.update(x)
- }
- case []Extension:
- b.ext, b.private = nil, ""
- for _, e := range v {
- b.update(e)
- }
- // TODO: support parsing of raw strings based on morphology or just extensions?
- case error:
- err = v
- }
- }
- return
-}
-
-func tokenLen(token ...string) (n int) {
- for _, t := range token {
- n += len(t) + 1
- }
- return
-}
-
-func appendTokens(b []byte, token ...string) int {
- p := 0
- for _, t := range token {
- b[p] = '-'
- copy(b[p+1:], t)
- p += 1 + len(t)
- }
- return p
-}
-
-type sortVariant []string
-
-func (s sortVariant) Len() int {
- return len(s)
-}
-
-func (s sortVariant) Swap(i, j int) {
- s[j], s[i] = s[i], s[j]
-}
-
-func (s sortVariant) Less(i, j int) bool {
- return variantIndex[s[i]] < variantIndex[s[j]]
-}
-
-func findExt(list []string, x byte) int {
- for i, e := range list {
- if e[0] == x {
- return i
- }
- }
- return -1
-}
-
// getExtension returns the name, body and end position of the extension.
func getExtension(s string, p int) (end int, ext string) {
if s[p] == '-' {
@@ -762,98 +567,3 @@
}
return len(s)
}
-
-var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
-
-// ParseAcceptLanguage parses the contents of an Accept-Language header as
-// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
-// a list of corresponding quality weights. It is more permissive than RFC 2616
-// and may return non-nil slices even if the input is not valid.
-// The Tags will be sorted by highest weight first and then by first occurrence.
-// Tags with a weight of zero will be dropped. An error will be returned if the
-// input could not be parsed.
-func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
- var entry string
- for s != "" {
- if entry, s = split(s, ','); entry == "" {
- continue
- }
-
- entry, weight := split(entry, ';')
-
- // Scan the language.
- t, err := Parse(entry)
- if err != nil {
- id, ok := acceptFallback[entry]
- if !ok {
- return nil, nil, err
- }
- t = Tag{lang: id}
- }
-
- // Scan the optional weight.
- w := 1.0
- if weight != "" {
- weight = consume(weight, 'q')
- weight = consume(weight, '=')
- // consume returns the empty string when a token could not be
- // consumed, resulting in an error for ParseFloat.
- if w, err = strconv.ParseFloat(weight, 32); err != nil {
- return nil, nil, errInvalidWeight
- }
- // Drop tags with a quality weight of 0.
- if w <= 0 {
- continue
- }
- }
-
- tag = append(tag, t)
- q = append(q, float32(w))
- }
- sortStable(&tagSort{tag, q})
- return tag, q, nil
-}
-
-// consume removes a leading token c from s and returns the result or the empty
-// string if there is no such token.
-func consume(s string, c byte) string {
- if s == "" || s[0] != c {
- return ""
- }
- return strings.TrimSpace(s[1:])
-}
-
-func split(s string, c byte) (head, tail string) {
- if i := strings.IndexByte(s, c); i >= 0 {
- return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:])
- }
- return strings.TrimSpace(s), ""
-}
-
-// Add hack mapping to deal with a small number of cases that that occur
-// in Accept-Language (with reasonable frequency).
-var acceptFallback = map[string]langID{
- "english": _en,
- "deutsch": _de,
- "italian": _it,
- "french": _fr,
- "*": _mul, // defined in the spec to match all languages.
-}
-
-type tagSort struct {
- tag []Tag
- q []float32
-}
-
-func (s *tagSort) Len() int {
- return len(s.q)
-}
-
-func (s *tagSort) Less(i, j int) bool {
- return s.q[i] > s.q[j]
-}
-
-func (s *tagSort) Swap(i, j int) {
- s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
- s.q[i], s.q[j] = s.q[j], s.q[i]
-}
diff --git a/language/internal/parse_test.go b/language/internal/parse_test.go
index 9b40eb4..e228a29 100644
--- a/language/internal/parse_test.go
+++ b/language/internal/parse_test.go
@@ -307,7 +307,7 @@
func TestParse(t *testing.T) {
partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
- id, err := Raw.Parse(tt.in)
+ id, err := Parse(tt.in)
ext := ""
if id.str != "" {
if strings.HasPrefix(id.str, "x-") {
@@ -316,7 +316,7 @@
ext = id.str[id.pExt+1:]
}
}
- if tag, _ := Raw.Parse(id.String()); tag.String() != id.String() {
+ if tag, _ := Parse(id.String()); tag.String() != id.String() {
t.Errorf("%d:%s: reparse was %q; want %q", tt.i, tt.in, id.String(), tag.String())
}
if ext != tt.ext {
@@ -359,159 +359,3 @@
}
}
}
-
-func TestCompose1(t *testing.T) {
- partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
- l, _ := ParseBase(tt.lang)
- s, _ := ParseScript(tt.script)
- r, _ := ParseRegion(tt.region)
- v := []Variant{}
- for _, x := range strings.Split(tt.variants, "-") {
- p, _ := ParseVariant(x)
- v = append(v, p)
- }
- e := []Extension{}
- for _, x := range tt.extList {
- p, _ := ParseExtension(x)
- e = append(e, p)
- }
- id, _ = Raw.Compose(l, s, r, v, e)
- return id, false
- })
-}
-
-func TestCompose2(t *testing.T) {
- partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
- l, _ := ParseBase(tt.lang)
- s, _ := ParseScript(tt.script)
- r, _ := ParseRegion(tt.region)
- p := []interface{}{l, s, r, s, r, l}
- for _, x := range strings.Split(tt.variants, "-") {
- v, _ := ParseVariant(x)
- p = append(p, v)
- }
- for _, x := range tt.extList {
- e, _ := ParseExtension(x)
- p = append(p, e)
- }
- id, _ = Raw.Compose(p...)
- return id, false
- })
-}
-
-func TestCompose3(t *testing.T) {
- partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
- id, _ = Raw.Parse(tt.in)
- id, _ = Raw.Compose(id)
- return id, false
- })
-}
-
-func mk(s string) Tag {
- return Raw.Make(s)
-}
-
-func TestParseAcceptLanguage(t *testing.T) {
- type res struct {
- t Tag
- q float32
- }
- en := []res{{mk("en"), 1.0}}
- tests := []struct {
- out []res
- in string
- ok bool
- }{
- {en, "en", true},
- {en, " en", true},
- {en, "en ", true},
- {en, " en ", true},
- {en, "en,", true},
- {en, ",en", true},
- {en, ",,,en,,,", true},
- {en, ",en;q=1", true},
-
- // We allow an empty input, contrary to spec.
- {nil, "", true},
- {[]res{{mk("aa"), 1}}, "aa;", true}, // allow unspecified weight
-
- // errors
- {nil, ";", false},
- {nil, "$", false},
- {nil, "e;", false},
- {nil, "x;", false},
- {nil, "x", false},
- {nil, "ac", false}, // non-existing language
- {nil, "aa;q", false},
- {nil, "aa;q=", false},
- {nil, "aa;q=.", false},
-
- // odd fallbacks
- {
- []res{{mk("en"), 0.1}},
- " english ;q=.1",
- true,
- },
- {
- []res{{mk("it"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}},
- " italian, deutsch, french",
- true,
- },
-
- // lists
- {
- []res{{mk("en"), 0.1}},
- "en;q=.1",
- true,
- },
- {
- []res{{mk("mul"), 1.0}},
- "*",
- true,
- },
- {
- []res{{mk("en"), 1.0}, {mk("de"), 1.0}},
- "en,de",
- true,
- },
- {
- []res{{mk("en"), 1.0}, {mk("de"), .5}},
- "en,de;q=0.5",
- true,
- },
- {
- []res{{mk("de"), 0.8}, {mk("en"), 0.5}},
- " en ; q = 0.5 , , de;q=0.8",
- true,
- },
- {
- []res{{mk("en"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}, {mk("tlh"), 1.0}},
- "en,de,fr,i-klingon",
- true,
- },
- // sorting
- {
- []res{{mk("tlh"), 0.4}, {mk("de"), 0.2}, {mk("fr"), 0.2}, {mk("en"), 0.1}},
- "en;q=0.1,de;q=0.2,fr;q=0.2,i-klingon;q=0.4",
- true,
- },
- // dropping
- {
- []res{{mk("fr"), 0.2}, {mk("en"), 0.1}},
- "en;q=0.1,de;q=0,fr;q=0.2,i-klingon;q=0.0",
- true,
- },
- }
- for i, tt := range tests {
- tags, qs, e := ParseAcceptLanguage(tt.in)
- if e == nil != tt.ok {
- t.Errorf("%d:%s:err: was %v; want %v", i, tt.in, e == nil, tt.ok)
- }
- for j, tag := range tags {
- if out := tt.out[j]; !tag.equalTags(out.t) || qs[j] != out.q {
- t.Errorf("%d:%s: was %s, %1f; want %s, %1f", i, tt.in, tag, qs[j], out.t, out.q)
- break
- }
- }
- }
-}
diff --git a/language/internal/tables.go b/language/internal/tables.go
index b738d45..2a7b274 100644
--- a/language/internal/tables.go
+++ b/language/internal/tables.go
@@ -3291,261 +3291,6 @@
32: {lang: 0x139, region: 0x7b, script: 0x57},
}
-// Size: 358 bytes, 358 elements
-var regionToGroups = [358]uint8{
- // Entry 0 - 3F
- 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04,
- 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00,
- 0x00, 0x04, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00,
- 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x04,
- // Entry 40 - 7F
- 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
- 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x08,
- 0x00, 0x04, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
- // Entry 80 - BF
- 0x00, 0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00,
- 0x00, 0x04, 0x01, 0x00, 0x04, 0x02, 0x00, 0x04,
- 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
- 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x08, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00,
- // Entry C0 - FF
- 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01,
- 0x04, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x04, 0x00, 0x05, 0x00, 0x00, 0x00,
- 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- // Entry 100 - 13F
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
- 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x00, 0x04,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x01, 0x00, 0x05, 0x04, 0x00,
- 0x00, 0x04, 0x00, 0x04, 0x04, 0x05, 0x00, 0x00,
- // Entry 140 - 17F
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-}
-
-// Size: 18 bytes, 3 elements
-var paradigmLocales = [3][3]uint16{
- 0: [3]uint16{0x139, 0x0, 0x7b},
- 1: [3]uint16{0x13e, 0x0, 0x1f},
- 2: [3]uint16{0x3c0, 0x41, 0xee},
-}
-
-type mutualIntelligibility struct {
- want uint16
- have uint16
- distance uint8
- oneway bool
-}
-
-type scriptIntelligibility struct {
- wantLang uint16
- haveLang uint16
- wantScript uint8
- haveScript uint8
- distance uint8
-}
-
-type regionIntelligibility struct {
- lang uint16
- script uint8
- group uint8
- distance uint8
-}
-
-// matchLang holds pairs of langIDs of base languages that are typically
-// mutually intelligible. Each pair is associated with a confidence and
-// whether the intelligibility goes one or both ways.
-// Size: 678 bytes, 113 elements
-var matchLang = [113]mutualIntelligibility{
- 0: {want: 0x1d1, have: 0xb7, distance: 0x4, oneway: false},
- 1: {want: 0x407, have: 0xb7, distance: 0x4, oneway: false},
- 2: {want: 0x407, have: 0x1d1, distance: 0x4, oneway: false},
- 3: {want: 0x407, have: 0x432, distance: 0x4, oneway: false},
- 4: {want: 0x43a, have: 0x1, distance: 0x4, oneway: false},
- 5: {want: 0x1a3, have: 0x10d, distance: 0x4, oneway: true},
- 6: {want: 0x295, have: 0x10d, distance: 0x4, oneway: true},
- 7: {want: 0x101, have: 0x36f, distance: 0x8, oneway: false},
- 8: {want: 0x101, have: 0x347, distance: 0x8, oneway: false},
- 9: {want: 0x5, have: 0x3e2, distance: 0xa, oneway: true},
- 10: {want: 0xd, have: 0x139, distance: 0xa, oneway: true},
- 11: {want: 0x16, have: 0x367, distance: 0xa, oneway: true},
- 12: {want: 0x21, have: 0x139, distance: 0xa, oneway: true},
- 13: {want: 0x56, have: 0x13e, distance: 0xa, oneway: true},
- 14: {want: 0x58, have: 0x3e2, distance: 0xa, oneway: true},
- 15: {want: 0x71, have: 0x3e2, distance: 0xa, oneway: true},
- 16: {want: 0x75, have: 0x139, distance: 0xa, oneway: true},
- 17: {want: 0x82, have: 0x1be, distance: 0xa, oneway: true},
- 18: {want: 0xa5, have: 0x139, distance: 0xa, oneway: true},
- 19: {want: 0xb2, have: 0x15e, distance: 0xa, oneway: true},
- 20: {want: 0xdd, have: 0x153, distance: 0xa, oneway: true},
- 21: {want: 0xe5, have: 0x139, distance: 0xa, oneway: true},
- 22: {want: 0xe9, have: 0x3a, distance: 0xa, oneway: true},
- 23: {want: 0xf0, have: 0x15e, distance: 0xa, oneway: true},
- 24: {want: 0xf9, have: 0x15e, distance: 0xa, oneway: true},
- 25: {want: 0x100, have: 0x139, distance: 0xa, oneway: true},
- 26: {want: 0x130, have: 0x139, distance: 0xa, oneway: true},
- 27: {want: 0x13c, have: 0x139, distance: 0xa, oneway: true},
- 28: {want: 0x140, have: 0x151, distance: 0xa, oneway: true},
- 29: {want: 0x145, have: 0x13e, distance: 0xa, oneway: true},
- 30: {want: 0x158, have: 0x101, distance: 0xa, oneway: true},
- 31: {want: 0x16d, have: 0x367, distance: 0xa, oneway: true},
- 32: {want: 0x16e, have: 0x139, distance: 0xa, oneway: true},
- 33: {want: 0x16f, have: 0x139, distance: 0xa, oneway: true},
- 34: {want: 0x17e, have: 0x139, distance: 0xa, oneway: true},
- 35: {want: 0x190, have: 0x13e, distance: 0xa, oneway: true},
- 36: {want: 0x194, have: 0x13e, distance: 0xa, oneway: true},
- 37: {want: 0x1a4, have: 0x1be, distance: 0xa, oneway: true},
- 38: {want: 0x1b4, have: 0x139, distance: 0xa, oneway: true},
- 39: {want: 0x1b8, have: 0x139, distance: 0xa, oneway: true},
- 40: {want: 0x1d4, have: 0x15e, distance: 0xa, oneway: true},
- 41: {want: 0x1d7, have: 0x3e2, distance: 0xa, oneway: true},
- 42: {want: 0x1d9, have: 0x139, distance: 0xa, oneway: true},
- 43: {want: 0x1e7, have: 0x139, distance: 0xa, oneway: true},
- 44: {want: 0x1f8, have: 0x139, distance: 0xa, oneway: true},
- 45: {want: 0x20e, have: 0x1e1, distance: 0xa, oneway: true},
- 46: {want: 0x210, have: 0x139, distance: 0xa, oneway: true},
- 47: {want: 0x22d, have: 0x15e, distance: 0xa, oneway: true},
- 48: {want: 0x242, have: 0x3e2, distance: 0xa, oneway: true},
- 49: {want: 0x24a, have: 0x139, distance: 0xa, oneway: true},
- 50: {want: 0x251, have: 0x139, distance: 0xa, oneway: true},
- 51: {want: 0x265, have: 0x139, distance: 0xa, oneway: true},
- 52: {want: 0x274, have: 0x48a, distance: 0xa, oneway: true},
- 53: {want: 0x28a, have: 0x3e2, distance: 0xa, oneway: true},
- 54: {want: 0x28e, have: 0x1f9, distance: 0xa, oneway: true},
- 55: {want: 0x2a3, have: 0x139, distance: 0xa, oneway: true},
- 56: {want: 0x2b5, have: 0x15e, distance: 0xa, oneway: true},
- 57: {want: 0x2b8, have: 0x139, distance: 0xa, oneway: true},
- 58: {want: 0x2be, have: 0x139, distance: 0xa, oneway: true},
- 59: {want: 0x2c3, have: 0x15e, distance: 0xa, oneway: true},
- 60: {want: 0x2ed, have: 0x139, distance: 0xa, oneway: true},
- 61: {want: 0x2f1, have: 0x15e, distance: 0xa, oneway: true},
- 62: {want: 0x2fa, have: 0x139, distance: 0xa, oneway: true},
- 63: {want: 0x2ff, have: 0x7e, distance: 0xa, oneway: true},
- 64: {want: 0x304, have: 0x139, distance: 0xa, oneway: true},
- 65: {want: 0x30b, have: 0x3e2, distance: 0xa, oneway: true},
- 66: {want: 0x31b, have: 0x1be, distance: 0xa, oneway: true},
- 67: {want: 0x31f, have: 0x1e1, distance: 0xa, oneway: true},
- 68: {want: 0x320, have: 0x139, distance: 0xa, oneway: true},
- 69: {want: 0x331, have: 0x139, distance: 0xa, oneway: true},
- 70: {want: 0x351, have: 0x139, distance: 0xa, oneway: true},
- 71: {want: 0x36a, have: 0x347, distance: 0xa, oneway: false},
- 72: {want: 0x36a, have: 0x36f, distance: 0xa, oneway: true},
- 73: {want: 0x37a, have: 0x139, distance: 0xa, oneway: true},
- 74: {want: 0x387, have: 0x139, distance: 0xa, oneway: true},
- 75: {want: 0x389, have: 0x139, distance: 0xa, oneway: true},
- 76: {want: 0x38b, have: 0x15e, distance: 0xa, oneway: true},
- 77: {want: 0x390, have: 0x139, distance: 0xa, oneway: true},
- 78: {want: 0x395, have: 0x139, distance: 0xa, oneway: true},
- 79: {want: 0x39d, have: 0x139, distance: 0xa, oneway: true},
- 80: {want: 0x3a5, have: 0x139, distance: 0xa, oneway: true},
- 81: {want: 0x3be, have: 0x139, distance: 0xa, oneway: true},
- 82: {want: 0x3c4, have: 0x13e, distance: 0xa, oneway: true},
- 83: {want: 0x3d4, have: 0x10d, distance: 0xa, oneway: true},
- 84: {want: 0x3d9, have: 0x139, distance: 0xa, oneway: true},
- 85: {want: 0x3e5, have: 0x15e, distance: 0xa, oneway: true},
- 86: {want: 0x3e9, have: 0x1be, distance: 0xa, oneway: true},
- 87: {want: 0x3fa, have: 0x139, distance: 0xa, oneway: true},
- 88: {want: 0x40c, have: 0x139, distance: 0xa, oneway: true},
- 89: {want: 0x423, have: 0x139, distance: 0xa, oneway: true},
- 90: {want: 0x429, have: 0x139, distance: 0xa, oneway: true},
- 91: {want: 0x431, have: 0x139, distance: 0xa, oneway: true},
- 92: {want: 0x43b, have: 0x139, distance: 0xa, oneway: true},
- 93: {want: 0x43e, have: 0x1e1, distance: 0xa, oneway: true},
- 94: {want: 0x445, have: 0x139, distance: 0xa, oneway: true},
- 95: {want: 0x450, have: 0x139, distance: 0xa, oneway: true},
- 96: {want: 0x461, have: 0x139, distance: 0xa, oneway: true},
- 97: {want: 0x467, have: 0x3e2, distance: 0xa, oneway: true},
- 98: {want: 0x46f, have: 0x139, distance: 0xa, oneway: true},
- 99: {want: 0x476, have: 0x3e2, distance: 0xa, oneway: true},
- 100: {want: 0x3883, have: 0x139, distance: 0xa, oneway: true},
- 101: {want: 0x480, have: 0x139, distance: 0xa, oneway: true},
- 102: {want: 0x482, have: 0x139, distance: 0xa, oneway: true},
- 103: {want: 0x494, have: 0x3e2, distance: 0xa, oneway: true},
- 104: {want: 0x49d, have: 0x139, distance: 0xa, oneway: true},
- 105: {want: 0x4ac, have: 0x529, distance: 0xa, oneway: true},
- 106: {want: 0x4b4, have: 0x139, distance: 0xa, oneway: true},
- 107: {want: 0x4bc, have: 0x3e2, distance: 0xa, oneway: true},
- 108: {want: 0x4e5, have: 0x15e, distance: 0xa, oneway: true},
- 109: {want: 0x4f2, have: 0x139, distance: 0xa, oneway: true},
- 110: {want: 0x512, have: 0x139, distance: 0xa, oneway: true},
- 111: {want: 0x518, have: 0x139, distance: 0xa, oneway: true},
- 112: {want: 0x52f, have: 0x139, distance: 0xa, oneway: true},
-}
-
-// matchScript holds pairs of scriptIDs where readers of one script
-// can typically also read the other. Each is associated with a confidence.
-// Size: 208 bytes, 26 elements
-var matchScript = [26]scriptIntelligibility{
- 0: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x57, haveScript: 0x1f, distance: 0x5},
- 1: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x1f, haveScript: 0x57, distance: 0x5},
- 2: {wantLang: 0x58, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
- 3: {wantLang: 0xa5, haveLang: 0x139, wantScript: 0xe, haveScript: 0x57, distance: 0xa},
- 4: {wantLang: 0x1d7, haveLang: 0x3e2, wantScript: 0x8, haveScript: 0x1f, distance: 0xa},
- 5: {wantLang: 0x210, haveLang: 0x139, wantScript: 0x2b, haveScript: 0x57, distance: 0xa},
- 6: {wantLang: 0x24a, haveLang: 0x139, wantScript: 0x4b, haveScript: 0x57, distance: 0xa},
- 7: {wantLang: 0x251, haveLang: 0x139, wantScript: 0x4f, haveScript: 0x57, distance: 0xa},
- 8: {wantLang: 0x2b8, haveLang: 0x139, wantScript: 0x54, haveScript: 0x57, distance: 0xa},
- 9: {wantLang: 0x304, haveLang: 0x139, wantScript: 0x6b, haveScript: 0x57, distance: 0xa},
- 10: {wantLang: 0x331, haveLang: 0x139, wantScript: 0x72, haveScript: 0x57, distance: 0xa},
- 11: {wantLang: 0x351, haveLang: 0x139, wantScript: 0x21, haveScript: 0x57, distance: 0xa},
- 12: {wantLang: 0x395, haveLang: 0x139, wantScript: 0x7d, haveScript: 0x57, distance: 0xa},
- 13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x33, haveScript: 0x57, distance: 0xa},
- 14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
- 15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
- 16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xca, haveScript: 0x57, distance: 0xa},
- 17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xd7, haveScript: 0x57, distance: 0xa},
- 18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xda, haveScript: 0x57, distance: 0xa},
- 19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x29, haveScript: 0x57, distance: 0xa},
- 20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
- 21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
- 22: {wantLang: 0x4bc, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
- 23: {wantLang: 0x512, haveLang: 0x139, wantScript: 0x3b, haveScript: 0x57, distance: 0xa},
- 24: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x38, haveScript: 0x39, distance: 0xf},
- 25: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x39, haveScript: 0x38, distance: 0x13},
-}
-
-// Size: 90 bytes, 15 elements
-var matchRegion = [15]regionIntelligibility{
- 0: {lang: 0x3a, script: 0x0, group: 0x4, distance: 0x4},
- 1: {lang: 0x3a, script: 0x0, group: 0x84, distance: 0x4},
- 2: {lang: 0x139, script: 0x0, group: 0x1, distance: 0x4},
- 3: {lang: 0x139, script: 0x0, group: 0x81, distance: 0x4},
- 4: {lang: 0x13e, script: 0x0, group: 0x3, distance: 0x4},
- 5: {lang: 0x13e, script: 0x0, group: 0x83, distance: 0x4},
- 6: {lang: 0x3c0, script: 0x0, group: 0x3, distance: 0x4},
- 7: {lang: 0x3c0, script: 0x0, group: 0x83, distance: 0x4},
- 8: {lang: 0x529, script: 0x39, group: 0x2, distance: 0x4},
- 9: {lang: 0x529, script: 0x39, group: 0x82, distance: 0x4},
- 10: {lang: 0x3a, script: 0x0, group: 0x80, distance: 0x5},
- 11: {lang: 0x139, script: 0x0, group: 0x80, distance: 0x5},
- 12: {lang: 0x13e, script: 0x0, group: 0x80, distance: 0x5},
- 13: {lang: 0x3c0, script: 0x0, group: 0x80, distance: 0x5},
- 14: {lang: 0x529, script: 0x39, group: 0x80, distance: 0x5},
-}
-
// Size: 264 bytes, 33 elements
var regionContainment = [33]uint64{
// Entry 0 - 1F
@@ -3683,4 +3428,4 @@
4: {lang: 0x529, script: 0x39, maxScript: 0x39, toRegion: 0x8d, fromRegion: []uint16{0xc6}},
}
-// Total table size 27238 bytes (26KiB); checksum: C9BBE4D5
+// Total table size 25886 bytes (25KiB); checksum: BAE5A319
diff --git a/language/internal/tags.go b/language/internal/tags.go
index de30155..8f6195f 100644
--- a/language/internal/tags.go
+++ b/language/internal/tags.go
@@ -16,19 +16,9 @@
return t
}
-// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
-// It simplifies safe initialization of Tag values.
-func (c CanonType) MustParse(s string) Tag {
- t, err := c.Parse(s)
- if err != nil {
- panic(err)
- }
- return t
-}
-
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
// It simplifies safe initialization of Base values.
-func MustParseBase(s string) Base {
+func MustParseBase(s string) langID {
b, err := ParseBase(s)
if err != nil {
panic(err)
@@ -38,7 +28,7 @@
// MustParseScript is like ParseScript, but panics if the given script cannot be
// parsed. It simplifies safe initialization of Script values.
-func MustParseScript(s string) Script {
+func MustParseScript(s string) scriptID {
scr, err := ParseScript(s)
if err != nil {
panic(err)
@@ -48,7 +38,7 @@
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
// parsed. It simplifies safe initialization of Region values.
-func MustParseRegion(s string) Region {
+func MustParseRegion(s string) regionID {
r, err := ParseRegion(s)
if err != nil {
panic(err)
@@ -56,88 +46,4 @@
return r
}
-var (
- und = Tag{}
-
- Und Tag = Tag{}
-
- Afrikaans Tag = Tag{lang: _af} // af
- Amharic Tag = Tag{lang: _am} // am
- Arabic Tag = Tag{lang: _ar} // ar
- ModernStandardArabic Tag = Tag{lang: _ar, region: _001} // ar-001
- Azerbaijani Tag = Tag{lang: _az} // az
- Bulgarian Tag = Tag{lang: _bg} // bg
- Bengali Tag = Tag{lang: _bn} // bn
- Catalan Tag = Tag{lang: _ca} // ca
- Czech Tag = Tag{lang: _cs} // cs
- Danish Tag = Tag{lang: _da} // da
- German Tag = Tag{lang: _de} // de
- Greek Tag = Tag{lang: _el} // el
- English Tag = Tag{lang: _en} // en
- AmericanEnglish Tag = Tag{lang: _en, region: _US} // en-US
- BritishEnglish Tag = Tag{lang: _en, region: _GB} // en-GB
- Spanish Tag = Tag{lang: _es} // es
- EuropeanSpanish Tag = Tag{lang: _es, region: _ES} // es-ES
- LatinAmericanSpanish Tag = Tag{lang: _es, region: _419} // es-419
- Estonian Tag = Tag{lang: _et} // et
- Persian Tag = Tag{lang: _fa} // fa
- Finnish Tag = Tag{lang: _fi} // fi
- Filipino Tag = Tag{lang: _fil} // fil
- French Tag = Tag{lang: _fr} // fr
- CanadianFrench Tag = Tag{lang: _fr, region: _CA} // fr-CA
- Gujarati Tag = Tag{lang: _gu} // gu
- Hebrew Tag = Tag{lang: _he} // he
- Hindi Tag = Tag{lang: _hi} // hi
- Croatian Tag = Tag{lang: _hr} // hr
- Hungarian Tag = Tag{lang: _hu} // hu
- Armenian Tag = Tag{lang: _hy} // hy
- Indonesian Tag = Tag{lang: _id} // id
- Icelandic Tag = Tag{lang: _is} // is
- Italian Tag = Tag{lang: _it} // it
- Japanese Tag = Tag{lang: _ja} // ja
- Georgian Tag = Tag{lang: _ka} // ka
- Kazakh Tag = Tag{lang: _kk} // kk
- Khmer Tag = Tag{lang: _km} // km
- Kannada Tag = Tag{lang: _kn} // kn
- Korean Tag = Tag{lang: _ko} // ko
- Kirghiz Tag = Tag{lang: _ky} // ky
- Lao Tag = Tag{lang: _lo} // lo
- Lithuanian Tag = Tag{lang: _lt} // lt
- Latvian Tag = Tag{lang: _lv} // lv
- Macedonian Tag = Tag{lang: _mk} // mk
- Malayalam Tag = Tag{lang: _ml} // ml
- Mongolian Tag = Tag{lang: _mn} // mn
- Marathi Tag = Tag{lang: _mr} // mr
- Malay Tag = Tag{lang: _ms} // ms
- Burmese Tag = Tag{lang: _my} // my
- Nepali Tag = Tag{lang: _ne} // ne
- Dutch Tag = Tag{lang: _nl} // nl
- Norwegian Tag = Tag{lang: _no} // no
- Punjabi Tag = Tag{lang: _pa} // pa
- Polish Tag = Tag{lang: _pl} // pl
- Portuguese Tag = Tag{lang: _pt} // pt
- BrazilianPortuguese Tag = Tag{lang: _pt, region: _BR} // pt-BR
- EuropeanPortuguese Tag = Tag{lang: _pt, region: _PT} // pt-PT
- Romanian Tag = Tag{lang: _ro} // ro
- Russian Tag = Tag{lang: _ru} // ru
- Sinhala Tag = Tag{lang: _si} // si
- Slovak Tag = Tag{lang: _sk} // sk
- Slovenian Tag = Tag{lang: _sl} // sl
- Albanian Tag = Tag{lang: _sq} // sq
- Serbian Tag = Tag{lang: _sr} // sr
- SerbianLatin Tag = Tag{lang: _sr, script: _Latn} // sr-Latn
- Swedish Tag = Tag{lang: _sv} // sv
- Swahili Tag = Tag{lang: _sw} // sw
- Tamil Tag = Tag{lang: _ta} // ta
- Telugu Tag = Tag{lang: _te} // te
- Thai Tag = Tag{lang: _th} // th
- Turkish Tag = Tag{lang: _tr} // tr
- Ukrainian Tag = Tag{lang: _uk} // uk
- Urdu Tag = Tag{lang: _ur} // ur
- Uzbek Tag = Tag{lang: _uz} // uz
- Vietnamese Tag = Tag{lang: _vi} // vi
- Chinese Tag = Tag{lang: _zh} // zh
- SimplifiedChinese Tag = Tag{lang: _zh, script: _Hans} // zh-Hans
- TraditionalChinese Tag = Tag{lang: _zh, script: _Hant} // zh-Hant
- Zulu Tag = Tag{lang: _zu} // zu
-)
+var und = Tag{}