language: clean up gen
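Mostly switch to using gen.CodeWriter directly (WriteVar, WriteType,
WriteComment) instead of the builder's own output helpers.
Remove many old helper functions and move main to the top of the file.
As a result, the generated tables in tables.go are now emitted as
slices rather than fixed-size arrays, and the reported total table
size grows from 1351 to 1471 bytes.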
Change-Id: I957408391c26d49e1db13265dc8ebb7829e267eb
Reviewed-on: https://go-review.googlesource.com/95823
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/language/gen.go b/language/gen.go
index 5a0b393..a850a4e 100644
--- a/language/gen.go
+++ b/language/gen.go
@@ -14,7 +14,6 @@
"fmt"
"io"
"log"
- "reflect"
"sort"
"strconv"
"strings"
@@ -33,27 +32,17 @@
"output file for generated tables")
)
-var comment = []string{
- `
-matchLang holds pairs of langIDs of base languages that are typically
-mutually intelligible. Each pair is associated with a confidence and
-whether the intelligibility goes one or both ways.`,
- `
-matchScript holds pairs of scriptIDs where readers of one script
-can typically also read the other. Each is associated with a confidence.`,
- `
-nRegionGroups is the number of region groups.`,
- `
-regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
-where each set holds all groupings that are directly connected in a region
-containment graph.`,
- `
-regionInclusionBits is an array of bit vectors where every vector represents
-a set of region groupings. These sets are used to compute the distance
-between two regions for the purpose of language matching.`,
- `
-regionInclusionNext marks, for each entry in regionInclusionBits, the set of
-all groups that are reachable from the groups set in the respective entry.`,
+func main() {
+ gen.Init()
+
+ w := gen.NewCodeWriter()
+ defer w.WriteGoFile("tables.go", "language")
+
+ b := newBuilder(w)
+ gen.WriteCLDRVersion(w)
+
+ b.writeConstants()
+ b.writeMatchData()
}
type builder struct {
@@ -61,20 +50,18 @@
hw io.Writer // MultiWriter for w and w.Hash
data *cldr.CLDR
supp *cldr.SupplementalData
-
- // lang index
- region index
- script index
}
func (b *builder) langIndex(s string) uint16 {
return uint16(language.MustParseBase(s))
}
-type index func(s string) int
+func (b *builder) regionIndex(s string) int {
+ return int(language.MustParseRegion(s))
+}
-func (i index) index(s string) int {
- return i(s)
+func (b *builder) scriptIndex(s string) int {
+ return int(language.MustParseScript(s))
}
func newBuilder(w *gen.CodeWriter) *builder {
@@ -90,84 +77,18 @@
hw: io.MultiWriter(w, w.Hash),
data: data,
supp: data.Supplemental(),
-
- script: func(s string) int {
- return int(language.MustParseScript(s))
- },
-
- region: func(s string) int {
- return int(language.MustParseRegion(s))
- },
}
return &b
}
-var commentIndex = make(map[string]string)
-
-func init() {
- for _, s := range comment {
- key := strings.TrimSpace(strings.SplitN(s, " ", 2)[0])
- commentIndex[key] = s
- }
-}
-
-func (b *builder) comment(name string) {
- if s := commentIndex[name]; len(s) > 0 {
- b.w.WriteComment(s)
- } else {
- fmt.Fprintln(b.w)
- }
-}
-
-func (b *builder) pf(f string, x ...interface{}) {
- fmt.Fprintf(b.hw, f, x...)
- fmt.Fprint(b.hw, "\n")
-}
-
-func (b *builder) p(x ...interface{}) {
- fmt.Fprintln(b.hw, x...)
-}
-
-func (b *builder) addSize(s int) {
- b.w.Size += s
- b.pf("// Size: %d bytes", s)
-}
-
-func (b *builder) writeConst(name string, x interface{}) {
- b.comment(name)
- b.w.WriteConst(name, x)
-}
-
// writeConsts computes f(v) for all v in values and writes the results
// as constants named _v to a single constant block.
func (b *builder) writeConsts(f func(string) int, values ...string) {
- b.pf("const (")
+ fmt.Fprintln(b.w, "const (")
for _, v := range values {
- b.pf("\t_%s = %v", v, f(v))
+ fmt.Fprintf(b.w, "\t_%s = %v\n", v, f(v))
}
- b.pf(")")
-}
-
-// writeType writes the type of the given value, which must be a struct.
-func (b *builder) writeType(value interface{}) {
- b.comment(reflect.TypeOf(value).Name())
- b.w.WriteType(value)
-}
-
-func (b *builder) writeSlice(name string, ss interface{}) {
- b.writeSliceAddSize(name, 0, ss)
-}
-
-func (b *builder) writeSliceAddSize(name string, extraSize int, ss interface{}) {
- b.comment(name)
- b.w.Size += extraSize
- v := reflect.ValueOf(ss)
- t := v.Type().Elem()
- b.pf("// Size: %d bytes, %d elements", v.Len()*int(t.Size())+extraSize, v.Len())
-
- fmt.Fprintf(b.w, "var %s = ", name)
- b.w.WriteArray(ss)
- b.p()
+ fmt.Fprintln(b.w, ")")
}
// TODO: region inclusion data will probably not be use used in future matchers.
@@ -197,8 +118,8 @@
func (b *builder) writeConstants() {
b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
- b.writeConsts(b.region, regionConsts...)
- b.writeConsts(b.script, scriptConsts...)
+ b.writeConsts(b.regionIndex, regionConsts...)
+ b.writeConsts(b.scriptIndex, scriptConsts...)
}
type mutualIntelligibility struct {
@@ -250,12 +171,12 @@
todo := []string{r}
for k := 0; k < len(todo); k++ {
r := todo[k]
- regionToGroups[b.region.index(r)] |= 1 << uint8(i)
+ regionToGroups[b.regionIndex(r)] |= 1 << uint8(i)
todo = append(todo, regionHierarchy[r]...)
}
}
}
- b.writeSlice("regionToGroups", regionToGroups)
+ b.w.WriteVar("regionToGroups", regionToGroups)
// maps language id to in- and out-of-group region.
paradigmLocales := [][3]uint16{}
@@ -266,16 +187,16 @@
pc := strings.SplitN(locales[i+j], "-", 2)
x[0] = b.langIndex(pc[0])
if len(pc) == 2 {
- x[1+j] = uint16(b.region.index(pc[1]))
+ x[1+j] = uint16(b.regionIndex(pc[1]))
}
}
paradigmLocales = append(paradigmLocales, x)
}
- b.writeSlice("paradigmLocales", paradigmLocales)
+ b.w.WriteVar("paradigmLocales", paradigmLocales)
- b.writeType(mutualIntelligibility{})
- b.writeType(scriptIntelligibility{})
- b.writeType(regionIntelligibility{})
+ b.w.WriteType(mutualIntelligibility{})
+ b.w.WriteType(scriptIntelligibility{})
+ b.w.WriteType(regionIntelligibility{})
matchLang := []mutualIntelligibility{}
matchScript := []scriptIntelligibility{}
@@ -301,16 +222,16 @@
matchScript = append(matchScript, scriptIntelligibility{
wantLang: uint16(b.langIndex(d[0])),
haveLang: uint16(b.langIndex(s[0])),
- wantScript: uint8(b.script.index(d[1])),
- haveScript: uint8(b.script.index(s[1])),
+ wantScript: uint8(b.scriptIndex(d[1])),
+ haveScript: uint8(b.scriptIndex(s[1])),
distance: uint8(distance),
})
if m.Oneway != "true" {
matchScript = append(matchScript, scriptIntelligibility{
wantLang: uint16(b.langIndex(s[0])),
haveLang: uint16(b.langIndex(d[0])),
- wantScript: uint8(b.script.index(s[1])),
- haveScript: uint8(b.script.index(d[1])),
+ wantScript: uint8(b.scriptIndex(s[1])),
+ haveScript: uint8(b.scriptIndex(d[1])),
distance: uint8(distance),
})
}
@@ -352,7 +273,7 @@
distance: uint8(distance),
}
if d[1] != "*" {
- ri.script = uint8(b.script.index(d[1]))
+ ri.script = uint8(b.scriptIndex(d[1]))
}
switch {
case d[2] == "*":
@@ -372,28 +293,22 @@
sort.SliceStable(matchLang, func(i, j int) bool {
return matchLang[i].distance < matchLang[j].distance
})
- b.writeSlice("matchLang", matchLang)
+ b.w.WriteComment(`
+ matchLang holds pairs of langIDs of base languages that are typically
+ mutually intelligible. Each pair is associated with a confidence and
+ whether the intelligibility goes one or both ways.`)
+ b.w.WriteVar("matchLang", matchLang)
+ b.w.WriteComment(`
+ matchScript holds pairs of scriptIDs where readers of one script
+ can typically also read the other. Each is associated with a confidence.`)
sort.SliceStable(matchScript, func(i, j int) bool {
return matchScript[i].distance < matchScript[j].distance
})
- b.writeSlice("matchScript", matchScript)
+ b.w.WriteVar("matchScript", matchScript)
sort.SliceStable(matchRegion, func(i, j int) bool {
return matchRegion[i].distance < matchRegion[j].distance
})
- b.writeSlice("matchRegion", matchRegion)
-}
-
-func main() {
- gen.Init()
-
- w := gen.NewCodeWriter()
- defer w.WriteGoFile("tables.go", "language")
-
- b := newBuilder(w)
- gen.WriteCLDRVersion(w)
-
- b.writeConstants()
- b.writeMatchData()
+ b.w.WriteVar("matchRegion", matchRegion)
}
diff --git a/language/tables.go b/language/tables.go
index cb7e1f8..5552ab3 100644
--- a/language/tables.go
+++ b/language/tables.go
@@ -128,8 +128,7 @@
_Zzzz = 242
)
-// Size: 357 bytes, 357 elements
-var regionToGroups = [357]uint8{
+var regionToGroups = []uint8{ // 357 elements
// Entry 0 - 3F
0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04,
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00,
@@ -181,14 +180,13 @@
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00,
-}
+} // Size: 381 bytes
-// Size: 18 bytes, 3 elements
-var paradigmLocales = [3][3]uint16{
+var paradigmLocales = [][3]uint16{ // 3 elements
0: [3]uint16{0x139, 0x0, 0x7b},
1: [3]uint16{0x13e, 0x0, 0x1f},
2: [3]uint16{0x3c0, 0x41, 0xee},
-}
+} // Size: 42 bytes
type mutualIntelligibility struct {
want uint16
@@ -196,7 +194,6 @@
distance uint8
oneway bool
}
-
type scriptIntelligibility struct {
wantLang uint16
haveLang uint16
@@ -204,7 +201,6 @@
haveScript uint8
distance uint8
}
-
type regionIntelligibility struct {
lang uint16
script uint8
@@ -215,8 +211,7 @@
// matchLang holds pairs of langIDs of base languages that are typically
// mutually intelligible. Each pair is associated with a confidence and
// whether the intelligibility goes one or both ways.
-// Size: 678 bytes, 113 elements
-var matchLang = [113]mutualIntelligibility{
+var matchLang = []mutualIntelligibility{ // 113 elements
0: {want: 0x1d1, have: 0xb7, distance: 0x4, oneway: false},
1: {want: 0x407, have: 0xb7, distance: 0x4, oneway: false},
2: {want: 0x407, have: 0x1d1, distance: 0x4, oneway: false},
@@ -330,12 +325,11 @@
110: {want: 0x512, have: 0x139, distance: 0xa, oneway: true},
111: {want: 0x518, have: 0x139, distance: 0xa, oneway: true},
112: {want: 0x52f, have: 0x139, distance: 0xa, oneway: true},
-}
+} // Size: 702 bytes
// matchScript holds pairs of scriptIDs where readers of one script
// can typically also read the other. Each is associated with a confidence.
-// Size: 208 bytes, 26 elements
-var matchScript = [26]scriptIntelligibility{
+var matchScript = []scriptIntelligibility{ // 26 elements
0: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x57, haveScript: 0x1f, distance: 0x5},
1: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x1f, haveScript: 0x57, distance: 0x5},
2: {wantLang: 0x58, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
@@ -362,10 +356,9 @@
23: {wantLang: 0x512, haveLang: 0x139, wantScript: 0x3b, haveScript: 0x57, distance: 0xa},
24: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x38, haveScript: 0x39, distance: 0xf},
25: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x39, haveScript: 0x38, distance: 0x13},
-}
+} // Size: 232 bytes
-// Size: 90 bytes, 15 elements
-var matchRegion = [15]regionIntelligibility{
+var matchRegion = []regionIntelligibility{ // 15 elements
0: {lang: 0x3a, script: 0x0, group: 0x4, distance: 0x4},
1: {lang: 0x3a, script: 0x0, group: 0x84, distance: 0x4},
2: {lang: 0x139, script: 0x0, group: 0x1, distance: 0x4},
@@ -381,6 +374,6 @@
12: {lang: 0x13e, script: 0x0, group: 0x80, distance: 0x5},
13: {lang: 0x3c0, script: 0x0, group: 0x80, distance: 0x5},
14: {lang: 0x529, script: 0x39, group: 0x80, distance: 0x5},
-}
+} // Size: 114 bytes
-// Total table size 1351 bytes (1KiB); checksum: B5A6F1BC
+// Total table size 1471 bytes (1KiB); checksum: 5E04E5F6