cmd/gotext: improved extraction
- split extraction and translation data in two types
- rewrite fmt strings into translator-readable format
- more intelligent name picking, for instance:
  - use variable name for placeholders
  - if var is too short, use type name, if it differs
    from the underlying type.
Change-Id: I80ae9c165892491df6fcedc340d56a08269f47fe
Reviewed-on: https://go-review.googlesource.com/79237
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/cmd/gotext/examples/main.go b/cmd/gotext/examples/main.go
new file mode 100644
index 0000000..d71bbd6
--- /dev/null
+++ b/cmd/gotext/examples/main.go
@@ -0,0 +1,70 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+//go:generate gotext extract
+
+import (
+ "golang.org/x/text/language"
+ "golang.org/x/text/message"
+)
+
+func main() {
+ p := message.NewPrinter(language.English)
+
+ p.Print("Hello world!\n")
+
+ p.Println("Hello", "world!")
+
+ person := "Sheila"
+ place := "Zürich"
+
+ p.Print("Hello ", person, " in ", place, "!\n")
+
+ // Greet a city.
+ p.Print("Hello city!\n")
+
+ city := "Amsterdam"
+ // Greet a city.
+ p.Printf("Hello %s!\n", city)
+
+ town := "Amsterdam"
+ // Greet a town.
+ p.Printf("Hello %s!\n",
+ town, // Town
+ )
+
+ // Person visiting a place.
+ p.Printf("%s is visiting %s!\n",
+ person, // The person of matter.
+ place, // Place the person is visiting.
+ )
+
+ pp := struct {
+ Person string // The person of matter. // TODO: get this comment.
+ Place string
+ }{
+ person, place,
+ }
+
+ // extract will drop this comment in favor of the one below.
+ p.Printf("%s is visiting %s!\n", // Person visiting a place.
+ pp.Person,
+ pp.Place, // Place the person is visiting.
+ )
+
+ // Numeric literal
+ p.Printf("%d files remaining!", 2)
+
+ const n = 2
+
+ // Numeric var
+ p.Printf("%d more files remaining!", n)
+
+ type referralCode int
+
+ c := referralCode(5)
+ p.Printf("Use the following code for your discount: %d\n", c)
+}
diff --git a/cmd/gotext/examples/textdata/gotext_en.out.json b/cmd/gotext/examples/textdata/gotext_en.out.json
new file mode 100755
index 0000000..61de664
--- /dev/null
+++ b/cmd/gotext/examples/textdata/gotext_en.out.json
@@ -0,0 +1,185 @@
+[
+ {
+ "key": [
+ "Hello %s!\n"
+ ],
+ "message": {
+ "msg": "Hello {City}!\n"
+ },
+ "args": [
+ {
+ "id": "City",
+ "argNum": 1,
+ "format": [
+ "%s"
+ ],
+ "type": "string",
+ "underlyingType": "string",
+ "expr": "city",
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:31:26"
+ }
+ ],
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:31:10"
+ },
+ {
+ "key": [
+ "Hello %s!\n"
+ ],
+ "message": {
+ "msg": "Hello {Town}!\n"
+ },
+ "args": [
+ {
+ "id": "Town",
+ "argNum": 1,
+ "format": [
+ "%s"
+ ],
+ "type": "string",
+ "underlyingType": "string",
+ "expr": "town",
+ "comment": "Town",
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:36:3"
+ }
+ ],
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:35:10"
+ },
+ {
+ "key": [
+ "%s is visiting %s!\n"
+ ],
+ "message": {
+ "msg": "{Person} is visiting {Place}!\n"
+ },
+ "args": [
+ {
+ "id": "Person",
+ "argNum": 1,
+ "format": [
+ "%s"
+ ],
+ "type": "string",
+ "underlyingType": "string",
+ "expr": "person",
+ "comment": "The person of matter.",
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:41:3"
+ },
+ {
+ "id": "Place",
+ "argNum": 2,
+ "format": [
+ "%s"
+ ],
+ "type": "string",
+ "underlyingType": "string",
+ "expr": "place",
+ "comment": "Place the person is visiting.",
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:42:3"
+ }
+ ],
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:40:10"
+ },
+ {
+ "key": [
+ "%s is visiting %s!\n"
+ ],
+ "message": {
+ "msg": "{Person} is visiting {Place}!\n"
+ },
+ "comment": "Person visiting a place.",
+ "args": [
+ {
+ "id": "Person",
+ "argNum": 1,
+ "format": [
+ "%s"
+ ],
+ "type": "string",
+ "underlyingType": "string",
+ "expr": "pp.Person",
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:54:3"
+ },
+ {
+ "id": "Place",
+ "argNum": 2,
+ "format": [
+ "%s"
+ ],
+ "type": "string",
+ "underlyingType": "string",
+ "expr": "pp.Place",
+ "comment": "Place the person is visiting.",
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:55:3"
+ }
+ ],
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:53:10"
+ },
+ {
+ "key": [
+ "%d files remaining!"
+ ],
+ "message": {
+ "msg": "{2} files remaining!"
+ },
+ "args": [
+ {
+ "id": "2",
+ "argNum": 1,
+ "format": [
+ "%d"
+ ],
+ "type": "int",
+ "underlyingType": "int",
+ "expr": "2",
+ "value": "2",
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:59:34"
+ }
+ ],
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:59:10"
+ },
+ {
+ "key": [
+ "%d more files remaining!"
+ ],
+ "message": {
+ "msg": "{N} more files remaining!"
+ },
+ "args": [
+ {
+ "id": "N",
+ "argNum": 1,
+ "format": [
+ "%d"
+ ],
+ "type": "int",
+ "underlyingType": "int",
+ "expr": "n",
+ "value": "2",
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:64:39"
+ }
+ ],
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:64:10"
+ },
+ {
+ "key": [
+ "Use the following code for your discount: %d\n"
+ ],
+ "message": {
+ "msg": "Use the following code for your discount: {ReferralCode}\n"
+ },
+ "args": [
+ {
+ "id": "ReferralCode",
+ "argNum": 1,
+ "format": [
+ "%d"
+ ],
+ "type": "golang.org/x/text/cmd/gotext/examples.referralCode",
+ "underlyingType": "int",
+ "expr": "c",
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:69:61"
+ }
+ ],
+ "position": "golang.org/x/text/cmd/gotext/examples/main.go:69:10"
+ }
+]
\ No newline at end of file
diff --git a/cmd/gotext/extract.go b/cmd/gotext/extract.go
index 79a9b59..88c7513 100644
--- a/cmd/gotext/extract.go
+++ b/cmd/gotext/extract.go
@@ -13,13 +13,16 @@
"go/constant"
"go/format"
"go/parser"
+ "go/token"
"go/types"
"io/ioutil"
"os"
"path"
"path/filepath"
"strings"
+ "unicode"
+ fmtparser "golang.org/x/text/internal/format"
"golang.org/x/tools/go/loader"
)
@@ -64,7 +67,7 @@
return buf.String()
}
- var translations []Translation
+ var messages []Message
for _, info := range iprog.InitialPackages() {
for _, f := range info.Files {
@@ -103,70 +106,88 @@
return true
}
+ fmtType, ok := m[meth.Obj().Name()]
+ if !ok {
+ return true
+ }
// argn is the index of the format string.
- argn, ok := m[meth.Obj().Name()]
- if !ok || argn >= len(call.Args) {
+ argn := fmtType.arg
+ if argn >= len(call.Args) {
return true
}
- // Skip calls with non-constant format string.
- fmtstr := info.Types[call.Args[argn]].Value
- if fmtstr == nil || fmtstr.Kind() != constant.String {
+ args := call.Args[fmtType.arg:]
+
+ fmtMsg, ok := msgStr(info, args[0])
+ if !ok {
+ // TODO: identify the type of the format argument. If it
+ // is not a string, multiple keys may be defined.
return true
}
-
- posn := conf.Fset.Position(call.Lparen)
- filepos := fmt.Sprintf("%s:%d:%d", filepath.Base(posn.Filename), posn.Line, posn.Column)
-
- // TODO: identify the type of the format argument. If it is not
- // a string, multiple keys may be defined.
- var key []string
-
- // TODO: replace substitutions (%v) with a translator friendly
- // notation. For instance:
- // "%d files remaining" -> "{numFiles} files remaining", or
- // "%d files remaining" -> "{arg1} files remaining"
- // Alternatively, this could be done at a later stage.
- msg := constant.StringVal(fmtstr)
-
- // Construct a Translation unit.
- c := Translation{
- Key: key,
- Position: filepath.Join(info.Pkg.Path(), filepos),
- Original: Text{Msg: msg},
- ExtractedComment: getComment(call.Args[0]),
- // TODO(fix): this doesn't get the before comment.
- // Comment: getComment(call),
- }
-
- for i, arg := range call.Args[argn+1:] {
- var val string
+ key := []string{fmtMsg}
+ arguments := []Argument{}
+ args = args[1:]
+ simArgs := make([]interface{}, len(args))
+ for i, arg := range args {
+ expr := print(arg)
+ val := ""
if v := info.Types[arg].Value; v != nil {
val = v.ExactString()
+ simArgs[i] = val
+ switch arg.(type) {
+ case *ast.BinaryExpr, *ast.UnaryExpr:
+ expr = val
+ }
}
- posn := conf.Fset.Position(arg.Pos())
- filepos := fmt.Sprintf("%s:%d:%d", filepath.Base(posn.Filename), posn.Line, posn.Column)
- c.Args = append(c.Args, Argument{
- ID: i + 1,
+ arguments = append(arguments, Argument{
+ ArgNum: i + 1,
Type: info.Types[arg].Type.String(),
UnderlyingType: info.Types[arg].Type.Underlying().String(),
- Expr: print(arg),
+ Expr: expr,
Value: val,
Comment: getComment(arg),
- Position: filepath.Join(info.Pkg.Path(), filepos),
+ Position: posString(conf, info, arg.Pos()),
// TODO report whether it implements
// interfaces plural.Interface,
// gender.Interface.
})
}
+ msg := ""
- translations = append(translations, c)
+ p := fmtparser.Parser{}
+ p.Reset(simArgs)
+ for p.SetFormat(fmtMsg); p.Scan(); {
+ switch p.Status {
+ case fmtparser.StatusText:
+ msg += p.Text()
+ case fmtparser.StatusSubstitution,
+ fmtparser.StatusBadWidthSubstitution,
+ fmtparser.StatusBadPrecSubstitution:
+ arg := arguments[p.ArgNum-1]
+ id := getID(&arg)
+ arguments[p.ArgNum-1].ID = id
+ // TODO: do we allow the same entry to be formatted
+ // differently within the same string, do we give
+ // a warning, or is this an error?
+ arguments[p.ArgNum-1].Format = append(arguments[p.ArgNum-1].Format, p.Text())
+ msg += fmt.Sprintf("{%s}", id)
+ }
+ }
+
+ messages = append(messages, Message{
+ Key: key,
+ Position: posString(conf, info, call.Lparen),
+ Message: Text{Msg: msg},
+ // TODO(fix): this doesn't get the before comment.
+ Comment: getComment(call.Args[0]),
+ Args: arguments,
+ })
return true
})
}
}
- data, err := json.MarshalIndent(translations, "", " ")
+ data, err := json.MarshalIndent(messages, "", " ")
if err != nil {
return err
}
@@ -181,15 +202,60 @@
return nil
}
+func posString(conf loader.Config, info *loader.PackageInfo, pos token.Pos) string {
+ p := conf.Fset.Position(pos)
+ file := fmt.Sprintf("%s:%d:%d", filepath.Base(p.Filename), p.Line, p.Column)
+ return filepath.Join(info.Pkg.Path(), file)
+}
+
// extractFuncs indicates the types and methods for which to extract strings,
// and which argument to extract.
// TODO: use the types in conf.Import("golang.org/x/text/message") to extract
// the correct instances.
-var extractFuncs = map[string]map[string]int{
+var extractFuncs = map[string]map[string]extractType{
// TODO: Printer -> *golang.org/x/text/message.Printer
"message.Printer": {
- "Printf": 0,
- "Sprintf": 0,
- "Fprintf": 1,
+ "Printf": extractType{arg: 0, format: true},
+ "Sprintf": extractType{arg: 0, format: true},
+ "Fprintf": extractType{arg: 1, format: true},
+
+ "Lookup": extractType{arg: 0},
},
}
+
+type extractType struct {
+ // format indicates if the next arg is a formatted string or whether to
+ // concatenate all arguments
+ format bool
+ // arg indicates the position of the argument to extract.
+ arg int
+}
+
+func getID(arg *Argument) string {
+ s := getLastComponent(arg.Expr)
+ s = strings.Replace(s, " ", "", -1)
+ // For small variable names, use user-defined types for more info.
+ if len(s) <= 2 && arg.UnderlyingType != arg.Type {
+ s = getLastComponent(arg.Type)
+ }
+ return strings.Title(s)
+}
+
+func getLastComponent(s string) string {
+ return s[1+strings.LastIndexByte(s, '.'):]
+}
+
+func msgStr(info *loader.PackageInfo, e ast.Expr) (s string, ok bool) {
+ v := info.Types[e].Value
+ if v == nil || v.Kind() != constant.String {
+ return "", false
+ }
+ s = constant.StringVal(v)
+ // Only record strings with letters.
+ for _, r := range s {
+ if unicode.In(r, unicode.L) {
+ return s, true
+ }
+ }
+ return "", false
+}
diff --git a/cmd/gotext/message.go b/cmd/gotext/message.go
index 67a622f..7344f8d 100644
--- a/cmd/gotext/message.go
+++ b/cmd/gotext/message.go
@@ -13,67 +13,25 @@
// A translation may have multiple translations strings, or messages, depending
// on the feature values of the various arguments. For instance, consider
// a hypothetical translation from English to English, where the source defines
-// the format string "%d file(s) remaining". A completed translation, expressed
-// in JS, for this format string could look like:
-//
-// {
-// "Key": [
-// "\"%d files(s) remaining\""
-// ],
-// "Original": {
-// "Msg": "\"%d files(s) remaining\""
-// },
-// "Translation": {
-// "Select": {
-// "Feature": "plural",
-// "Arg": 1,
-// "Case": {
-// "one": { "Msg": "1 file remaining" },
-// "other": { "Msg": "%d files remaining" }
-// },
-// },
-// },
-// "Args": [
-// {
-// "ID": 2,
-// "Type": "int",
-// "UnderlyingType": "int",
-// "Expr": "nFiles",
-// "Comment": "number of files remaining",
-// "Position": "golang.org/x/text/cmd/gotext/demo.go:34:3"
-// }
-// ],
-// "Position": "golang.org/x/text/cmd/gotext/demo.go:33:10",
-// }
-//
-// Alternatively, the Translation section could be written as:
-//
-// "Translation": {
-// "Msg": "%d %[files]s remaining",
-// "Var": {
-// "files" : {
-// "Select": {
-// "Feature": "plural",
-// "Arg": 1,
-// "Case": {
-// "one": { "Msg": "file" },
-// "other": { "Msg": "files" }
-// }
-// }
-// }
-// }
-// }
+// the format string "%d file(s) remaining".
+// See the examples directory for examples of extracted messages.
-// A Translation describes a translation for a single language for a single
-// message.
-type Translation struct {
+// A Message describes a message to be translated.
+type Message struct {
// Key contains a list of identifiers for the message. If this list is empty
- // Original is used as the key.
- Key []string `json:"key,omitempty"`
- Original Text `json:"original"`
- Translation Text `json:"translation"`
- ExtractedComment string `json:"extractedComment,omitempty"`
- TranslatorComment string `json:"translatorComment,omitempty"`
+ // the message itself is used as the key.
+ Key []string `json:"key,omitempty"`
+ Meaning string `json:"meaning,omitempty"`
+ Message Text `json:"message"`
+ Translation *Text `json:"translation,omitempty"`
+
+ Comment string `json:"comment,omitempty"`
+ TranslatorComment string `json:"translatorComment,omitempty"`
+
+ // TODO: have a separate placeholder list, mapping placeholders
+ // to arguments or constant strings.
+ // TODO: default placeholder syntax is {foo}. Allow alternatives
+ // like `foo`.
Args []Argument `json:"args,omitempty"`
@@ -83,13 +41,19 @@
// An Argument contains information about the arguments passed to a message.
type Argument struct {
- ID interface{} `json:"id"` // An int for printf-style calls, but could be a string.
- Type string `json:"type"`
- UnderlyingType string `json:"underlyingType"`
- Expr string `json:"expr"`
- Value string `json:"value,omitempty"`
- Comment string `json:"comment,omitempty"`
- Position string `json:"position,omitempty"`
+ ID string `json:"id"` // Placeholder name used in the message text, e.g. "City" for "{City}".
+ // Argument position for printf-style format strings. ArgNum corresponds to
+ // the number that should be used for explicit argument indexes (e.g.
+ // "%[1]d").
+ ArgNum int `json:"argNum,omitempty"`
+ Format []string `json:"format,omitempty"`
+
+ Type string `json:"type"`
+ UnderlyingType string `json:"underlyingType"`
+ Expr string `json:"expr"`
+ Value string `json:"value,omitempty"`
+ Comment string `json:"comment,omitempty"`
+ Position string `json:"position,omitempty"`
// Features contains the features that are available for the implementation
// of this argument.
@@ -118,8 +82,8 @@
Example string `json:"example,omitempty"`
}
-// Type Select selects a Text based on the feature value associated with
-// a feature of a certain argument.
+// Select selects a Text based on the feature value associated with a feature of
+// a certain argument.
type Select struct {
Feature string `json:"feature"` // Name of variable or Feature type
Arg interface{} `json:"arg"` // The argument ID.