cmd/gotext: improved extraction

- split extraction and translation data in two types
- rewrite fmt strings into translator-readable format
- more intelligent name picking, for instance:
  - use variable name for placeholders
  - if the variable name is too short (two characters or
    fewer), use the type name when it differs from the underlying type.

Change-Id: I80ae9c165892491df6fcedc340d56a08269f47fe
Reviewed-on: https://go-review.googlesource.com/79237
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/cmd/gotext/examples/main.go b/cmd/gotext/examples/main.go
new file mode 100644
index 0000000..d71bbd6
--- /dev/null
+++ b/cmd/gotext/examples/main.go
@@ -0,0 +1,70 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+//go:generate gotext extract
+
+import (
+	"golang.org/x/text/language"
+	"golang.org/x/text/message"
+)
+
+func main() {
+	p := message.NewPrinter(language.English)
+
+	p.Print("Hello world!\n")
+
+	p.Println("Hello", "world!")
+
+	person := "Sheila"
+	place := "Zürich"
+
+	p.Print("Hello ", person, " in ", place, "!\n")
+
+	// Greet a city.
+	p.Print("Hello city!\n")
+
+	city := "Amsterdam"
+	// Greet a city.
+	p.Printf("Hello %s!\n", city)
+
+	town := "Amsterdam"
+	// Greet a town.
+	p.Printf("Hello %s!\n",
+		town, // Town
+	)
+
+	// Person visiting a place.
+	p.Printf("%s is visiting %s!\n",
+		person, // The person of matter.
+		place,  // Place the person is visiting.
+	)
+
+	pp := struct {
+		Person string // The person of matter. // TODO: get this comment.
+		Place  string
+	}{
+		person, place,
+	}
+
+	// extract will drop this comment in favor of the one below.
+	p.Printf("%s is visiting %s!\n", // Person visiting a place.
+		pp.Person,
+		pp.Place, // Place the person is visiting.
+	)
+
+	// Numeric literal
+	p.Printf("%d files remaining!", 2)
+
+	const n = 2
+
+	// Numeric var
+	p.Printf("%d more files remaining!", n)
+
+	type referralCode int
+
+	c := referralCode(5)
+	p.Printf("Use the following code for your discount: %d\n", c)
+}
diff --git a/cmd/gotext/examples/textdata/gotext_en.out.json b/cmd/gotext/examples/textdata/gotext_en.out.json
new file mode 100755
index 0000000..61de664
--- /dev/null
+++ b/cmd/gotext/examples/textdata/gotext_en.out.json
@@ -0,0 +1,185 @@
+[
+    {
+        "key": [
+            "Hello %s!\n"
+        ],
+        "message": {
+            "msg": "Hello {City}!\n"
+        },
+        "args": [
+            {
+                "id": "City",
+                "argNum": 1,
+                "format": [
+                    "%s"
+                ],
+                "type": "string",
+                "underlyingType": "string",
+                "expr": "city",
+                "position": "golang.org/x/text/cmd/gotext/examples/main.go:31:26"
+            }
+        ],
+        "position": "golang.org/x/text/cmd/gotext/examples/main.go:31:10"
+    },
+    {
+        "key": [
+            "Hello %s!\n"
+        ],
+        "message": {
+            "msg": "Hello {Town}!\n"
+        },
+        "args": [
+            {
+                "id": "Town",
+                "argNum": 1,
+                "format": [
+                    "%s"
+                ],
+                "type": "string",
+                "underlyingType": "string",
+                "expr": "town",
+                "comment": "Town",
+                "position": "golang.org/x/text/cmd/gotext/examples/main.go:36:3"
+            }
+        ],
+        "position": "golang.org/x/text/cmd/gotext/examples/main.go:35:10"
+    },
+    {
+        "key": [
+            "%s is visiting %s!\n"
+        ],
+        "message": {
+            "msg": "{Person} is visiting {Place}!\n"
+        },
+        "args": [
+            {
+                "id": "Person",
+                "argNum": 1,
+                "format": [
+                    "%s"
+                ],
+                "type": "string",
+                "underlyingType": "string",
+                "expr": "person",
+                "comment": "The person of matter.",
+                "position": "golang.org/x/text/cmd/gotext/examples/main.go:41:3"
+            },
+            {
+                "id": "Place",
+                "argNum": 2,
+                "format": [
+                    "%s"
+                ],
+                "type": "string",
+                "underlyingType": "string",
+                "expr": "place",
+                "comment": "Place the person is visiting.",
+                "position": "golang.org/x/text/cmd/gotext/examples/main.go:42:3"
+            }
+        ],
+        "position": "golang.org/x/text/cmd/gotext/examples/main.go:40:10"
+    },
+    {
+        "key": [
+            "%s is visiting %s!\n"
+        ],
+        "message": {
+            "msg": "{Person} is visiting {Place}!\n"
+        },
+        "comment": "Person visiting a place.",
+        "args": [
+            {
+                "id": "Person",
+                "argNum": 1,
+                "format": [
+                    "%s"
+                ],
+                "type": "string",
+                "underlyingType": "string",
+                "expr": "pp.Person",
+                "position": "golang.org/x/text/cmd/gotext/examples/main.go:54:3"
+            },
+            {
+                "id": "Place",
+                "argNum": 2,
+                "format": [
+                    "%s"
+                ],
+                "type": "string",
+                "underlyingType": "string",
+                "expr": "pp.Place",
+                "comment": "Place the person is visiting.",
+                "position": "golang.org/x/text/cmd/gotext/examples/main.go:55:3"
+            }
+        ],
+        "position": "golang.org/x/text/cmd/gotext/examples/main.go:53:10"
+    },
+    {
+        "key": [
+            "%d files remaining!"
+        ],
+        "message": {
+            "msg": "{2} files remaining!"
+        },
+        "args": [
+            {
+                "id": "2",
+                "argNum": 1,
+                "format": [
+                    "%d"
+                ],
+                "type": "int",
+                "underlyingType": "int",
+                "expr": "2",
+                "value": "2",
+                "position": "golang.org/x/text/cmd/gotext/examples/main.go:59:34"
+            }
+        ],
+        "position": "golang.org/x/text/cmd/gotext/examples/main.go:59:10"
+    },
+    {
+        "key": [
+            "%d more files remaining!"
+        ],
+        "message": {
+            "msg": "{N} more files remaining!"
+        },
+        "args": [
+            {
+                "id": "N",
+                "argNum": 1,
+                "format": [
+                    "%d"
+                ],
+                "type": "int",
+                "underlyingType": "int",
+                "expr": "n",
+                "value": "2",
+                "position": "golang.org/x/text/cmd/gotext/examples/main.go:64:39"
+            }
+        ],
+        "position": "golang.org/x/text/cmd/gotext/examples/main.go:64:10"
+    },
+    {
+        "key": [
+            "Use the following code for your discount: %d\n"
+        ],
+        "message": {
+            "msg": "Use the following code for your discount: {ReferralCode}\n"
+        },
+        "args": [
+            {
+                "id": "ReferralCode",
+                "argNum": 1,
+                "format": [
+                    "%d"
+                ],
+                "type": "golang.org/x/text/cmd/gotext/examples.referralCode",
+                "underlyingType": "int",
+                "expr": "c",
+                "position": "golang.org/x/text/cmd/gotext/examples/main.go:69:61"
+            }
+        ],
+        "position": "golang.org/x/text/cmd/gotext/examples/main.go:69:10"
+    }
+]
\ No newline at end of file
diff --git a/cmd/gotext/extract.go b/cmd/gotext/extract.go
index 79a9b59..88c7513 100644
--- a/cmd/gotext/extract.go
+++ b/cmd/gotext/extract.go
@@ -13,13 +13,16 @@
 	"go/constant"
 	"go/format"
 	"go/parser"
+	"go/token"
 	"go/types"
 	"io/ioutil"
 	"os"
 	"path"
 	"path/filepath"
 	"strings"
+	"unicode"
 
+	fmtparser "golang.org/x/text/internal/format"
 	"golang.org/x/tools/go/loader"
 )
 
@@ -64,7 +67,7 @@
 		return buf.String()
 	}
 
-	var translations []Translation
+	var messages []Message
 
 	for _, info := range iprog.InitialPackages() {
 		for _, f := range info.Files {
@@ -103,70 +106,88 @@
 					return true
 				}
 
+				fmtType, ok := m[meth.Obj().Name()]
+				if !ok {
+					return true
+				}
 				// argn is the index of the format string.
-				argn, ok := m[meth.Obj().Name()]
-				if !ok || argn >= len(call.Args) {
+				argn := fmtType.arg
+				if argn >= len(call.Args) {
 					return true
 				}
 
-				// Skip calls with non-constant format string.
-				fmtstr := info.Types[call.Args[argn]].Value
-				if fmtstr == nil || fmtstr.Kind() != constant.String {
+				args := call.Args[fmtType.arg:]
+
+				fmtMsg, ok := msgStr(info, args[0])
+				if !ok {
+					// TODO: identify the type of the format argument. If it
+					// is not a string, multiple keys may be defined.
 					return true
 				}
-
-				posn := conf.Fset.Position(call.Lparen)
-				filepos := fmt.Sprintf("%s:%d:%d", filepath.Base(posn.Filename), posn.Line, posn.Column)
-
-				// TODO: identify the type of the format argument. If it is not
-				// a string, multiple keys may be defined.
-				var key []string
-
-				// TODO: replace substitutions (%v) with a translator friendly
-				// notation. For instance:
-				//     "%d files remaining" -> "{numFiles} files remaining", or
-				//     "%d files remaining" -> "{arg1} files remaining"
-				// Alternatively, this could be done at a later stage.
-				msg := constant.StringVal(fmtstr)
-
-				// Construct a Translation unit.
-				c := Translation{
-					Key:              key,
-					Position:         filepath.Join(info.Pkg.Path(), filepos),
-					Original:         Text{Msg: msg},
-					ExtractedComment: getComment(call.Args[0]),
-					// TODO(fix): this doesn't get the before comment.
-					// Comment: getComment(call),
-				}
-
-				for i, arg := range call.Args[argn+1:] {
-					var val string
+				key := []string{fmtMsg}
+				arguments := []Argument{}
+				args = args[1:]
+				simArgs := make([]interface{}, len(args))
+				for i, arg := range args {
+					expr := print(arg)
+					val := ""
 					if v := info.Types[arg].Value; v != nil {
 						val = v.ExactString()
+						simArgs[i] = val
+						switch arg.(type) {
+						case *ast.BinaryExpr, *ast.UnaryExpr:
+							expr = val
+						}
 					}
-					posn := conf.Fset.Position(arg.Pos())
-					filepos := fmt.Sprintf("%s:%d:%d", filepath.Base(posn.Filename), posn.Line, posn.Column)
-					c.Args = append(c.Args, Argument{
-						ID:             i + 1,
+					arguments = append(arguments, Argument{
+						ArgNum:         i + 1,
 						Type:           info.Types[arg].Type.String(),
 						UnderlyingType: info.Types[arg].Type.Underlying().String(),
-						Expr:           print(arg),
+						Expr:           expr,
 						Value:          val,
 						Comment:        getComment(arg),
-						Position:       filepath.Join(info.Pkg.Path(), filepos),
+						Position:       posString(conf, info, arg.Pos()),
 						// TODO report whether it implements
 						// interfaces plural.Interface,
 						// gender.Interface.
 					})
 				}
+				msg := ""
 
-				translations = append(translations, c)
+				p := fmtparser.Parser{}
+				p.Reset(simArgs)
+				for p.SetFormat(fmtMsg); p.Scan(); {
+					switch p.Status {
+					case fmtparser.StatusText:
+						msg += p.Text()
+					case fmtparser.StatusSubstitution,
+						fmtparser.StatusBadWidthSubstitution,
+						fmtparser.StatusBadPrecSubstitution:
+						arg := arguments[p.ArgNum-1]
+						id := getID(&arg)
+						arguments[p.ArgNum-1].ID = id
+						// TODO: do we allow the same entry to be formatted
+						// differently within the same string, do we give
+						// a warning, or is this an error?
+						arguments[p.ArgNum-1].Format = append(arguments[p.ArgNum-1].Format, p.Text())
+						msg += fmt.Sprintf("{%s}", id)
+					}
+				}
+
+				messages = append(messages, Message{
+					Key:      key,
+					Position: posString(conf, info, call.Lparen),
+					Message:  Text{Msg: msg},
+					// TODO(fix): this doesn't get the before comment.
+					Comment: getComment(call.Args[0]),
+					Args:    arguments,
+				})
 				return true
 			})
 		}
 	}
 
-	data, err := json.MarshalIndent(translations, "", "    ")
+	data, err := json.MarshalIndent(messages, "", "    ")
 	if err != nil {
 		return err
 	}
@@ -181,15 +202,60 @@
 	return nil
 }
 
+func posString(conf loader.Config, info *loader.PackageInfo, pos token.Pos) string {
+	p := conf.Fset.Position(pos)
+	file := fmt.Sprintf("%s:%d:%d", filepath.Base(p.Filename), p.Line, p.Column)
+	return filepath.Join(info.Pkg.Path(), file)
+}
+
 // extractFuncs indicates the types and methods for which to extract strings,
 // and which argument to extract.
 // TODO: use the types in conf.Import("golang.org/x/text/message") to extract
 // the correct instances.
-var extractFuncs = map[string]map[string]int{
+var extractFuncs = map[string]map[string]extractType{
 	// TODO: Printer -> *golang.org/x/text/message.Printer
 	"message.Printer": {
-		"Printf":  0,
-		"Sprintf": 0,
-		"Fprintf": 1,
+		"Printf":  extractType{arg: 0, format: true},
+		"Sprintf": extractType{arg: 0, format: true},
+		"Fprintf": extractType{arg: 1, format: true},
+
+		"Lookup": extractType{arg: 0},
 	},
 }
+
+type extractType struct {
+	// format indicates whether the extracted argument is a format string
+	// (true) or whether all arguments are to be concatenated (false).
+	format bool
+	// arg indicates the position of the argument to extract.
+	arg int
+}
+
+func getID(arg *Argument) string {
+	s := getLastComponent(arg.Expr)
+	s = strings.Replace(s, " ", "", -1)
+	// For small variable names, use user-defined types for more info.
+	if len(s) <= 2 && arg.UnderlyingType != arg.Type {
+		s = getLastComponent(arg.Type)
+	}
+	return strings.Title(s)
+}
+
+func getLastComponent(s string) string {
+	return s[1+strings.LastIndexByte(s, '.'):]
+}
+
+func msgStr(info *loader.PackageInfo, e ast.Expr) (s string, ok bool) {
+	v := info.Types[e].Value
+	if v == nil || v.Kind() != constant.String {
+		return "", false
+	}
+	s = constant.StringVal(v)
+	// Only record strings with letters.
+	for _, r := range s {
+		if unicode.In(r, unicode.L) {
+			return s, true
+		}
+	}
+	return "", false
+}
diff --git a/cmd/gotext/message.go b/cmd/gotext/message.go
index 67a622f..7344f8d 100644
--- a/cmd/gotext/message.go
+++ b/cmd/gotext/message.go
@@ -13,67 +13,25 @@
 // A translation may have multiple translations strings, or messages, depending
 // on the feature values of the various arguments. For instance, consider
 // a hypothetical translation from English to English, where the source defines
-// the format string "%d file(s) remaining". A completed translation, expressed
-// in JS, for this format string could look like:
-//
-// {
-//     "Key": [
-//         "\"%d files(s) remaining\""
-//     ],
-//     "Original": {
-//         "Msg": "\"%d files(s) remaining\""
-//     },
-//     "Translation": {
-// 	       "Select": {
-// 	           "Feature": "plural",
-//             "Arg": 1,
-//             "Case": {
-//                 "one":   { "Msg": "1 file remaining" },
-//                 "other": { "Msg": "%d files remaining" }
-//             },
-//         },
-//     },
-//     "Args": [
-//         {
-//             "ID": 2,
-//             "Type": "int",
-//             "UnderlyingType": "int",
-//             "Expr": "nFiles",
-//             "Comment": "number of files remaining",
-//             "Position": "golang.org/x/text/cmd/gotext/demo.go:34:3"
-//         }
-//     ],
-//     "Position": "golang.org/x/text/cmd/gotext/demo.go:33:10",
-// }
-//
-// Alternatively, the Translation section could be written as:
-//
-//     "Translation": {
-// 	       "Msg": "%d %[files]s remaining",
-//         "Var": {
-//             "files" : {
-//                 "Select": {
-//         	           "Feature": "plural",
-//                     "Arg": 1,
-//                     "Case": {
-//                         "one":   { "Msg": "file" },
-//                         "other": { "Msg": "files" }
-//                     }
-//                 }
-//             }
-//         }
-//     }
+// the format string "%d file(s) remaining".
+// See the examples directory for examples of extracted messages.
 
-// A Translation describes a translation for a single language for a single
-// message.
-type Translation struct {
+// A Message describes a message to be translated.
+type Message struct {
 	// Key contains a list of identifiers for the message. If this list is empty
-	// Original is used as the key.
-	Key               []string `json:"key,omitempty"`
-	Original          Text     `json:"original"`
-	Translation       Text     `json:"translation"`
-	ExtractedComment  string   `json:"extractedComment,omitempty"`
-	TranslatorComment string   `json:"translatorComment,omitempty"`
+	// the message itself is used as the key.
+	Key         []string `json:"key,omitempty"`
+	Meaning     string   `json:"meaning,omitempty"`
+	Message     Text     `json:"message"`
+	Translation *Text    `json:"translation,omitempty"`
+
+	Comment           string `json:"comment,omitempty"`
+	TranslatorComment string `json:"translatorComment,omitempty"`
+
+	// TODO: have a separate placeholder list, mapping placeholders
+	// to arguments or constant strings.
+	// TODO: default placeholder syntax is {foo}. Allow alternatives
+	// like `foo`.
 
 	Args []Argument `json:"args,omitempty"`
 
@@ -83,13 +41,19 @@
 
 // An Argument contains information about the arguments passed to a message.
 type Argument struct {
-	ID             interface{} `json:"id"` // An int for printf-style calls, but could be a string.
-	Type           string      `json:"type"`
-	UnderlyingType string      `json:"underlyingType"`
-	Expr           string      `json:"expr"`
-	Value          string      `json:"value,omitempty"`
-	Comment        string      `json:"comment,omitempty"`
-	Position       string      `json:"position,omitempty"`
+	ID string `json:"id"` // Placeholder name substituted into the message text, e.g. "City" for "{City}".
+	// Argument position for printf-style format strings. ArgNum corresponds to
+	// the number that should be used for explicit argument indexes (e.g.
+	// "%[1]d").
+	ArgNum int      `json:"argNum,omitempty"`
+	Format []string `json:"format,omitempty"`
+
+	Type           string `json:"type"`
+	UnderlyingType string `json:"underlyingType"`
+	Expr           string `json:"expr"`
+	Value          string `json:"value,omitempty"`
+	Comment        string `json:"comment,omitempty"`
+	Position       string `json:"position,omitempty"`
 
 	// Features contains the features that are available for the implementation
 	// of this argument.
@@ -118,8 +82,8 @@
 	Example string `json:"example,omitempty"`
 }
 
-// Type Select selects a Text based on the feature value associated with
-// a feature of a certain argument.
+// Select selects a Text based on the feature value associated with a feature of
+// a certain argument.
 type Select struct {
 	Feature string          `json:"feature"` // Name of variable or Feature type
 	Arg     interface{}     `json:"arg"`     // The argument ID.