proto: simplify StructProperties

StructProperties were the old approach of pseudo-providing reflection
like functionality for Go protobufs. However, the API is fundamentally
too closely tied to Go structs to be easily extensible to the new
protobuf reflection API. In preparation for that migration work,
we simplify the implementation of StructProperties.

Changes made:
* Export Properties.{Proto3,Oneof} as these are sensible
pieces of information relevant to the proto type system.
* Delete StructProperties.{decoderTags,order} and Properties.{stype,sprop,mtype}
as nothing uses these anymore.
* Move the functionality of StructProperties.{reqCount,decoderOrigNames}
to be under text_parser.go, which is the only user of those fields.
* Avoid depending on the interface for XXX_Oneof, but use pure reflection.
This breaks the dependency on the Buffer type.
* Move unrelated declarations to lib.go such as:
protoMessageType, marshalerType, oneofFuncsIface, oneofWrappersIface

This set of changes make it such that all usages of StructProperties
within the proto package itself is on the public API of StructProperties
rather than some internal property.

It also makes it such that properties.go builds on its own with no
dependencies on any other source file inthe proto package.

Change-Id: I96414ce65116846c50e5509b5c9c9c9a65d3b6dc
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/167772
Reviewed-by: Herbie Ong <herbie@google.com>
diff --git a/proto/clone.go b/proto/clone.go
index aea0899..5f97f7a 100644
--- a/proto/clone.go
+++ b/proto/clone.go
@@ -176,7 +176,7 @@
 			// Edge case: if this is in a proto3 message, a zero length
 			// bytes field is considered the zero value, and should not
 			// be merged.
-			if prop != nil && prop.proto3 && in.Len() == 0 {
+			if prop != nil && prop.Proto3 && in.Len() == 0 {
 				return
 			}
 
diff --git a/proto/equal.go b/proto/equal.go
index c709790..88e094e 100644
--- a/proto/equal.go
+++ b/proto/equal.go
@@ -176,7 +176,7 @@
 
 			// Edge case: if this is in a proto3 message, a zero length
 			// bytes field is considered the zero value.
-			if prop != nil && prop.proto3 && v1.Len() == 0 && v2.Len() == 0 {
+			if prop != nil && prop.Proto3 && v1.Len() == 0 && v2.Len() == 0 {
 				return true
 			}
 			if v1.IsNil() != v2.IsNil() {
diff --git a/proto/lib.go b/proto/lib.go
index a35f5db..c18e4a8 100644
--- a/proto/lib.go
+++ b/proto/lib.go
@@ -86,6 +86,19 @@
 // Message is implemented by generated protocol buffer messages.
 type Message = protoapi.Message
 
+var protoMessageType = reflect.TypeOf((*Message)(nil)).Elem()
+
+var marshalerType = reflect.TypeOf((*Marshaler)(nil)).Elem()
+
+type (
+	oneofFuncsIface interface {
+		XXX_OneofFuncs() (func(Message, *Buffer) error, func(Message, int, int, *Buffer) (bool, error), func(Message) int, []interface{})
+	}
+	oneofWrappersIface interface {
+		XXX_OneofWrappers() []interface{}
+	}
+)
+
 // A Buffer is a buffer manager for marshaling and unmarshaling
 // protocol buffers.  It may be reused between invocations to
 // reduce memory usage.  It is not necessary to use a Buffer;
@@ -479,9 +492,8 @@
 // t is a struct type.
 func buildDefaultMessage(t reflect.Type) (dm defaultMessage) {
 	sprop := GetProperties(t)
-	for _, prop := range sprop.Prop {
-		fi, ok := sprop.decoderTags.get(prop.Tag)
-		if !ok {
+	for fi, prop := range sprop.Prop {
+		if prop.Tag <= 0 {
 			// XXX_unrecognized
 			continue
 		}
diff --git a/proto/properties.go b/proto/properties.go
index 538137a..0932558 100644
--- a/proto/properties.go
+++ b/proto/properties.go
@@ -4,15 +4,8 @@
 
 package proto
 
-/*
- * Routines for encoding data into the wire format for protocol buffers.
- */
-
 import (
-	"fmt"
-	"os"
 	"reflect"
-	"sort"
 	"strconv"
 	"strings"
 	"sync"
@@ -21,59 +14,16 @@
 // Constants that identify the encoding of a value on the wire.
 const (
 	WireVarint     = 0
+	WireFixed32    = 5
 	WireFixed64    = 1
 	WireBytes      = 2
 	WireStartGroup = 3
 	WireEndGroup   = 4
-	WireFixed32    = 5
 )
 
-// tagMap is an optimization over map[int]int for typical protocol buffer
-// use-cases. Encoded protocol buffers are often in tag order with small tag
-// numbers.
-type tagMap struct {
-	fastTags []int
-	slowTags map[int]int
-}
-
-// tagMapFastLimit is the upper bound on the tag number that will be stored in
-// the tagMap slice rather than its map.
-const tagMapFastLimit = 1024
-
-func (p *tagMap) get(t int) (int, bool) {
-	if t > 0 && t < tagMapFastLimit {
-		if t >= len(p.fastTags) {
-			return 0, false
-		}
-		fi := p.fastTags[t]
-		return fi, fi >= 0
-	}
-	fi, ok := p.slowTags[t]
-	return fi, ok
-}
-
-func (p *tagMap) put(t int, fi int) {
-	if t > 0 && t < tagMapFastLimit {
-		for len(p.fastTags) < t+1 {
-			p.fastTags = append(p.fastTags, -1)
-		}
-		p.fastTags[t] = fi
-		return
-	}
-	if p.slowTags == nil {
-		p.slowTags = make(map[int]int)
-	}
-	p.slowTags[t] = fi
-}
-
 // StructProperties represents properties for all the fields of a struct.
-// decoderTags and decoderOrigNames should only be used by the decoder.
 type StructProperties struct {
-	Prop             []*Properties  // properties for each field
-	reqCount         int            // required count
-	decoderTags      tagMap         // map from proto tag to struct field number
-	decoderOrigNames map[string]int // map from original name to struct field number
-	order            []int          // list of struct field numbers in tag order
+	Prop []*Properties // properties for each field
 
 	// OneofTypes contains information about the oneof fields in this message.
 	// It is keyed by the original name of a field.
@@ -87,14 +37,9 @@
 	Prop  *Properties
 }
 
-// Implement the sorting interface so we can sort the fields in tag order, as recommended by the spec.
-// See encode.go, (*Buffer).enc_struct.
-
-func (sp *StructProperties) Len() int { return len(sp.order) }
-func (sp *StructProperties) Less(i, j int) bool {
-	return sp.Prop[sp.order[i]].Tag < sp.Prop[sp.order[j]].Tag
-}
-func (sp *StructProperties) Swap(i, j int) { sp.order[i], sp.order[j] = sp.order[j], sp.order[i] }
+func (sp *StructProperties) Len() int           { return len(sp.Prop) }
+func (sp *StructProperties) Less(i, j int) bool { return false }
+func (sp *StructProperties) Swap(i, j int)      { return }
 
 // Properties represents the protocol-specific behavior of a single struct field.
 type Properties struct {
@@ -109,25 +54,20 @@
 	Repeated bool
 	Packed   bool   // relevant for repeated primitives only
 	Enum     string // set for enum types only
-	proto3   bool   // whether this is known to be a proto3 field
-	oneof    bool   // whether this is a oneof field
+	Proto3   bool   // whether this is known to be a proto3 field
+	Oneof    bool   // whether this is a oneof field
 
 	Default    string // default value
 	HasDefault bool   // whether an explicit default was provided
 
-	stype reflect.Type      // set for struct types only
-	sprop *StructProperties // set for struct types only
-
-	mtype      reflect.Type // set for map types only
-	MapKeyProp *Properties  // set for map types only
-	MapValProp *Properties  // set for map types only
+	MapKeyProp *Properties // set for map types only
+	MapValProp *Properties // set for map types only
 }
 
 // String formats the properties in the protobuf struct field tag style.
 func (p *Properties) String() string {
 	s := p.Wire
-	s += ","
-	s += strconv.Itoa(p.Tag)
+	s += "," + strconv.Itoa(p.Tag)
 	if p.Required {
 		s += ",req"
 	}
@@ -141,13 +81,13 @@
 		s += ",packed"
 	}
 	s += ",name=" + p.OrigName
-	if p.JSONName != p.OrigName {
+	if p.JSONName != "" {
 		s += ",json=" + p.JSONName
 	}
-	if p.proto3 {
+	if p.Proto3 {
 		s += ",proto3"
 	}
-	if p.oneof {
+	if p.Oneof {
 		s += ",oneof"
 	}
 	if len(p.Enum) > 0 {
@@ -160,252 +100,141 @@
 }
 
 // Parse populates p by parsing a string in the protobuf struct field tag style.
-func (p *Properties) Parse(s string) {
-	// "bytes,49,opt,name=foo,def=hello!"
-	fields := strings.Split(s, ",") // breaks def=, but handled below.
-	if len(fields) < 2 {
-		fmt.Fprintf(os.Stderr, "proto: tag has too few fields: %q\n", s)
-		return
-	}
-
-	p.Wire = fields[0]
-	switch p.Wire {
-	case "varint":
-		p.WireType = WireVarint
-	case "fixed32":
-		p.WireType = WireFixed32
-	case "fixed64":
-		p.WireType = WireFixed64
-	case "zigzag32":
-		p.WireType = WireVarint
-	case "zigzag64":
-		p.WireType = WireVarint
-	case "bytes", "group":
-		p.WireType = WireBytes
-		// no numeric converter for non-numeric types
-	default:
-		fmt.Fprintf(os.Stderr, "proto: tag has unknown wire type: %q\n", s)
-		return
-	}
-
-	var err error
-	p.Tag, err = strconv.Atoi(fields[1])
-	if err != nil {
-		return
-	}
-
-outer:
-	for i := 2; i < len(fields); i++ {
-		f := fields[i]
-		switch {
-		case f == "req":
-			p.Required = true
-		case f == "opt":
+func (p *Properties) Parse(tag string) {
+	// For example: "bytes,49,opt,name=foo,def=hello!"
+	for len(tag) > 0 {
+		i := strings.IndexByte(tag, ',')
+		if i < 0 {
+			i = len(tag)
+		}
+		switch s := tag[:i]; {
+		case strings.HasPrefix(s, "name="):
+			p.OrigName = s[len("name="):]
+		case strings.HasPrefix(s, "json="):
+			p.JSONName = s[len("json="):]
+		case strings.HasPrefix(s, "enum="):
+			p.Enum = s[len("enum="):]
+		case strings.Trim(s, "0123456789") == "":
+			n, _ := strconv.ParseUint(s, 10, 32)
+			p.Tag = int(n)
+		case s == "opt":
 			p.Optional = true
-		case f == "rep":
+		case s == "req":
+			p.Required = true
+		case s == "rep":
 			p.Repeated = true
-		case f == "packed":
+		case s == "varint" || s == "zigzag32" || s == "zigzag64":
+			p.Wire = s
+			p.WireType = WireVarint
+		case s == "fixed32":
+			p.Wire = s
+			p.WireType = WireFixed32
+		case s == "fixed64":
+			p.Wire = s
+			p.WireType = WireFixed64
+		case s == "bytes" || s == "group":
+			// NOTE: Historically, this used WireBytes even for groups,
+			// when it should have been WireStartGroup.
+			p.Wire = s
+			p.WireType = WireBytes
+		case s == "packed":
 			p.Packed = true
-		case strings.HasPrefix(f, "name="):
-			p.OrigName = f[5:]
-		case strings.HasPrefix(f, "json="):
-			p.JSONName = f[5:]
-		case strings.HasPrefix(f, "enum="):
-			p.Enum = f[5:]
-		case f == "proto3":
-			p.proto3 = true
-		case f == "oneof":
-			p.oneof = true
-		case strings.HasPrefix(f, "def="):
+		case s == "proto3":
+			p.Proto3 = true
+		case s == "oneof":
+			p.Oneof = true
+		case strings.HasPrefix(s, "def="):
+			// The default tag is special in that everything afterwards is the
+			// default regardless of the presence of commas.
 			p.HasDefault = true
-			p.Default = f[4:] // rest of string
-			if i+1 < len(fields) {
-				// Commas aren't escaped, and def is always last.
-				p.Default += "," + strings.Join(fields[i+1:], ",")
-				break outer
-			}
+			p.Default, i = tag[len("def="):], len(tag)
 		}
+		tag = strings.TrimPrefix(tag[i:], ",")
 	}
 }
 
-var protoMessageType = reflect.TypeOf((*Message)(nil)).Elem()
-
-// setFieldProps initializes the field properties for submessages and maps.
-func (p *Properties) setFieldProps(typ reflect.Type, f *reflect.StructField, lockGetProp bool) {
-	switch t1 := typ; t1.Kind() {
-	case reflect.Ptr:
-		if t1.Elem().Kind() == reflect.Struct {
-			p.stype = t1.Elem()
-		}
-
-	case reflect.Slice:
-		if t2 := t1.Elem(); t2.Kind() == reflect.Ptr && t2.Elem().Kind() == reflect.Struct {
-			p.stype = t2.Elem()
-		}
-
-	case reflect.Map:
-		p.mtype = t1
-		p.MapKeyProp = &Properties{}
-		p.MapKeyProp.init(reflect.PtrTo(p.mtype.Key()), "Key", f.Tag.Get("protobuf_key"), nil, lockGetProp)
-		p.MapValProp = &Properties{}
-		vtype := p.mtype.Elem()
-		if vtype.Kind() != reflect.Ptr && vtype.Kind() != reflect.Slice {
-			// The value type is not a message (*T) or bytes ([]byte),
-			// so we need encoders for the pointer to this type.
-			vtype = reflect.PtrTo(vtype)
-		}
-		p.MapValProp.init(vtype, "Value", f.Tag.Get("protobuf_val"), nil, lockGetProp)
-	}
-
-	if p.stype != nil {
-		if lockGetProp {
-			p.sprop = GetProperties(p.stype)
-		} else {
-			p.sprop = getPropertiesLocked(p.stype)
-		}
-	}
-}
-
-var (
-	marshalerType = reflect.TypeOf((*Marshaler)(nil)).Elem()
-)
-
 // Init populates the properties from a protocol buffer struct tag.
 func (p *Properties) Init(typ reflect.Type, name, tag string, f *reflect.StructField) {
-	p.init(typ, name, tag, f, true)
+	p.init(typ, name, tag, f)
 }
 
-func (p *Properties) init(typ reflect.Type, name, tag string, f *reflect.StructField, lockGetProp bool) {
-	// "bytes,49,opt,def=hello!"
+func (p *Properties) init(typ reflect.Type, name, tag string, f *reflect.StructField) {
 	p.Name = name
 	p.OrigName = name
 	if tag == "" {
 		return
 	}
 	p.Parse(tag)
-	p.setFieldProps(typ, f, lockGetProp)
+
+	if typ != nil && typ.Kind() == reflect.Map {
+		p.MapKeyProp = new(Properties)
+		p.MapKeyProp.init(nil, "Key", f.Tag.Get("protobuf_key"), nil)
+		p.MapValProp = new(Properties)
+		p.MapValProp.init(nil, "Value", f.Tag.Get("protobuf_val"), nil)
+	}
 }
 
-var (
-	propertiesMu  sync.RWMutex
-	propertiesMap = make(map[reflect.Type]*StructProperties)
-)
+var propertiesCache sync.Map // map[reflect.Type]*StructProperties
 
 // GetProperties returns the list of properties for the type represented by t.
 // t must represent a generated struct type of a protocol message.
 func GetProperties(t reflect.Type) *StructProperties {
+	if p, ok := propertiesCache.Load(t); ok {
+		return p.(*StructProperties)
+	}
+	p, _ := propertiesCache.LoadOrStore(t, newProperties(t))
+	return p.(*StructProperties)
+}
+
+func newProperties(t reflect.Type) *StructProperties {
 	if t.Kind() != reflect.Struct {
 		panic("proto: type must have kind struct")
 	}
 
-	// Most calls to GetProperties in a long-running program will be
-	// retrieving details for types we have seen before.
-	propertiesMu.RLock()
-	sprop, ok := propertiesMap[t]
-	propertiesMu.RUnlock()
-	if ok {
-		return sprop
-	}
-
-	propertiesMu.Lock()
-	sprop = getPropertiesLocked(t)
-	propertiesMu.Unlock()
-	return sprop
-}
-
-type (
-	oneofFuncsIface interface {
-		XXX_OneofFuncs() (func(Message, *Buffer) error, func(Message, int, int, *Buffer) (bool, error), func(Message) int, []interface{})
-	}
-	oneofWrappersIface interface {
-		XXX_OneofWrappers() []interface{}
-	}
-)
-
-// getPropertiesLocked requires that propertiesMu is held.
-func getPropertiesLocked(t reflect.Type) *StructProperties {
-	if prop, ok := propertiesMap[t]; ok {
-		return prop
-	}
-
 	prop := new(StructProperties)
-	// in case of recursive protos, fill this in now.
-	propertiesMap[t] = prop
 
-	// build properties
-	prop.Prop = make([]*Properties, t.NumField())
-	prop.order = make([]int, t.NumField())
-
+	// Construct a list of properties for each field in the struct.
 	for i := 0; i < t.NumField(); i++ {
-		f := t.Field(i)
 		p := new(Properties)
-		name := f.Name
-		p.init(f.Type, name, f.Tag.Get("protobuf"), &f, false)
+		f := t.Field(i)
+		p.init(f.Type, f.Name, f.Tag.Get("protobuf"), &f)
 
-		oneof := f.Tag.Get("protobuf_oneof") // special case
-		if oneof != "" {
-			// Oneof fields don't use the traditional protobuf tag.
-			p.OrigName = oneof
+		if name := f.Tag.Get("protobuf_oneof"); name != "" {
+			p.OrigName = name
 		}
-		prop.Prop[i] = p
-		prop.order[i] = i
+		prop.Prop = append(prop.Prop, p)
 	}
 
-	// Re-order prop.order.
-	sort.Sort(prop)
-
-	var oots []interface{}
-	switch m := reflect.Zero(reflect.PtrTo(t)).Interface().(type) {
-	case oneofFuncsIface:
-		_, _, _, oots = m.XXX_OneofFuncs()
-	case oneofWrappersIface:
-		oots = m.XXX_OneofWrappers()
+	// Construct a mapping of oneof field names to properties.
+	var oneofWrappers []interface{}
+	if fn, ok := reflect.PtrTo(t).MethodByName("XXX_OneofFuncs"); ok {
+		oneofWrappers = fn.Func.Call([]reflect.Value{reflect.Zero(fn.Type.In(0))})[3].Interface().([]interface{})
 	}
-	if len(oots) > 0 {
-		// Interpret oneof metadata.
+	if fn, ok := reflect.PtrTo(t).MethodByName("XXX_OneofWrappers"); ok {
+		oneofWrappers = fn.Func.Call([]reflect.Value{reflect.Zero(fn.Type.In(0))})[0].Interface().([]interface{})
+	}
+	if len(oneofWrappers) > 0 {
 		prop.OneofTypes = make(map[string]*OneofProperties)
-		for _, oot := range oots {
-			oop := &OneofProperties{
-				Type: reflect.ValueOf(oot).Type(), // *T
+		for _, wrapper := range oneofWrappers {
+			p := &OneofProperties{
+				Type: reflect.ValueOf(wrapper).Type(), // *T
 				Prop: new(Properties),
 			}
-			sft := oop.Type.Elem().Field(0)
-			oop.Prop.Name = sft.Name
-			oop.Prop.Parse(sft.Tag.Get("protobuf"))
-			// There will be exactly one interface field that
-			// this new value is assignable to.
-			for i := 0; i < t.NumField(); i++ {
-				f := t.Field(i)
-				if f.Type.Kind() != reflect.Interface {
-					continue
-				}
-				if !oop.Type.AssignableTo(f.Type) {
-					continue
-				}
-				oop.Field = i
-				break
-			}
-			prop.OneofTypes[oop.Prop.OrigName] = oop
-		}
-	}
+			f := p.Type.Elem().Field(0)
+			p.Prop.Name = f.Name
+			p.Prop.Parse(f.Tag.Get("protobuf"))
 
-	// build required counts
-	// build tags
-	reqCount := 0
-	prop.decoderOrigNames = make(map[string]int)
-	for i, p := range prop.Prop {
-		if strings.HasPrefix(p.Name, "XXX_") {
-			// Internal fields should not appear in tags/origNames maps.
-			// They are handled specially when encoding and decoding.
-			continue
+			// Determine the struct field that contains this oneof.
+			// Each wrapper is assignable to exactly one parent field.
+			for i := 0; i < t.NumField(); i++ {
+				if p.Type.AssignableTo(t.Field(i).Type) {
+					p.Field = i
+					break
+				}
+			}
+			prop.OneofTypes[p.Prop.OrigName] = p
 		}
-		if p.Required {
-			reqCount++
-		}
-		prop.decoderTags.put(p.Tag, i)
-		prop.decoderOrigNames[p.OrigName] = i
 	}
-	prop.reqCount = reqCount
 
 	return prop
 }
diff --git a/proto/table_merge.go b/proto/table_merge.go
index 4edb4f4..cb30bdc 100644
--- a/proto/table_merge.go
+++ b/proto/table_merge.go
@@ -465,7 +465,7 @@
 				}
 			}
 		case reflect.Slice:
-			isProto3 := props.Prop[i].proto3
+			isProto3 := props.Prop[i].Proto3
 			switch {
 			case isPointer:
 				panic("bad pointer in byte slice case in " + tf.Name())
diff --git a/proto/text.go b/proto/text.go
index dcbd98d..439bb76 100644
--- a/proto/text.go
+++ b/proto/text.go
@@ -364,7 +364,7 @@
 			}
 			continue
 		}
-		if props.proto3 && fv.Kind() == reflect.Slice && fv.Len() == 0 {
+		if props.Proto3 && fv.Kind() == reflect.Slice && fv.Len() == 0 {
 			// empty bytes field
 			continue
 		}
diff --git a/proto/text_parser.go b/proto/text_parser.go
index 0371f0d..543176e 100644
--- a/proto/text_parser.go
+++ b/proto/text_parser.go
@@ -14,6 +14,7 @@
 	"reflect"
 	"strconv"
 	"strings"
+	"sync"
 	"unicode/utf8"
 
 	"github.com/golang/protobuf/protoapi"
@@ -357,7 +358,7 @@
 }
 
 // Returns the index in the struct for the named field, as well as the parsed tag properties.
-func structFieldByName(sprops *StructProperties, name string) (int, *Properties, bool) {
+func structFieldByName(sprops *textStructProperties, name string) (int, *Properties, bool) {
 	i, ok := sprops.decoderOrigNames[name]
 	if ok {
 		return i, sprops.Prop[i], true
@@ -407,9 +408,42 @@
 	return nil
 }
 
+var textPropertiesCache sync.Map // map[reflect.Type]*textStructProperties
+
+type textStructProperties struct {
+	*StructProperties
+	reqCount         int
+	decoderOrigNames map[string]int
+}
+
+func getTextProperties(t reflect.Type) *textStructProperties {
+	if p, ok := textPropertiesCache.Load(t); ok {
+		return p.(*textStructProperties)
+	}
+
+	prop := &textStructProperties{StructProperties: GetProperties(t)}
+	reqCount := 0
+	prop.decoderOrigNames = make(map[string]int)
+	for i, p := range prop.Prop {
+		if strings.HasPrefix(p.Name, "XXX_") {
+			// Internal fields should not appear in tags/origNames maps.
+			// They are handled specially when encoding and decoding.
+			continue
+		}
+		if p.Required {
+			reqCount++
+		}
+		prop.decoderOrigNames[p.OrigName] = i
+	}
+	prop.reqCount = reqCount
+
+	textPropertiesCache.Store(t, prop)
+	return prop
+}
+
 func (p *textParser) readStruct(sv reflect.Value, terminator string) error {
 	st := sv.Type()
-	sprops := GetProperties(st)
+	sprops := getTextProperties(st)
 	reqCount := sprops.reqCount
 	var reqFieldErr error
 	fieldSet := make(map[string]bool)