http2: optimize server frame writes

Don't do async frame writes when the write is known to be small enough
to definitely fit in the write buffer and not cause a flush (which
might block). This avoids starting a new goroutine and doing a channel
send operation for many frame writes.

name                  old time/op    new time/op    delta
ServerGets-4             146µs ± 2%     144µs ± 1%  -1.46%        (p=0.000 n=14+14)
ServerPosts-4            162µs ± 1%     160µs ± 1%  -1.15%        (p=0.000 n=15+15)
Server_GetRequest-4      145µs ± 1%     143µs ± 0%  -1.26%        (p=0.000 n=15+15)
Server_PostRequest-4     160µs ± 1%     158µs ± 1%  -1.32%        (p=0.000 n=15+15)

The headers frame is the main last one which might show some benefit
if there's a cheap enough way to conservatively calculate its size.

Change-Id: I9be2ddbf04689340819d8701ea671fff378d9e79
Reviewed-on: https://go-review.googlesource.com/31495
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/http2/http2.go b/http2/http2.go
index dd6824d..68afcc4 100644
--- a/http2/http2.go
+++ b/http2/http2.go
@@ -254,14 +254,27 @@
 	return &bufferedWriter{w: w}
 }
 
+// bufWriterPoolBufferSize is the size of bufio.Writer's
+// buffers created using bufWriterPool.
+//
+// TODO: pick a less arbitrary value? this is a bit under
+// (3 x typical 1500 byte MTU) at least. Other than that,
+// not much thought went into it.
+const bufWriterPoolBufferSize = 4 << 10
+
 var bufWriterPool = sync.Pool{
 	New: func() interface{} {
-		// TODO: pick something better? this is a bit under
-		// (3 x typical 1500 byte MTU) at least.
-		return bufio.NewWriterSize(nil, 4<<10)
+		return bufio.NewWriterSize(nil, bufWriterPoolBufferSize)
 	},
 }
 
+func (w *bufferedWriter) Available() int {
+	if w.bw == nil {
+		return bufWriterPoolBufferSize
+	}
+	return w.bw.Available()
+}
+
 func (w *bufferedWriter) Write(p []byte) (n int, err error) {
 	if w.bw == nil {
 		bw := bufWriterPool.Get().(*bufio.Writer)
diff --git a/http2/server.go b/http2/server.go
index a76b16a..345b08c 100644
--- a/http2/server.go
+++ b/http2/server.go
@@ -884,7 +884,12 @@
 
 	sc.writingFrame = true
 	sc.needsFrameFlush = true
-	go sc.writeFrameAsync(wr)
+	if wr.write.staysWithinBuffer(sc.bw.Available()) {
+		err := wr.write.writeFrame(sc)
+		sc.wroteFrame(frameWriteResult{wr, err})
+	} else {
+		go sc.writeFrameAsync(wr)
+	}
 }
 
 // errHandlerPanicked is the error given to any callers blocked in a read from
diff --git a/http2/write.go b/http2/write.go
index 27ef0dd..a45d6de 100644
--- a/http2/write.go
+++ b/http2/write.go
@@ -18,6 +18,11 @@
 // writeFramer is implemented by any type that is used to write frames.
 type writeFramer interface {
 	writeFrame(writeContext) error
+
+	// staysWithinBuffer reports whether this writer promises that
+	// it will only write less than or equal to size bytes, and it
+	// won't Flush the write context.
+	staysWithinBuffer(size int) bool
 }
 
 // writeContext is the interface needed by the various frame writer
@@ -62,8 +67,16 @@
 	return ctx.Flush()
 }
 
+func (flushFrameWriter) staysWithinBuffer(max int) bool { return false }
+
 type writeSettings []Setting
 
+func (s writeSettings) staysWithinBuffer(max int) bool {
+	const settingSize = 6 // uint16 + uint32
+	return frameHeaderLen+settingSize*len(s) <= max
+
+}
+
 func (s writeSettings) writeFrame(ctx writeContext) error {
 	return ctx.Framer().WriteSettings([]Setting(s)...)
 }
@@ -83,6 +96,8 @@
 	return err
 }
 
+func (*writeGoAway) staysWithinBuffer(max int) bool { return false } // flushes
+
 type writeData struct {
 	streamID  uint32
 	p         []byte
@@ -97,6 +112,10 @@
 	return ctx.Framer().WriteData(w.streamID, w.endStream, w.p)
 }
 
+func (w *writeData) staysWithinBuffer(max int) bool {
+	return frameHeaderLen+len(w.p) <= max
+}
+
 // handlerPanicRST is the message sent from handler goroutines when
 // the handler panics.
 type handlerPanicRST struct {
@@ -107,22 +126,30 @@
 	return ctx.Framer().WriteRSTStream(hp.StreamID, ErrCodeInternal)
 }
 
+func (hp handlerPanicRST) staysWithinBuffer(max int) bool { return frameHeaderLen+4 <= max }
+
 func (se StreamError) writeFrame(ctx writeContext) error {
 	return ctx.Framer().WriteRSTStream(se.StreamID, se.Code)
 }
 
+func (se StreamError) staysWithinBuffer(max int) bool { return frameHeaderLen+4 <= max }
+
 type writePingAck struct{ pf *PingFrame }
 
 func (w writePingAck) writeFrame(ctx writeContext) error {
 	return ctx.Framer().WritePing(true, w.pf.Data)
 }
 
+func (w writePingAck) staysWithinBuffer(max int) bool { return frameHeaderLen+len(w.pf.Data) <= max }
+
 type writeSettingsAck struct{}
 
 func (writeSettingsAck) writeFrame(ctx writeContext) error {
 	return ctx.Framer().WriteSettingsAck()
 }
 
+func (writeSettingsAck) staysWithinBuffer(max int) bool { return frameHeaderLen <= max }
+
 // writeResHeaders is a request to write a HEADERS and 0+ CONTINUATION frames
 // for HTTP response headers or trailers from a server handler.
 type writeResHeaders struct {
@@ -144,6 +171,17 @@
 	enc.WriteField(hpack.HeaderField{Name: k, Value: v})
 }
 
+func (w *writeResHeaders) staysWithinBuffer(max int) bool {
+	// TODO: this is a common one. It'd be nice to return true
+	// here and get into the fast path if we could be clever and
+	// calculate the size fast enough, or at least a conservative
+	// uppper bound that usually fires. (Maybe if w.h and
+	// w.trailers are nil, so we don't need to enumerate it.)
+	// Otherwise I'm afraid that just calculating the length to
+	// answer this question would be slower than the ~2µs benefit.
+	return false
+}
+
 func (w *writeResHeaders) writeFrame(ctx writeContext) error {
 	enc, buf := ctx.HeaderEncoder()
 	buf.Reset()
@@ -220,11 +258,18 @@
 	})
 }
 
+func (w write100ContinueHeadersFrame) staysWithinBuffer(max int) bool {
+	// Sloppy but conservative:
+	return 9+2*(len(":status")+len("100")) <= max
+}
+
 type writeWindowUpdate struct {
 	streamID uint32 // or 0 for conn-level
 	n        uint32
 }
 
+func (wu writeWindowUpdate) staysWithinBuffer(max int) bool { return frameHeaderLen+4 <= max }
+
 func (wu writeWindowUpdate) writeFrame(ctx writeContext) error {
 	return ctx.Framer().WriteWindowUpdate(wu.streamID, wu.n)
 }