trace: don't allocate all events upfront
SetMaxEvents allocates storage for all events, which is very expensive
if we want to set a high value; even with small values, the
unnecessary allocation of the initial slice is measurable.
We improve the performance by having a few events preallocated as part
of the trace, and appending as necessary. We also co-locate the flags
in event which makes it smaller (by a word).
We also add a set of benchmarks; the amortized cost per event before
and after the change is shown:
name old time/op new time/op delta
Trace/0-2-32 1.19µs ± 1% 0.96µs ± 0% -18.94% (p=0.008 n=5+5)
Trace/0-10-32 579ns ± 0% 635ns ± 1% +9.82% (p=0.008 n=5+5)
Trace/0-100-32 463ns ± 1% 466ns ± 1% ~ (p=0.190 n=5+5)
Trace/0-1000-32 451ns ± 1% 451ns ± 0% ~ (p=0.984 n=5+5)
Trace/0-10000-32 451ns ± 2% 449ns ± 1% ~ (p=0.492 n=5+5)
Trace/10-2-32 1.41µs ± 1% 0.96µs ± 1% -31.74% (p=0.008 n=5+5)
Trace/10-10-32 623ns ± 1% 634ns ± 1% +1.73% (p=0.008 n=5+5)
Trace/10-100-32 469ns ± 1% 466ns ± 1% ~ (p=0.357 n=5+5)
Trace/10-1000-32 452ns ± 1% 453ns ± 2% ~ (p=0.913 n=5+5)
Trace/10-10000-32 451ns ± 1% 449ns ± 1% ~ (p=0.175 n=5+5)
Trace/100-2-32 2.78µs ± 2% 0.97µs ± 1% -65.28% (p=0.008 n=5+5)
Trace/100-10-32 912ns ± 1% 637ns ± 1% -30.23% (p=0.008 n=5+5)
Trace/100-100-32 479ns ± 1% 541ns ± 0% +12.77% (p=0.008 n=5+5)
Trace/100-1000-32 510ns ± 0% 541ns ± 1% +6.08% (p=0.008 n=5+5)
Trace/100-10000-32 514ns ± 1% 540ns ± 0% +5.14% (p=0.008 n=5+5)
Trace/1000-2-32 17.2µs ± 1% 1.0µs ± 1% -94.39% (p=0.008 n=5+5)
Trace/1000-10-32 3.90µs ± 1% 0.64µs ± 0% -83.68% (p=0.008 n=5+5)
Trace/1000-100-32 814ns ± 1% 542ns ± 0% -33.46% (p=0.008 n=5+5)
Trace/1000-1000-32 503ns ± 1% 581ns ± 0% +15.56% (p=0.008 n=5+5)
Trace/1000-10000-32 1.28µs ± 2% 1.03µs ± 1% -19.68% (p=0.008 n=5+5)
Change-Id: If7c68bb1809fb92fa5d06cb6640be5e09e1f131f
Reviewed-on: https://go-review.googlesource.com/30374
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/trace/trace.go b/trace/trace.go
index 182499a..1826586 100644
--- a/trace/trace.go
+++ b/trace/trace.go
@@ -333,7 +333,8 @@
tr.ref()
tr.Family, tr.Title = family, title
tr.Start = time.Now()
- tr.events = make([]event, 0, maxEventsPerTrace)
+ tr.maxEvents = maxEventsPerTrace
+ tr.events = tr.eventsBuf[:0]
activeMu.RLock()
s := activeTraces[tr.Family]
@@ -650,8 +651,8 @@
Elapsed time.Duration // since previous event in trace
NewDay bool // whether this event is on a different day to the previous event
Recyclable bool // whether this event was passed via LazyLog
- What interface{} // string or fmt.Stringer
Sensitive bool // whether this event contains sensitive information
+ What interface{} // string or fmt.Stringer
}
// WhenString returns a string representation of the elapsed time of the event.
@@ -692,14 +693,17 @@
IsError bool
// Append-only sequence of events (modulo discards).
- mu sync.RWMutex
- events []event
+ mu sync.RWMutex
+ events []event
+ maxEvents int
refs int32 // how many buckets this is in
recycler func(interface{})
disc discarded // scratch space to avoid allocation
finishStack []byte // where finish was called, if DebugUseAfterFinish is set
+
+ eventsBuf [4]event // preallocated buffer in case we only log a few events
}
func (tr *trace) reset() {
@@ -711,11 +715,15 @@
tr.traceID = 0
tr.spanID = 0
tr.IsError = false
+ tr.maxEvents = 0
tr.events = nil
tr.refs = 0
tr.recycler = nil
tr.disc = 0
tr.finishStack = nil
+ for i := range tr.eventsBuf {
+ tr.eventsBuf[i] = event{}
+ }
}
// delta returns the elapsed time since the last event or the trace start,
@@ -753,11 +761,11 @@
e := event{When: time.Now(), What: x, Recyclable: recyclable, Sensitive: sensitive}
tr.mu.Lock()
e.Elapsed, e.NewDay = tr.delta(e.When)
- if len(tr.events) < cap(tr.events) {
+ if len(tr.events) < tr.maxEvents {
tr.events = append(tr.events, e)
} else {
// Discard the middle events.
- di := int((cap(tr.events) - 1) / 2)
+ di := int((tr.maxEvents - 1) / 2)
if d, ok := tr.events[di].What.(*discarded); ok {
(*d)++
} else {
@@ -777,7 +785,7 @@
go tr.recycler(tr.events[di+1].What)
}
copy(tr.events[di+1:], tr.events[di+2:])
- tr.events[cap(tr.events)-1] = e
+ tr.events[tr.maxEvents-1] = e
}
tr.mu.Unlock()
}
@@ -803,7 +811,7 @@
func (tr *trace) SetMaxEvents(m int) {
// Always keep at least three events: first, discarded count, last.
if len(tr.events) == 0 && m > 3 {
- tr.events = make([]event, 0, m)
+ tr.maxEvents = m
}
}
diff --git a/trace/trace_test.go b/trace/trace_test.go
index 14d7c23..28758a2 100644
--- a/trace/trace_test.go
+++ b/trace/trace_test.go
@@ -5,6 +5,7 @@
package trace
import (
+ "fmt"
"net/http"
"reflect"
"testing"
@@ -69,3 +70,27 @@
}
}
}
+
+func benchmarkTrace(b *testing.B, maxEvents, numEvents int) {
+ numSpans := (b.N + numEvents + 1) / numEvents
+
+ for i := 0; i < numSpans; i++ {
+ tr := New("test", "test")
+ tr.SetMaxEvents(maxEvents)
+ for j := 0; j < numEvents; j++ {
+ tr.LazyPrintf("%d", j)
+ }
+ tr.Finish()
+ }
+}
+
+func BenchmarkTrace(b *testing.B) {
+ for _, maxEvents := range []int{0, 10, 100, 1000} {
+ for _, numEvents := range []int{2, 10, 100, 1000, 10000} {
+ name := fmt.Sprintf("%d-%d", maxEvents, numEvents)
+ b.Run(name, func(b *testing.B) {
+ benchmarkTrace(b, maxEvents, numEvents)
+ })
+ }
+ }
+}