/*
 *
 * Copyright 2017 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
| |
| // Package stats tracks the statistics associated with benchmark runs. |
| package stats |
| |
| import ( |
| "bytes" |
| "fmt" |
| "log" |
| "math" |
| "runtime" |
| "sort" |
| "strconv" |
| "sync" |
| "time" |
| |
| "google.golang.org/grpc" |
| ) |
| |
// FeatureIndex is an enum for features that usually differ across individual
// benchmark runs in a single execution. These are usually configured by the
// user through command line flags.
type FeatureIndex int

// FeatureIndex enum values corresponding to individually settable features.
// Each index identifies the matching field in the Features struct below; the
// iota ordering is significant because these values index bitmask slices such
// as BenchResults.SharedFeatures.
const (
	EnableTraceIndex        FeatureIndex = iota // Features.EnableTrace
	ReadLatenciesIndex                          // Features.Latency
	ReadKbpsIndex                               // Features.Kbps
	ReadMTUIndex                                // Features.MTU
	MaxConcurrentCallsIndex                     // Features.MaxConcurrentCalls
	ReqSizeBytesIndex                           // Features.ReqSizeBytes
	RespSizeBytesIndex                          // Features.RespSizeBytes
	ReqPayloadCurveIndex                        // Features.ReqPayloadCurve
	RespPayloadCurveIndex                       // Features.RespPayloadCurve
	CompModesIndex                              // Features.ModeCompressor
	EnableChannelzIndex                         // Features.EnableChannelz
	EnablePreloaderIndex                        // Features.EnablePreloader

	// MaxFeatureIndex is a place holder to indicate the total number of feature
	// indices we have. Any new feature indices should be added above this.
	MaxFeatureIndex
)
| |
// Features represent configured options for a specific benchmark run. This is
// usually constructed from command line arguments passed by the caller. See
// benchmark/benchmain/main.go for defined command line flags. This is also
// part of the BenchResults struct which is serialized and written to a file.
type Features struct {
	// NetworkMode is the network mode used for this benchmark run. Could be
	// one of Local, LAN, WAN or Longhaul.
	NetworkMode string
	// UseBufConn indicates whether an in-memory connection was used for this
	// benchmark run instead of system network I/O.
	UseBufConn bool
	// EnableKeepalive indicates if keepalives were enabled on the connections
	// used in this benchmark run.
	EnableKeepalive bool
	// BenchTime indicates the duration of the benchmark run.
	BenchTime time.Duration

	// Features defined above are usually the same for all benchmark runs in a
	// particular invocation, while the features defined below could vary from
	// run to run based on the configured command line. These features have a
	// corresponding FeatureIndex value which is used for a variety of reasons.

	// EnableTrace indicates if tracing was enabled.
	EnableTrace bool
	// Latency is the simulated one-way network latency used.
	Latency time.Duration
	// Kbps is the simulated network throughput used.
	Kbps int
	// MTU is the simulated network MTU used.
	MTU int
	// MaxConcurrentCalls is the number of concurrent RPCs made during this
	// benchmark run.
	MaxConcurrentCalls int
	// ReqSizeBytes is the request size in bytes used in this benchmark run.
	// Unused if ReqPayloadCurve is non-nil.
	ReqSizeBytes int
	// RespSizeBytes is the response size in bytes used in this benchmark run.
	// Unused if RespPayloadCurve is non-nil.
	RespSizeBytes int
	// ReqPayloadCurve is a histogram representing the shape a random
	// distribution of request payloads should take.
	ReqPayloadCurve *PayloadCurve
	// RespPayloadCurve is a histogram representing the shape a random
	// distribution of response payloads should take.
	RespPayloadCurve *PayloadCurve
	// ModeCompressor represents the compressor mode used.
	ModeCompressor string
	// EnableChannelz indicates if channelz was turned on.
	EnableChannelz bool
	// EnablePreloader indicates if preloading was turned on.
	EnablePreloader bool
}
| |
| // String returns all the feature values as a string. |
| func (f Features) String() string { |
| var reqPayloadString, respPayloadString string |
| if f.ReqPayloadCurve != nil { |
| reqPayloadString = fmt.Sprintf("reqPayloadCurve_%s", f.ReqPayloadCurve.ShortHash()) |
| } else { |
| reqPayloadString = fmt.Sprintf("reqSize_%vB", f.ReqSizeBytes) |
| } |
| if f.RespPayloadCurve != nil { |
| respPayloadString = fmt.Sprintf("respPayloadCurve_%s", f.RespPayloadCurve.ShortHash()) |
| } else { |
| respPayloadString = fmt.Sprintf("respSize_%vB", f.RespSizeBytes) |
| } |
| return fmt.Sprintf("networkMode_%v-bufConn_%v-keepalive_%v-benchTime_%v-"+ |
| "trace_%v-latency_%v-kbps_%v-MTU_%v-maxConcurrentCalls_%v-%s-%s-"+ |
| "compressor_%v-channelz_%v-preloader_%v", |
| f.NetworkMode, f.UseBufConn, f.EnableKeepalive, f.BenchTime, f.EnableTrace, |
| f.Latency, f.Kbps, f.MTU, f.MaxConcurrentCalls, reqPayloadString, |
| respPayloadString, f.ModeCompressor, f.EnableChannelz, f.EnablePreloader) |
| } |
| |
| // SharedFeatures returns the shared features as a pretty printable string. |
| // 'wantFeatures' is a bitmask of wanted features, indexed by FeaturesIndex. |
| func (f Features) SharedFeatures(wantFeatures []bool) string { |
| var b bytes.Buffer |
| if f.NetworkMode != "" { |
| b.WriteString(fmt.Sprintf("Network: %v\n", f.NetworkMode)) |
| } |
| if f.UseBufConn { |
| b.WriteString(fmt.Sprintf("UseBufConn: %v\n", f.UseBufConn)) |
| } |
| if f.EnableKeepalive { |
| b.WriteString(fmt.Sprintf("EnableKeepalive: %v\n", f.EnableKeepalive)) |
| } |
| b.WriteString(fmt.Sprintf("BenchTime: %v\n", f.BenchTime)) |
| f.partialString(&b, wantFeatures, ": ", "\n") |
| return b.String() |
| } |
| |
| // PrintableName returns a one line name which includes the features specified |
| // by 'wantFeatures' which is a bitmask of wanted features, indexed by |
| // FeaturesIndex. |
| func (f Features) PrintableName(wantFeatures []bool) string { |
| var b bytes.Buffer |
| f.partialString(&b, wantFeatures, "_", "-") |
| return b.String() |
| } |
| |
// partialString writes features specified by 'wantFeatures' to the provided
// bytes.Buffer. 'sep' separates a feature's name from its value and 'delim'
// terminates each feature entry; callers pass ": "/"\n" for pretty printing
// and "_"/"-" for one-line names.
func (f Features) partialString(b *bytes.Buffer, wantFeatures []bool, sep, delim string) {
	for i, sf := range wantFeatures {
		if sf {
			switch FeatureIndex(i) {
			case EnableTraceIndex:
				b.WriteString(fmt.Sprintf("Trace%v%v%v", sep, f.EnableTrace, delim))
			case ReadLatenciesIndex:
				b.WriteString(fmt.Sprintf("Latency%v%v%v", sep, f.Latency, delim))
			case ReadKbpsIndex:
				b.WriteString(fmt.Sprintf("Kbps%v%v%v", sep, f.Kbps, delim))
			case ReadMTUIndex:
				b.WriteString(fmt.Sprintf("MTU%v%v%v", sep, f.MTU, delim))
			case MaxConcurrentCallsIndex:
				b.WriteString(fmt.Sprintf("Callers%v%v%v", sep, f.MaxConcurrentCalls, delim))
			case ReqSizeBytesIndex:
				b.WriteString(fmt.Sprintf("ReqSize%v%vB%v", sep, f.ReqSizeBytes, delim))
			case RespSizeBytesIndex:
				b.WriteString(fmt.Sprintf("RespSize%v%vB%v", sep, f.RespSizeBytes, delim))
			case ReqPayloadCurveIndex:
				// Payload curves are only printed when configured (non-nil).
				if f.ReqPayloadCurve != nil {
					b.WriteString(fmt.Sprintf("ReqPayloadCurve%vSHA-256:%v%v", sep, f.ReqPayloadCurve.Hash(), delim))
				}
			case RespPayloadCurveIndex:
				if f.RespPayloadCurve != nil {
					b.WriteString(fmt.Sprintf("RespPayloadCurve%vSHA-256:%v%v", sep, f.RespPayloadCurve.Hash(), delim))
				}
			case CompModesIndex:
				b.WriteString(fmt.Sprintf("Compressor%v%v%v", sep, f.ModeCompressor, delim))
			case EnableChannelzIndex:
				b.WriteString(fmt.Sprintf("Channelz%v%v%v", sep, f.EnableChannelz, delim))
			case EnablePreloaderIndex:
				b.WriteString(fmt.Sprintf("Preloader%v%v%v", sep, f.EnablePreloader, delim))
			default:
				// An index >= MaxFeatureIndex means wantFeatures is out of sync
				// with the FeatureIndex enum; this is a programming error.
				log.Fatalf("Unknown feature index %v. maxFeatureIndex is %v", i, MaxFeatureIndex)
			}
		}
	}
}
| |
// BenchResults records features and results of a benchmark run. A collection
// of these structs is usually serialized and written to a file after a
// benchmark execution, and could later be read for pretty-printing or
// comparison with other benchmark results.
type BenchResults struct {
	// GoVersion is the version of the compiler the benchmark was compiled with.
	GoVersion string
	// GrpcVersion is the gRPC version being benchmarked.
	GrpcVersion string
	// RunMode is the workload mode for this benchmark run. This could be unary,
	// stream or unconstrained.
	RunMode string
	// Features represents the configured feature options for this run.
	Features Features
	// SharedFeatures represents the features which were shared across all
	// benchmark runs during one execution. It is a slice indexed by
	// 'FeaturesIndex' and a value of true indicates that the associated
	// feature is shared across all runs.
	SharedFeatures []bool
	// Data contains the statistical data of interest from the benchmark run.
	Data RunData
}
| |
// RunData contains statistical data of interest from a benchmark run.
type RunData struct {
	// TotalOps is the number of operations executed during this benchmark run.
	// Only makes sense for unary and streaming workloads.
	TotalOps uint64
	// SendOps is the number of send operations executed during this benchmark
	// run. Only makes sense for unconstrained workloads.
	SendOps uint64
	// RecvOps is the number of receive operations executed during this benchmark
	// run. Only makes sense for unconstrained workloads.
	RecvOps uint64
	// AllocedBytes is the average memory allocation in bytes per operation.
	AllocedBytes float64
	// Allocs is the average number of memory allocations per operation.
	Allocs float64
	// ReqT is the average request throughput associated with this run, in
	// bits per second.
	ReqT float64
	// RespT is the average response throughput associated with this run, in
	// bits per second.
	RespT float64

	// We store different latencies associated with each run. These latencies are
	// only computed for unary and stream workloads as they are not very useful
	// for unconstrained workloads.

	// Fiftieth is the 50th percentile latency.
	Fiftieth time.Duration
	// Ninetieth is the 90th percentile latency.
	Ninetieth time.Duration
	// NinetyNinth is the 99th percentile latency.
	NinetyNinth time.Duration
	// Average is the average latency.
	Average time.Duration
}
| |
| type durationSlice []time.Duration |
| |
| func (a durationSlice) Len() int { return len(a) } |
| func (a durationSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } |
| func (a durationSlice) Less(i, j int) bool { return a[i] < a[j] } |
| |
// Stats is a helper for gathering statistics about individual benchmark runs.
// All exported methods are safe for concurrent use; mu guards every field.
type Stats struct {
	mu         sync.Mutex
	numBuckets int            // histogram bucket count (configured value + 1 unbounded bucket)
	hw         *histWrapper   // per-run latency data; replaced with a fresh one at the end of each run
	results    []BenchResults // one entry per run, appended by StartRun
	startMS    runtime.MemStats
	stopMS     runtime.MemStats
}

// histWrapper holds the latency data collected during a single benchmark run.
type histWrapper struct {
	unit      time.Duration // display unit chosen by computeLatencies (ns/µs/ms/s)
	histogram *Histogram    // built lazily in computeLatencies from durations
	durations durationSlice // raw per-operation latencies, sorted by computeLatencies
}
| |
| // NewStats creates a new Stats instance. If numBuckets is not positive, the |
| // default value (16) will be used. |
| func NewStats(numBuckets int) *Stats { |
| if numBuckets <= 0 { |
| numBuckets = 16 |
| } |
| // Use one more bucket for the last unbounded bucket. |
| s := &Stats{numBuckets: numBuckets + 1} |
| s.hw = &histWrapper{} |
| return s |
| } |
| |
// StartRun is to be invoked to indicate the start of a new benchmark run. It
// snapshots current memory stats and appends a new BenchResults entry, which
// a subsequent EndRun or EndUnconstrainedRun call fills in.
//
// 'mode' is the workload mode (unary/stream/unconstrained), 'f' the feature
// configuration of the run, and 'sf' the shared-feature bitmask indexed by
// FeatureIndex.
func (s *Stats) StartRun(mode string, f Features, sf []bool) {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Memory snapshot taken at the start; EndRun diffs against it to compute
	// per-operation allocation stats.
	runtime.ReadMemStats(&s.startMS)
	s.results = append(s.results, BenchResults{
		GoVersion:      runtime.Version(),
		GrpcVersion:    grpc.Version,
		RunMode:        mode,
		Features:       f,
		SharedFeatures: sf,
	})
}
| |
// EndRun is to be invoked to indicate the end of the ongoing benchmark run. It
// computes a bunch of stats and dumps them to stdout.
//
// 'count' is the total number of operations performed during the run; it is
// the denominator for the per-operation allocation stats and the numerator
// for throughput.
func (s *Stats) EndRun(count uint64) {
	s.mu.Lock()
	defer s.mu.Unlock()

	runtime.ReadMemStats(&s.stopMS)
	// The most recently appended result (created by StartRun) is the run
	// being ended.
	r := &s.results[len(s.results)-1]
	r.Data = RunData{
		TotalOps: count,
		// Per-op averages from the delta between the start/stop MemStats
		// snapshots.
		AllocedBytes: float64(s.stopMS.TotalAlloc-s.startMS.TotalAlloc) / float64(count),
		Allocs:       float64(s.stopMS.Mallocs-s.startMS.Mallocs) / float64(count),
		// Throughput in bits/second: ops * payload bytes * 8 / duration.
		ReqT:  float64(count) * float64(r.Features.ReqSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
		RespT: float64(count) * float64(r.Features.RespSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
	}
	s.computeLatencies(r)
	s.dump(r)
	// Reset latency collection for the next run.
	s.hw = &histWrapper{}
}
| |
// EndUnconstrainedRun is similar to EndRun, but is to be used for
// unconstrained workloads.
//
// 'req' and 'resp' are the number of send and receive operations performed
// during the run, respectively.
func (s *Stats) EndUnconstrainedRun(req uint64, resp uint64) {
	s.mu.Lock()
	defer s.mu.Unlock()

	runtime.ReadMemStats(&s.stopMS)
	// The most recently appended result (created by StartRun) is the run
	// being ended.
	r := &s.results[len(s.results)-1]
	r.Data = RunData{
		SendOps: req,
		RecvOps: resp,
		// Per-op averages use the mean of send and receive counts as the
		// operation count for this workload.
		AllocedBytes: float64(s.stopMS.TotalAlloc-s.startMS.TotalAlloc) / float64((req+resp)/2),
		Allocs:       float64(s.stopMS.Mallocs-s.startMS.Mallocs) / float64((req+resp)/2),
		// Throughput in bits/second: ops * payload bytes * 8 / duration.
		ReqT:  float64(req) * float64(r.Features.ReqSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
		RespT: float64(resp) * float64(r.Features.RespSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
	}
	s.computeLatencies(r)
	s.dump(r)
	// Reset latency collection for the next run.
	s.hw = &histWrapper{}
}
| |
| // AddDuration adds an elapsed duration per operation to the stats. This is |
| // used by unary and stream modes where request and response stats are equal. |
| func (s *Stats) AddDuration(d time.Duration) { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| |
| s.hw.durations = append(s.hw.durations, d) |
| } |
| |
| // GetResults returns the results from all benchmark runs. |
| func (s *Stats) GetResults() []BenchResults { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| |
| return s.results |
| } |
| |
// computeLatencies computes percentile latencies based on durations stored in
// the stats object and updates the corresponding fields in the result object.
// It is a no-op when no durations were recorded for the current run.
func (s *Stats) computeLatencies(result *BenchResults) {
	if len(s.hw.durations) == 0 {
		return
	}
	sort.Sort(s.hw.durations)
	minDuration := int64(s.hw.durations[0])
	maxDuration := int64(s.hw.durations[len(s.hw.durations)-1])

	// Use the largest unit that can represent the minimum time duration.
	s.hw.unit = time.Nanosecond
	for _, u := range []time.Duration{time.Microsecond, time.Millisecond, time.Second} {
		if minDuration <= int64(u) {
			break
		}
		s.hw.unit = u
	}

	// Cap the bucket count so there are never more buckets than distinct
	// nanosecond values in [min, max].
	numBuckets := s.numBuckets
	if n := int(maxDuration - minDuration + 1); n < numBuckets {
		numBuckets = n
	}
	// NOTE(review): if numBuckets ends up <= 2 here, the exponent below is a
	// division by zero or negative, producing an Inf/odd GrowthFactor —
	// presumably NewHistogram tolerates this or inputs keep numBuckets > 2;
	// confirm.
	s.hw.histogram = NewHistogram(HistogramOptions{
		NumBuckets: numBuckets,
		// max-min(lower bound of last bucket) = (1 + growthFactor)^(numBuckets-2) * baseBucketSize.
		GrowthFactor:   math.Pow(float64(maxDuration-minDuration), 1/float64(numBuckets-2)) - 1,
		BaseBucketSize: 1.0,
		MinValue:       minDuration,
	})
	for _, d := range s.hw.durations {
		s.hw.histogram.Add(int64(d))
	}
	// Percentiles are read from the sorted durations slice directly;
	// histogram.Count equals len(s.hw.durations) since every duration was
	// added above. max(..., 0) guards the index for very small counts.
	result.Data.Fiftieth = s.hw.durations[max(s.hw.histogram.Count*int64(50)/100-1, 0)]
	result.Data.Ninetieth = s.hw.durations[max(s.hw.histogram.Count*int64(90)/100-1, 0)]
	result.Data.NinetyNinth = s.hw.durations[max(s.hw.histogram.Count*int64(99)/100-1, 0)]
	result.Data.Average = time.Duration(float64(s.hw.histogram.Sum) / float64(s.hw.histogram.Count))
}
| |
// dump pretty-prints the given result to stdout: version info, the feature
// string, latency percentiles, allocation stats, the latency histogram and
// throughput numbers.
func (s *Stats) dump(result *BenchResults) {
	var b bytes.Buffer

	// Go and gRPC version information.
	b.WriteString(fmt.Sprintf("%s/grpc%s\n", result.GoVersion, result.GrpcVersion))

	// This prints the run mode and all features of the bench on a line.
	b.WriteString(fmt.Sprintf("%s-%s:\n", result.RunMode, result.Features.String()))

	unit := s.hw.unit
	// Dropping the first byte strips the leading "1" from e.g. "1ms".
	tUnit := fmt.Sprintf("%v", unit)[1:] // stores one of s, ms, μs, ns

	// Latencies are only printed when computed (zero means unconstrained
	// workload or no recorded durations).
	if l := result.Data.Fiftieth; l != 0 {
		b.WriteString(fmt.Sprintf("50_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
	}
	if l := result.Data.Ninetieth; l != 0 {
		b.WriteString(fmt.Sprintf("90_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
	}
	if l := result.Data.NinetyNinth; l != 0 {
		b.WriteString(fmt.Sprintf("99_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
	}
	if l := result.Data.Average; l != 0 {
		b.WriteString(fmt.Sprintf("Avg_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
	}
	b.WriteString(fmt.Sprintf("Bytes/op: %v\t", result.Data.AllocedBytes))
	b.WriteString(fmt.Sprintf("Allocs/op: %v\t\n", result.Data.Allocs))

	// This prints the histogram stats for the latency.
	if s.hw.histogram == nil {
		b.WriteString("Histogram (empty)\n")
	} else {
		b.WriteString(fmt.Sprintf("Histogram (unit: %s)\n", tUnit))
		s.hw.histogram.PrintWithUnit(&b, float64(unit))
	}

	// Print throughput data. For unary/stream workloads SendOps/RecvOps are
	// zero, so fall back to TotalOps for both counts.
	req := result.Data.SendOps
	if req == 0 {
		req = result.Data.TotalOps
	}
	resp := result.Data.RecvOps
	if resp == 0 {
		resp = result.Data.TotalOps
	}
	b.WriteString(fmt.Sprintf("Number of requests:  %v\tRequest throughput:  %v bit/s\n", req, result.Data.ReqT))
	b.WriteString(fmt.Sprintf("Number of responses: %v\tResponse throughput: %v bit/s\n", resp, result.Data.RespT))
	fmt.Println(b.String())
}
| |
// max returns the larger of the two provided int64 values.
func max(a, b int64) int64 {
	if b > a {
		return b
	}
	return a
}