// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package driver provides common benchmarking logic shared between benchmarks.
//
// A benchmark should call Main with a benchmark function. The
// benchmark function can do one of two things when invoked:
//  1. Do whatever it wants, then fill in and return a Result object.
//  2. Call the Benchmark helper function with a benchmarking function of the
//     form func(N uint64), similar to standard testing benchmarks. The rest
//     is handled by the driver.
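//
// For example, a benchmark that delegates iteration handling to the driver
// might look like this (a minimal sketch; doRequest is a hypothetical
// function, not part of this package):
//
//	func main() {
//		driver.Main("Example", func() driver.Result {
//			return driver.Benchmark(func(n uint64) {
//				for i := uint64(0); i < n; i++ {
//					doRequest()
//				}
//			})
//		})
//	}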
package driver // import "golang.org/x/benchmarks/driver"

import (
	"bytes"
	"flag"
	"fmt"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"runtime/pprof"
	"sort"
	"sync"
	"sync/atomic"
	"time"
)

var (
	flake     = flag.Int("flake", 0, "test flakiness of a benchmark")
	benchNum  = flag.Int("benchnum", 1, "number of benchmark runs")
	benchMem  = flag.Int("benchmem", 64, "approx RSS value to aim at in benchmarks, in MB")
	benchTime = flag.Duration("benchtime", 5*time.Second, "run enough iterations of each benchmark to take the specified time")
	affinity  = flag.Int("affinity", 0, "process affinity (passed to an OS-specific function like sched_setaffinity/SetProcessAffinityMask)")
	tmpDir    = flag.String("tmpdir", os.TempDir(), "dir for temporary files")
	genSvg    = flag.Bool("svg", false, "generate svg profiles")

	// BenchTime and WorkDir are copies of the corresponding flag values,
	// exported so that benchmarks can read them after Main parses the flags.
	BenchTime time.Duration
	WorkDir   string

	// usedBenchMem records whether the benchmark queried BenchMem,
	// so that report can reflect the value in the benchmark name.
	usedBenchMem bool

	// startTrace starts runtime tracing if supported and
	// requested, and returns a function to stop tracing.
	startTrace = func() func() {
		return func() {}
	}
)

// Main parses the flags, then runs the benchmark function f the requested
// number of times and reports the results.
func Main(name string, f func() Result) {
	flag.Parse()
	// Copy to public variables, so that benchmarks can access the values.
	BenchTime = *benchTime
	WorkDir = *tmpDir

	if *affinity != 0 {
		setProcessAffinity(*affinity)
	}

	setupWatchdog()

	if *flake > 0 {
		testFlakiness(f, *flake)
		return
	}

	fmt.Print("pkg: golang.org/x/benchmarks\n")
	fmt.Printf("goos: %s\ngoarch: %s\n\n", runtime.GOOS, runtime.GOARCH)

	stopTrace := startTrace()
	defer stopTrace()

	for i := 0; i < *benchNum; i++ {
		res := f()
		report(name, res)
	}
}

// BenchMem returns the approximate RSS value to aim at, in MB, and marks
// the benchmark as memory-dependent so that report includes the value in
// the benchmark name.
func BenchMem() int {
	usedBenchMem = true
	return *benchMem
}

// setupWatchdog starts a background timer that panics if the process runs
// far longer than the benchmark settings can reasonably require.
func setupWatchdog() {
	t := *benchTime
	// Be somewhat conservative; e.g. the build benchmark
	// does not care about benchTime.
	if t < time.Minute {
		t = time.Minute
	}
	t *= time.Duration(*benchNum)
	t *= 2 // to account for iteration number auto-tuning
	if *flake > 0 {
		t *= time.Duration(*flake + 2)
	}
	go func() {
		time.Sleep(t)
		panic(fmt.Sprintf("timed out after %v", t))
	}()
}
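
// For example, with the default -benchtime=5s and -benchnum=1, the budget
// is first raised to one minute, multiplied by benchNum (1) and then by 2,
// so the watchdog fires after 2 minutes.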

// testFlakiness runs the function N+2 times and prints metrics diffs between
// the second and subsequent runs.
func testFlakiness(f func() Result, N int) {
	res := make([]Result, N+2)
	for i := range res {
		res[i] = f()
	}
	fmt.Printf("\n")
	// Use the second run as the baseline; the first run is discarded
	// as warm-up.
	for k, v := range res[1].Metrics {
		fmt.Printf("%v:\t", k)
		for i := 2; i < len(res); i++ {
			d := 100*float64(v)/float64(res[i].Metrics[k]) - 100
			fmt.Printf(" %+.2f%%", d)
		}
		fmt.Printf("\n")
	}
}

// Result contains all the interesting data about benchmark execution.
type Result struct {
	N        uint64            // number of iterations
	Duration time.Duration     // total run duration
	RunTime  uint64            // ns/op
	Metrics  map[string]uint64 // metric name (e.g. "allocs/op") -> value
	Files    map[string]string // file kind (e.g. "cpuprof") -> file name
}

// MakeResult returns a Result with the maps initialized.
func MakeResult() Result {
	return Result{Metrics: make(map[string]uint64), Files: make(map[string]string)}
}

// report prints the benchmark results in the standard Go benchmark format.
func report(name string, res Result) {
	for fname, path := range res.Files {
		fmt.Printf("# %s=%s\n", fname, path)
	}

	if usedBenchMem {
		name = fmt.Sprintf("%s/benchmem-MB=%d", name, *benchMem)
	}
	fmt.Printf("Benchmark%s-%d %8d\t%10d ns/op", name, runtime.GOMAXPROCS(-1), res.N, res.RunTime)
	var metrics []string
	for metric := range res.Metrics {
		if metric == "ns/op" {
			// Already reported from res.RunTime.
			continue
		}
		metrics = append(metrics, metric)
	}
	sort.Strings(metrics)
	for _, metric := range metrics {
		fmt.Printf("\t%10d %s", res.Metrics[metric], metric)
	}
	fmt.Printf("\n")
}
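
// For illustration, a run of a hypothetical benchmark named "Example" on
// 8 procs might print something like this (all names, paths and numbers
// are made up):
//
//	# cpuprof=/tmp/2.cpuprof
//	BenchmarkExample-8     2000	   1500000 ns/op	      4096 allocated-bytes/op	        12 allocs/op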

// Benchmark runs f several times, collects stats,
// and creates cpu/mem profiles.
func Benchmark(f func(uint64)) Result {
	res := runBenchmark(f)

	// Post-process the raw profiles with pprof; replace them in res.Files
	// only if processing succeeded.
	cpuprof := processProfile(os.Args[0], res.Files["cpuprof"])
	delete(res.Files, "cpuprof")
	if cpuprof != "" {
		res.Files["cpuprof"] = cpuprof
	}

	memprof := processProfile("--lines", "--unit=byte", "--alloc_space", "--base", res.Files["memprof0"], os.Args[0], res.Files["memprof"])
	delete(res.Files, "memprof")
	delete(res.Files, "memprof0")
	if memprof != "" {
		res.Files["memprof"] = memprof
	}

	return res
}

// processProfile invokes 'go tool pprof' with the specified args
// and returns the name of the resulting file, or an empty string.
func processProfile(args ...string) string {
	fname := "prof.txt"
	typ := "--text"
	if *genSvg {
		fname = "prof.svg"
		typ = "--svg"
	}
	proff, err := os.Create(tempFilename(fname))
	if err != nil {
		log.Printf("Failed to create profile file: %v", err)
		return ""
	}
	defer proff.Close()
	var proflog bytes.Buffer
	cmdargs := append([]string{"tool", "pprof", typ}, args...)
	cmd := exec.Command("go", cmdargs...)
	cmd.Stdout = proff
	cmd.Stderr = &proflog
	err = cmd.Run()
	if err != nil {
		log.Printf("go tool pprof failed: %v\n%v", err, proflog.String())
		return "" // Deliberately ignore the error.
	}
	return proff.Name()
}

// runBenchmark runs f several times with an increasing number of iterations
// until the execution time reaches the requested duration.
func runBenchmark(f func(uint64)) Result {
	res := MakeResult()
	for chooseN(&res) {
		log.Printf("Benchmarking %v iterations\n", res.N)
		res = runBenchmarkOnce(f, res.N)
	}
	return res
}

// runBenchmarkOnce runs f once and collects all performance metrics and profiles.
func runBenchmarkOnce(f func(uint64), N uint64) Result {
	latencyInit(N)
	runtime.GC()
	mstats0 := new(runtime.MemStats)
	runtime.ReadMemStats(mstats0)
	ss := InitSysStats(N)
	res := MakeResult()
	res.N = N
	res.Files["memprof0"] = tempFilename("memprof")
	memprof0, err := os.Create(res.Files["memprof0"])
	if err != nil {
		log.Fatalf("Failed to create profile file '%v': %v", res.Files["memprof0"], err)
	}
	pprof.WriteHeapProfile(memprof0)
	memprof0.Close()

	res.Files["cpuprof"] = tempFilename("cpuprof")
	cpuprof, err := os.Create(res.Files["cpuprof"])
	if err != nil {
		log.Fatalf("Failed to create profile file '%v': %v", res.Files["cpuprof"], err)
	}
	defer cpuprof.Close()
	pprof.StartCPUProfile(cpuprof)
	t0 := time.Now()
	f(N)
	res.Duration = time.Since(t0)
	// Derive ns/op from the single measured duration so that Duration
	// and RunTime stay consistent.
	res.RunTime = uint64(res.Duration) / N
	res.Metrics["ns/op"] = res.RunTime
	pprof.StopCPUProfile()

	latencyCollect(&res)
	ss.Collect(&res)

	res.Files["memprof"] = tempFilename("memprof")
	memprof, err := os.Create(res.Files["memprof"])
	if err != nil {
		log.Fatalf("Failed to create profile file '%v': %v", res.Files["memprof"], err)
	}
	pprof.WriteHeapProfile(memprof)
	memprof.Close()

	mstats1 := new(runtime.MemStats)
	runtime.ReadMemStats(mstats1)
	res.Metrics["allocated-bytes/op"] = (mstats1.TotalAlloc - mstats0.TotalAlloc) / N
	res.Metrics["allocs/op"] = (mstats1.Mallocs - mstats0.Mallocs) / N
	res.Metrics["bytes-from-system"] = mstats1.Sys
	res.Metrics["heap-bytes-from-system"] = mstats1.HeapSys
	res.Metrics["stack-bytes-from-system"] = mstats1.StackSys
	res.Metrics["STW-ns/op"] = (mstats1.PauseTotalNs - mstats0.PauseTotalNs) / N
	collectGo12MemStats(&res, mstats0, mstats1)
	numGC := uint64(mstats1.NumGC - mstats0.NumGC)
	if numGC == 0 {
		res.Metrics["STW-ns/GC"] = 0
	} else {
		res.Metrics["STW-ns/GC"] = (mstats1.PauseTotalNs - mstats0.PauseTotalNs) / numGC
	}
	return res
}

// Parallel is a public helper function that runs f N times in total,
// spread across P*GOMAXPROCS goroutines.
func Parallel(N uint64, P int, f func()) {
	numProcs := P * runtime.GOMAXPROCS(0)
	var wg sync.WaitGroup
	wg.Add(numProcs)
	for p := 0; p < numProcs; p++ {
		go func() {
			defer wg.Done()
			// Atomically decrement N (adding ^uint64(0), i.e. -1) and
			// keep iterating until the shared counter goes negative.
			for int64(atomic.AddUint64(&N, ^uint64(0))) >= 0 {
				f()
			}
		}()
	}
	wg.Wait()
}
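
// For example, a benchmark body handed to Benchmark could distribute its
// iterations across goroutines like this (a sketch; doRequest is a
// hypothetical function, not part of this package):
//
//	driver.Benchmark(func(n uint64) {
//		driver.Parallel(n, 4, func() {
//			doRequest()
//		})
//	})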

// latency accumulates a fixed-size sample of per-operation latencies.
var latency struct {
	data latencyData
	idx  int32
}

type latencyData []uint64

func (p latencyData) Len() int           { return len(p) }
func (p latencyData) Less(i, j int) bool { return p[i] < p[j] }
func (p latencyData) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }

// latencyInit prepares the latency sample buffer for a run of N iterations.
func latencyInit(N uint64) {
	// Use a fixed-size slice to:
	// - bound the amount of memory consumed
	// - eliminate variance in runs that use a different number of iterations
	latency.data = make(latencyData, 1e6)
	latency.idx = 0
}

// LatencyNote records the latency of a single operation that started at t.
// It is safe for concurrent use; samples beyond the buffer capacity are dropped.
func LatencyNote(t time.Time) {
	d := time.Since(t)
	if int(atomic.LoadInt32(&latency.idx)) >= len(latency.data) {
		return
	}
	i := atomic.AddInt32(&latency.idx, 1) - 1
	if int(i) >= len(latency.data) {
		return
	}
	latency.data[i] = uint64(d)
}
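
// A benchmark iteration would typically bracket each operation like this
// (a sketch; handleRequest is a hypothetical function):
//
//	start := time.Now()
//	handleRequest()
//	driver.LatencyNote(start)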

// latencyCollect sorts the collected samples and records the 50th, 95th
// and 99th percentile latencies in res.
func latencyCollect(res *Result) {
	cnt := int(latency.idx)
	if cnt == 0 {
		return
	}
	if cnt > len(latency.data) {
		cnt = len(latency.data)
	}
	sort.Sort(latency.data[:cnt])
	res.Metrics["P50-ns/op"] = latency.data[cnt*50/100]
	res.Metrics["P95-ns/op"] = latency.data[cnt*95/100]
	res.Metrics["P99-ns/op"] = latency.data[cnt*99/100]
}

// chooseN chooses the next number of iterations for the benchmark.
// It returns false when the last run was long enough.
func chooseN(res *Result) bool {
	const MaxN = 1e12
	last := res.N
	if last == 0 {
		// First call: do a single-iteration probe run.
		res.N = 1
		return true
	} else if res.Duration >= *benchTime || last >= MaxN {
		return false
	}
	// Predict the iteration count needed to reach benchTime, grow it by
	// 1.5x for safety but by no more than 100x of the last run, and
	// always advance by at least one iteration.
	nsPerOp := max(1, res.RunTime)
	res.N = uint64(*benchTime) / nsPerOp
	res.N = max(min(res.N+res.N/2, 100*last), last+1)
	res.N = roundUp(res.N)
	return true
}
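
// For example, if the single-iteration probe took 1ms with -benchtime=5s,
// the predicted count is 5000, grown to 7500 but capped at 100*1 = 100,
// and then rounded up to 100 iterations for the next run.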

// roundUp rounds the number of iterations up to a nice value:
// the next 1eX, 2eX or 5eX.
func roundUp(n uint64) uint64 {
	tmp := n
	base := uint64(1)
	for tmp >= 10 {
		tmp /= 10
		base *= 10
	}
	switch {
	case n <= base:
		return base
	case n <= (2 * base):
		return 2 * base
	case n <= (5 * base):
		return 5 * base
	default:
		return 10 * base
	}
}
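
// For example: roundUp(1) == 1, roundUp(3) == 5, roundUp(7) == 10,
// roundUp(137) == 200 and roundUp(30001) == 50000.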

func min(a, b uint64) uint64 {
	if a < b {
		return a
	}
	return b
}

func max(a, b uint64) uint64 {
	if a > b {
		return a
	}
	return b
}

// tmpSeq is a sequence number used to name temporary files.
var tmpSeq = 0

// tempFilename returns a fresh file name in the tmpdir directory
// with the given extension.
func tempFilename(ext string) string {
	tmpSeq++
	return filepath.Join(*tmpDir, fmt.Sprintf("%v.%v", tmpSeq, ext))
}