// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime

import (
	"internal/runtime/atomic"
	"internal/runtime/sys"
	"unsafe"
)

const (
|
|
|
|
// For the time histogram type, we use an HDR histogram.
|
2022-08-30 03:13:36 +00:00
|
|
|
// Values are placed in buckets based solely on the most
|
|
|
|
// significant set bit. Thus, buckets are power-of-2 sized.
|
2020-08-06 20:36:49 +00:00
|
|
|
// Values are then placed into sub-buckets based on the value of
|
|
|
|
// the next timeHistSubBucketBits most significant bits. Thus,
|
2022-08-30 03:13:36 +00:00
|
|
|
// sub-buckets are linear within a bucket.
|
2020-08-06 20:36:49 +00:00
|
|
|
//
|
|
|
|
// Therefore, the number of sub-buckets (timeHistNumSubBuckets)
|
|
|
|
// defines the error. This error may be computed as
|
|
|
|
// 1/timeHistNumSubBuckets*100%. For example, for 16 sub-buckets
|
2022-08-30 03:13:36 +00:00
|
|
|
// per bucket the error is approximately 6%.
|
2020-08-06 20:36:49 +00:00
|
|
|
//
|
2022-08-30 03:13:36 +00:00
|
|
|
// The number of buckets (timeHistNumBuckets), on the
|
|
|
|
// other hand, defines the range. To avoid producing a large number
|
|
|
|
// of buckets that are close together, especially for small numbers
|
|
|
|
// (e.g. 1, 2, 3, 4, 5 ns) that aren't very useful, timeHistNumBuckets
|
|
|
|
// is defined in terms of the least significant bit (timeHistMinBucketBits)
|
|
|
|
// that needs to be set before we start bucketing and the most
|
|
|
|
// significant bit (timeHistMaxBucketBits) that we bucket before we just
|
|
|
|
// dump it into a catch-all bucket.
|
2020-08-06 20:36:49 +00:00
|
|
|
//
|
2022-08-30 03:13:36 +00:00
|
|
|
// As an example, consider the configuration:
|
2020-08-06 20:36:49 +00:00
|
|
|
//
|
2022-08-30 03:13:36 +00:00
|
|
|
// timeHistMinBucketBits = 9
|
|
|
|
// timeHistMaxBucketBits = 48
|
|
|
|
// timeHistSubBucketBits = 2
|
2020-08-06 20:36:49 +00:00
|
|
|
//
|
2022-08-30 03:13:36 +00:00
|
|
|
// Then:
|
2020-08-06 20:36:49 +00:00
|
|
|
//
|
2022-08-30 03:13:36 +00:00
|
|
|
// 011000001
|
|
|
|
// ^--
|
|
|
|
// │ ^
|
|
|
|
// │ └---- Next 2 bits -> sub-bucket 3
|
|
|
|
// └------- Bit 9 unset -> bucket 0
|
|
|
|
//
|
|
|
|
// 110000001
|
|
|
|
// ^--
|
|
|
|
// │ ^
|
|
|
|
// │ └---- Next 2 bits -> sub-bucket 2
|
|
|
|
// └------- Bit 9 set -> bucket 1
|
|
|
|
//
|
|
|
|
// 1000000010
|
|
|
|
// ^-- ^
|
|
|
|
// │ ^ └-- Lower bits ignored
|
|
|
|
// │ └---- Next 2 bits -> sub-bucket 0
|
|
|
|
// └------- Bit 10 set -> bucket 2
|
|
|
|
//
|
|
|
|
// Following this pattern, bucket 38 will have the bit 46 set. We don't
|
|
|
|
// have any buckets for higher values, so we spill the rest into an overflow
|
|
|
|
// bucket containing values of 2^47-1 nanoseconds or approx. 1 day or more.
|
|
|
|
// This range is more than enough to handle durations produced by the runtime.
|
|
|
|
timeHistMinBucketBits = 9
|
|
|
|
timeHistMaxBucketBits = 48 // Note that this is exclusive; 1 higher than the actual range.
|
|
|
|
timeHistSubBucketBits = 2
|
|
|
|
timeHistNumSubBuckets = 1 << timeHistSubBucketBits
|
|
|
|
timeHistNumBuckets = timeHistMaxBucketBits - timeHistMinBucketBits + 1
|
|
|
|
// Two extra buckets, one for underflow, one for overflow.
|
|
|
|
timeHistTotalBuckets = timeHistNumBuckets*timeHistNumSubBuckets + 2
|
2020-08-06 20:36:49 +00:00
|
|
|
)
// timeHistogram represents a distribution of durations in
|
|
|
|
// nanoseconds.
|
|
|
|
//
|
|
|
|
// The accuracy and range of the histogram is defined by the
|
2022-08-30 03:13:36 +00:00
|
|
|
// timeHistSubBucketBits and timeHistNumBuckets constants.
|
2020-08-06 20:36:49 +00:00
|
|
|
//
|
|
|
|
// It is an HDR histogram with exponentially-distributed
|
|
|
|
// buckets and linearly distributed sub-buckets.
|
|
|
|
//
|
2022-07-25 15:58:23 -04:00
|
|
|
// The histogram is safe for concurrent reads and writes.
|
2020-08-06 20:36:49 +00:00
|
|
|
type timeHistogram struct {
|
2022-08-30 03:13:36 +00:00
|
|
|
counts [timeHistNumBuckets * timeHistNumSubBuckets]atomic.Uint64
|
2021-01-06 23:05:22 +00:00
|
|
|
|
|
|
|
// underflow counts all the times we got a negative duration
|
|
|
|
// sample. Because of how time works on some platforms, it's
|
|
|
|
// possible to measure negative durations. We could ignore them,
|
|
|
|
// but we record them anyway because it's better to have some
|
|
|
|
// signal that it's happening than just missing samples.
|
2022-07-25 15:58:23 -04:00
|
|
|
underflow atomic.Uint64
|
2022-08-30 03:13:36 +00:00
|
|
|
|
|
|
|
// overflow counts all the times we got a duration that exceeded
|
|
|
|
// the range counts represents.
|
|
|
|
overflow atomic.Uint64
|
2020-08-06 20:36:49 +00:00
|
|
|
}
// record adds the given duration to the distribution.
|
2020-08-07 16:28:35 +00:00
|
|
|
//
|
|
|
|
// Disallow preemptions and stack growths because this function
|
|
|
|
// may run in sensitive locations.
|
2022-01-30 20:13:43 -05:00
|
|
|
//
|
2020-08-07 16:28:35 +00:00
|
|
|
//go:nosplit
|
2020-08-06 20:36:49 +00:00
|
|
|
func (h *timeHistogram) record(duration int64) {
|
2022-08-30 03:13:36 +00:00
|
|
|
// If the duration is negative, capture that in underflow.
|
2020-08-06 20:36:49 +00:00
|
|
|
if duration < 0 {
|
2022-07-25 15:58:23 -04:00
|
|
|
h.underflow.Add(1)
|
runtime: shift timeHistogram buckets and allow negative durations
Today, timeHistogram, when copied, has the wrong set of counts for the
bucket that should represent (-inf, 0), when in fact it contains [0, 1).
In essence, the buckets are all shifted over by one from where they're
supposed to be.
But this also means that the existence of the overflow bucket is wrong:
the top bucket is supposed to extend to infinity, and what we're really
missing is an underflow bucket to represent the range (-inf, 0).
We could just always zero this bucket and continue ignoring negative
durations, but that likely isn't prudent.
timeHistogram is intended to be used with differences in nanotime, but
depending on how a platform is implemented (or due to a bug in that
platform) it's possible to get a negative duration without having done
anything wrong. We should just be resilient to that and be able to
detect it.
So this change removes the overflow bucket and replaces it with an
underflow bucket, and timeHistogram no longer panics when faced with a
negative duration.
Fixes #43328.
Fixes #43329.
Change-Id: If336425d7d080fd37bf071e18746800e22d38108
Reviewed-on: https://go-review.googlesource.com/c/go/+/279468
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
2020-12-22 17:47:43 +00:00
|
|
|
return
|
2020-08-06 20:36:49 +00:00
|
|
|
}
|
2022-08-30 03:13:36 +00:00
|
|
|
// bucketBit is the target bit for the bucket which is usually the
|
|
|
|
// highest 1 bit, but if we're less than the minimum, is the highest
|
|
|
|
// 1 bit of the minimum (which will be zero in the duration).
|
|
|
|
//
|
|
|
|
// bucket is the bucket index, which is the bucketBit minus the
|
|
|
|
// highest bit of the minimum, plus one to leave room for the catch-all
|
|
|
|
// bucket for samples lower than the minimum.
|
|
|
|
var bucketBit, bucket uint
|
|
|
|
if l := sys.Len64(uint64(duration)); l < timeHistMinBucketBits {
|
|
|
|
bucketBit = timeHistMinBucketBits
|
|
|
|
bucket = 0 // bucketBit - timeHistMinBucketBits
|
2020-08-06 20:36:49 +00:00
|
|
|
} else {
|
2022-08-30 03:13:36 +00:00
|
|
|
bucketBit = uint(l)
|
|
|
|
bucket = bucketBit - timeHistMinBucketBits + 1
|
|
|
|
}
|
|
|
|
// If the bucket we computed is greater than the number of buckets,
|
|
|
|
// count that in overflow.
|
|
|
|
if bucket >= timeHistNumBuckets {
|
|
|
|
h.overflow.Add(1)
|
|
|
|
return
|
2020-08-06 20:36:49 +00:00
|
|
|
}
|
2022-08-30 03:13:36 +00:00
|
|
|
// The sub-bucket index is just next timeHistSubBucketBits after the bucketBit.
|
|
|
|
subBucket := uint(duration>>(bucketBit-1-timeHistSubBucketBits)) % timeHistNumSubBuckets
|
|
|
|
h.counts[bucket*timeHistNumSubBuckets+subBucket].Add(1)
|
2020-08-06 20:36:49 +00:00
|
|
|
}
// write dumps the histogram to the passed metricValue as a float64 histogram.
|
|
|
|
func (h *timeHistogram) write(out *metricValue) {
|
|
|
|
hist := out.float64HistOrInit(timeHistBuckets)
|
|
|
|
// The bottom-most bucket, containing negative values, is tracked
|
|
|
|
// separately as underflow, so fill that in manually and then iterate
|
|
|
|
// over the rest.
|
|
|
|
hist.counts[0] = h.underflow.Load()
|
|
|
|
for i := range h.counts {
|
|
|
|
hist.counts[i+1] = h.counts[i].Load()
|
|
|
|
}
|
|
|
|
hist.counts[len(hist.counts)-1] = h.overflow.Load()
|
|
|
|
}
const (
|
|
|
|
fInf = 0x7FF0000000000000
|
|
|
|
fNegInf = 0xFFF0000000000000
|
|
|
|
)
func float64Inf() float64 {
|
|
|
|
inf := uint64(fInf)
|
|
|
|
return *(*float64)(unsafe.Pointer(&inf))
|
|
|
|
}
func float64NegInf() float64 {
|
|
|
|
inf := uint64(fNegInf)
|
|
|
|
return *(*float64)(unsafe.Pointer(&inf))
|
|
|
|
}
// timeHistogramMetricsBuckets generates a slice of boundaries for
|
|
|
|
// the timeHistogram. These boundaries are represented in seconds,
|
|
|
|
// not nanoseconds like the timeHistogram represents durations.
|
|
|
|
func timeHistogramMetricsBuckets() []float64 {
|
2021-01-06 23:05:22 +00:00
|
|
|
b := make([]float64, timeHistTotalBuckets+1)
|
2022-08-30 03:13:36 +00:00
|
|
|
// Underflow bucket.
|
2021-01-06 23:05:22 +00:00
|
|
|
b[0] = float64NegInf()
|
2022-08-30 03:13:36 +00:00
|
|
|
|
|
|
|
for j := 0; j < timeHistNumSubBuckets; j++ {
|
|
|
|
// No bucket bit for the first few buckets. Just sub-bucket bits after the
|
|
|
|
// min bucket bit.
|
|
|
|
bucketNanos := uint64(j) << (timeHistMinBucketBits - 1 - timeHistSubBucketBits)
|
|
|
|
// Convert nanoseconds to seconds via a division.
|
|
|
|
// These values will all be exactly representable by a float64.
|
|
|
|
b[j+1] = float64(bucketNanos) / 1e9
|
2022-01-21 06:52:43 +00:00
|
|
|
}
|
2022-08-30 03:13:36 +00:00
|
|
|
// Generate the rest of the buckets. It's easier to reason
|
|
|
|
// about if we cut out the 0'th bucket.
|
|
|
|
for i := timeHistMinBucketBits; i < timeHistMaxBucketBits; i++ {
|
2020-08-06 20:36:49 +00:00
|
|
|
for j := 0; j < timeHistNumSubBuckets; j++ {
|
2022-08-30 03:13:36 +00:00
|
|
|
// Set the bucket bit.
|
|
|
|
bucketNanos := uint64(1) << (i - 1)
|
2022-01-21 06:52:43 +00:00
|
|
|
// Set the sub-bucket bits.
|
2022-08-30 03:13:36 +00:00
|
|
|
bucketNanos |= uint64(j) << (i - 1 - timeHistSubBucketBits)
|
|
|
|
// The index for this bucket is going to be the (i+1)'th bucket
|
|
|
|
// (note that we're starting from zero, but handled the first bucket
|
2022-01-21 06:52:43 +00:00
|
|
|
// earlier, so we need to compensate), and the j'th sub bucket.
|
|
|
|
// Add 1 because we left space for -Inf.
|
2022-08-30 03:13:36 +00:00
|
|
|
bucketIndex := (i-timeHistMinBucketBits+1)*timeHistNumSubBuckets + j + 1
|
2022-01-21 06:52:43 +00:00
|
|
|
// Convert nanoseconds to seconds via a division.
|
2020-08-06 20:36:49 +00:00
|
|
|
// These values will all be exactly representable by a float64.
|
2022-01-21 06:52:43 +00:00
|
|
|
b[bucketIndex] = float64(bucketNanos) / 1e9
|
2020-08-06 20:36:49 +00:00
|
|
|
}
|
|
|
|
}
|
2022-08-30 03:13:36 +00:00
|
|
|
// Overflow bucket.
|
|
|
|
b[len(b)-2] = float64(uint64(1)<<(timeHistMaxBucketBits-1)) / 1e9
|
2021-01-06 23:05:22 +00:00
|
|
|
b[len(b)-1] = float64Inf()
|
2020-08-06 20:36:49 +00:00
|
|
|
return b
|
|
|
|
}