From 0ae59d23ac9db008abda5d5a52006007942b41e5 Mon Sep 17 00:00:00 2001 From: David Taylor Date: Sat, 1 Nov 2025 14:25:26 +0000 Subject: [PATCH] runtime: refactor goroutine profilerecord types Previously, goroutine profiles were collected into two adjacent slices each of length n, with the i'th index of the first holding the captured stack of the i'th goroutine and the i'th index of the second holding its captured labels. This changes the representation used during collection to be a single slice of length n, with the stack and labels of the i'th goroutine now residing in two fields of each struct in that slice. Switching from multiple slices of a single attribute each to one slice of multiple attributes avoids allocating multiple slices, as well as needing to pass them both around, side-by-side, in all goroutine profile collection code. While maintaining and passing two slices was workable -- if perhaps slightly cumbersome -- passing them around side-by-side would quickly become unwieldy if or when any additional attributes such as wait reasons or wait times are collected during goroutine profiling, for example as proposed in #74954. Regardless of the user-facing shape that any such extension to goroutine profiling may end up taking, this updated internal representation should be substantially easier to extend and maintain than side-by-side slices. This is a pure refactor of this internal representation; it should have no user-facing behavior change. While in the area: concurrent goroutine collection has been the only mechanism used for some time now, so the disused legacy implementation goroutineProfileWithLabelsSync is removed and the single remaining implementation is renamed to drop its 'concurrent' qualifier. Updates #74954. 
Change-Id: I3fd14834b2f3aae73317d3fad3294d539302713f --- src/internal/profilerecord/profilerecord.go | 15 ++ src/runtime/mprof.go | 143 +++----------------- src/runtime/pprof/pprof.go | 57 ++++---- src/runtime/pprof/pprof_test.go | 2 +- 4 files changed, 64 insertions(+), 153 deletions(-) diff --git a/src/internal/profilerecord/profilerecord.go b/src/internal/profilerecord/profilerecord.go index a5efdced8f77c9..b256dab1ea037e 100644 --- a/src/internal/profilerecord/profilerecord.go +++ b/src/internal/profilerecord/profilerecord.go @@ -8,10 +8,16 @@ // TODO: Consider moving this to internal/runtime, see golang.org/issue/65355. package profilerecord +import "unsafe" + type StackRecord struct { Stack []uintptr } +func (r StackRecord) GetStack() []uintptr { return r.Stack } +func (r StackRecord) GetLabels() unsafe.Pointer { return nil } +func (r StackRecord) GetGoroutine() GoroutineRecord { return GoroutineRecord{} } + type MemProfileRecord struct { AllocBytes, FreeBytes int64 AllocObjects, FreeObjects int64 @@ -26,3 +32,12 @@ type BlockProfileRecord struct { Cycles int64 Stack []uintptr } + +type GoroutineRecord struct { + Labels unsafe.Pointer + Stack []uintptr +} + +func (r GoroutineRecord) GetStack() []uintptr { return r.Stack } +func (r GoroutineRecord) GetLabels() unsafe.Pointer { return r.Labels } +func (r GoroutineRecord) GetGoroutine() GoroutineRecord { return r } diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go index d745d5f5b95885..29ecd87244a867 100644 --- a/src/runtime/mprof.go +++ b/src/runtime/mprof.go @@ -1245,40 +1245,21 @@ func pprof_threadCreateInternal(p []profilerecord.StackRecord) (n int, ok bool) }) } -//go:linkname pprof_goroutineProfileWithLabels -func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { - return goroutineProfileWithLabels(p, labels) +//go:linkname pprof_goroutineProfile +func pprof_goroutineProfile(p []profilerecord.GoroutineRecord) (n int, ok bool) { + return 
goroutineProfileInternal(p) } -// labels may be nil. If labels is non-nil, it must have the same length as p. -func goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { - if labels != nil && len(labels) != len(p) { - labels = nil - } - - return goroutineProfileWithLabelsConcurrent(p, labels) -} - -//go:linkname pprof_goroutineLeakProfileWithLabels -func pprof_goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { - return goroutineLeakProfileWithLabelsConcurrent(p, labels) -} - -// labels may be nil. If labels is non-nil, it must have the same length as p. -func goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { - if labels != nil && len(labels) != len(p) { - labels = nil - } - - return goroutineLeakProfileWithLabelsConcurrent(p, labels) +//go:linkname pprof_goroutineLeakProfile +func pprof_goroutineLeakProfile(p []profilerecord.GoroutineRecord) (n int, ok bool) { + return goroutineLeakProfileInternal(p) } var goroutineProfile = struct { sema uint32 active bool offset atomic.Int64 - records []profilerecord.StackRecord - labels []unsafe.Pointer + records []profilerecord.GoroutineRecord }{ sema: 1, } @@ -1316,18 +1297,18 @@ func (p *goroutineProfileStateHolder) CompareAndSwap(old, new goroutineProfileSt return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new)) } -func goroutineLeakProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { +func goroutineLeakProfileInternal(p []profilerecord.GoroutineRecord) (n int, ok bool) { if len(p) == 0 { // An empty slice is obviously too small. Return a rough // allocation estimate. return work.goroutineLeak.count, false } - // Use the same semaphore as goroutineProfileWithLabelsConcurrent, + // Use the same semaphore as goroutineProfileInternal, // because ultimately we still use goroutine profiles. 
semacquire(&goroutineProfile.sema) - // Unlike in goroutineProfileWithLabelsConcurrent, we don't need to + // Unlike in goroutineProfileInternal, we don't need to // save the current goroutine stack, because it is obviously not leaked. pcbuf := makeProfStack() // see saveg() for explanation @@ -1358,7 +1339,7 @@ func goroutineLeakProfileWithLabelsConcurrent(p []profilerecord.StackRecord, lab return n, true } -func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { +func goroutineProfileInternal(p []profilerecord.GoroutineRecord) (n int, ok bool) { if len(p) == 0 { // An empty slice is obviously too small. Return a rough // allocation estimate without bothering to STW. As long as @@ -1401,9 +1382,8 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels systemstack(func() { saveg(pc, sp, ourg, &p[0], pcbuf) }) - if labels != nil { - labels[0] = ourg.labels - } + + p[0].Labels = ourg.labels ourg.goroutineProfiled.Store(goroutineProfileSatisfied) goroutineProfile.offset.Store(1) @@ -1414,7 +1394,6 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels // field set to goroutineProfileSatisfied. goroutineProfile.active = true goroutineProfile.records = p - goroutineProfile.labels = labels startTheWorld(stw) // Visit each goroutine that existed as of the startTheWorld call above. @@ -1436,7 +1415,6 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels endOffset := goroutineProfile.offset.Swap(0) goroutineProfile.active = false goroutineProfile.records = nil - goroutineProfile.labels = nil startTheWorld(stw) // Restore the invariant that every goroutine struct in allgs has its @@ -1528,7 +1506,7 @@ func doRecordGoroutineProfile(gp1 *g, pcbuf []uintptr) { // System goroutines should not appear in the profile. 
// Check this here and not in tryRecordGoroutineProfile because isSystemGoroutine // may change on a goroutine while it is executing, so while the scheduler might - // see a system goroutine, goroutineProfileWithLabelsConcurrent might not, and + // see a system goroutine, goroutineProfileInternal might not, and // this inconsistency could cause invariants to be violated, such as trying to // record the stack of a running goroutine below. In short, we still want system // goroutines to participate in the same state machine on gp1.goroutineProfiled as @@ -1556,7 +1534,7 @@ func doRecordGoroutineProfile(gp1 *g, pcbuf []uintptr) { if offset >= len(goroutineProfile.records) { // Should be impossible, but better to return a truncated profile than // to crash the entire process at this point. Instead, deal with it in - // goroutineProfileWithLabelsConcurrent where we have more context. + // goroutineProfileInternal where we have more context. return } @@ -1570,88 +1548,7 @@ func doRecordGoroutineProfile(gp1 *g, pcbuf []uintptr) { // to avoid schedule delays. systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &goroutineProfile.records[offset], pcbuf) }) - if goroutineProfile.labels != nil { - goroutineProfile.labels[offset] = gp1.labels - } -} - -func goroutineProfileWithLabelsSync(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { - gp := getg() - - isOK := func(gp1 *g) bool { - // Checking isSystemGoroutine here makes GoroutineProfile - // consistent with both NumGoroutine and Stack. - if gp1 == gp { - return false - } - if status := readgstatus(gp1); status == _Gdead || status == _Gdeadextra { - return false - } - if isSystemGoroutine(gp1, false) { - return false - } - return true - } - - pcbuf := makeProfStack() // see saveg() for explanation - stw := stopTheWorld(stwGoroutineProfile) - - // World is stopped, no locking required. 
- n = 1 - forEachGRace(func(gp1 *g) { - if isOK(gp1) { - n++ - } - }) - - if n <= len(p) { - ok = true - r, lbl := p, labels - - // Save current goroutine. - sp := sys.GetCallerSP() - pc := sys.GetCallerPC() - systemstack(func() { - saveg(pc, sp, gp, &r[0], pcbuf) - }) - r = r[1:] - - // If we have a place to put our goroutine labelmap, insert it there. - if labels != nil { - lbl[0] = gp.labels - lbl = lbl[1:] - } - - // Save other goroutines. - forEachGRace(func(gp1 *g) { - if !isOK(gp1) { - return - } - - if len(r) == 0 { - // Should be impossible, but better to return a - // truncated profile than to crash the entire process. - return - } - // saveg calls gentraceback, which may call cgo traceback functions. - // The world is stopped, so it cannot use cgocall (which will be - // blocked at exitsyscall). Do it on the system stack so it won't - // call into the schedular (see traceback.go:cgoContextPCs). - systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &r[0], pcbuf) }) - if labels != nil { - lbl[0] = gp1.labels - lbl = lbl[1:] - } - r = r[1:] - }) - } - - if raceenabled { - raceacquire(unsafe.Pointer(&labelSync)) - } - - startTheWorld(stw) - return n, ok + goroutineProfile.records[offset].Labels = gp1.labels } // GoroutineProfile returns n, the number of records in the active goroutine stack profile. @@ -1661,7 +1558,7 @@ func goroutineProfileWithLabelsSync(p []profilerecord.StackRecord, labels []unsa // Most clients should use the [runtime/pprof] package instead // of calling GoroutineProfile directly. 
func GoroutineProfile(p []StackRecord) (n int, ok bool) { - records := make([]profilerecord.StackRecord, len(p)) + records := make([]profilerecord.GoroutineRecord, len(p)) n, ok = goroutineProfileInternal(records) if !ok { return @@ -1673,11 +1570,7 @@ func GoroutineProfile(p []StackRecord) (n int, ok bool) { return } -func goroutineProfileInternal(p []profilerecord.StackRecord) (n int, ok bool) { - return goroutineProfileWithLabels(p, nil) -} - -func saveg(pc, sp uintptr, gp *g, r *profilerecord.StackRecord, pcbuf []uintptr) { +func saveg(pc, sp uintptr, gp *g, r *profilerecord.GoroutineRecord, pcbuf []uintptr) { // To reduce memory usage, we want to allocate a r.Stack that is just big // enough to hold gp's stack trace. Naively we might achieve this by // recording our stack trace into mp.profStack, and then allocating a diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go index b524e992b8b209..72f522e9fdccd3 100644 --- a/src/runtime/pprof/pprof.go +++ b/src/runtime/pprof/pprof.go @@ -402,7 +402,9 @@ type stackProfile [][]uintptr func (x stackProfile) Len() int { return len(x) } func (x stackProfile) Stack(i int) []uintptr { return x[i] } -func (x stackProfile) Label(i int) *labelMap { return nil } +func (x stackProfile) Goroutine(i int) profilerecord.GoroutineRecord { + return profilerecord.GoroutineRecord{} +} // A countProfile is a set of stack traces to be printed as counts // grouped by stack trace. 
There are multiple implementations: @@ -411,7 +413,7 @@ func (x stackProfile) Label(i int) *labelMap { return nil } type countProfile interface { Len() int Stack(i int) []uintptr - Label(i int) *labelMap + Goroutine(i int) profilerecord.GoroutineRecord } // expandInlinedFrames copies the call stack from pcs into dst, expanding any @@ -484,7 +486,7 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro var keys []string n := p.Len() for i := 0; i < n; i++ { - k := key(p.Stack(i), p.Label(i)) + k := key(p.Stack(i), (*labelMap)(p.Goroutine(i).Labels)) if count[k] == 0 { index[k] = i keys = append(keys, k) @@ -520,9 +522,9 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro locs = b.appendLocsForStack(locs[:0], p.Stack(index[k])) idx := index[k] var labels func() - if p.Label(idx) != nil { + if l := p.Goroutine(idx).Labels; l != nil { labels = func() { - for _, lbl := range p.Label(idx).list { + for _, lbl := range (*labelMap)(l).list { b.pbLabel(tagSample_Label, lbl.key, lbl.value, 0) } } @@ -742,7 +744,7 @@ func writeThreadCreate(w io.Writer, debug int) error { // Until https://golang.org/issues/6104 is addressed, wrap // ThreadCreateProfile because there's no point in tracking labels when we // don't get any stack-traces. - return writeRuntimeProfile(w, debug, "threadcreate", func(p []profilerecord.StackRecord, _ []unsafe.Pointer) (n int, ok bool) { + return writeRuntimeProfile(w, debug, "threadcreate", func(p []profilerecord.StackRecord) (n int, ok bool) { return pprof_threadCreateInternal(p) }) } @@ -757,7 +759,7 @@ func writeGoroutine(w io.Writer, debug int) error { if debug >= 2 { return writeGoroutineStacks(w) } - return writeRuntimeProfile(w, debug, "goroutine", pprof_goroutineProfileWithLabels) + return writeRuntimeProfile(w, debug, "goroutine", pprof_goroutineProfile) } // writeGoroutineLeak first invokes a GC cycle that performs goroutine leak detection. 
@@ -774,7 +776,7 @@ func writeGoroutineLeak(w io.Writer, debug int) error { } // Otherwise, write the goroutine leak profile. - return writeRuntimeProfile(w, debug, "goroutineleak", pprof_goroutineLeakProfileWithLabels) + return writeRuntimeProfile(w, debug, "goroutineleak", pprof_goroutineLeakProfile) } func writeGoroutineStacks(w io.Writer) error { @@ -798,24 +800,28 @@ func writeGoroutineStacks(w io.Writer) error { return err } -func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]profilerecord.StackRecord, []unsafe.Pointer) (int, bool)) error { +type capturedStack interface { + GetStack() []uintptr + GetLabels() unsafe.Pointer + GetGoroutine() profilerecord.GoroutineRecord +} + +func writeRuntimeProfile[T capturedStack](w io.Writer, debug int, name string, fetch func([]T) (int, bool)) error { // Find out how many records there are (fetch(nil)), // allocate that many records, and get the data. // There's a race—more records might be added between // the two calls—so allocate a few extra records for safety // and also try again if we're very unlucky. // The loop should only execute one iteration in the common case. - var p []profilerecord.StackRecord - var labels []unsafe.Pointer - n, ok := fetch(nil, nil) + var p []T + n, ok := fetch(nil) for { // Allocate room for a slightly bigger profile, // in case a few more entries have been added // since the call to ThreadProfile. - p = make([]profilerecord.StackRecord, n+10) - labels = make([]unsafe.Pointer, n+10) - n, ok = fetch(p, labels) + p = make([]T, n+10) + n, ok = fetch(p) if ok { p = p[0:n] break @@ -823,17 +829,14 @@ func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]profi // Profile grew; try again. 
} - return printCountProfile(w, debug, name, &runtimeProfile{p, labels}) + return printCountProfile(w, debug, name, runtimeProfile[T](p)) } -type runtimeProfile struct { - stk []profilerecord.StackRecord - labels []unsafe.Pointer -} +type runtimeProfile[T capturedStack] []T -func (p *runtimeProfile) Len() int { return len(p.stk) } -func (p *runtimeProfile) Stack(i int) []uintptr { return p.stk[i].Stack } -func (p *runtimeProfile) Label(i int) *labelMap { return (*labelMap)(p.labels[i]) } +func (p runtimeProfile[T]) Len() int { return len(p) } +func (p runtimeProfile[T]) Stack(i int) []uintptr { return p[i].GetStack() } +func (p runtimeProfile[T]) Goroutine(i int) profilerecord.GoroutineRecord { return p[i].GetGoroutine() } var cpu struct { sync.Mutex @@ -996,11 +999,11 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu return b.Flush() } -//go:linkname pprof_goroutineProfileWithLabels runtime.pprof_goroutineProfileWithLabels -func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) +//go:linkname pprof_goroutineProfile runtime.pprof_goroutineProfile +func pprof_goroutineProfile(p []profilerecord.GoroutineRecord) (n int, ok bool) -//go:linkname pprof_goroutineLeakProfileWithLabels runtime.pprof_goroutineLeakProfileWithLabels -func pprof_goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) +//go:linkname pprof_goroutineLeakProfile runtime.pprof_goroutineLeakProfile +func pprof_goroutineLeakProfile(p []profilerecord.GoroutineRecord) (n int, ok bool) //go:linkname pprof_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond func pprof_cyclesPerSecond() int64 diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go index b816833e52285f..f9083e5f8d1bd9 100644 --- a/src/runtime/pprof/pprof_test.go +++ b/src/runtime/pprof/pprof_test.go @@ -1571,7 +1571,7 @@ func TestGoroutineProfileConcurrency(t *testing.T) { 
goroutineProf := Lookup("goroutine") profilerCalls := func(s string) int { - return strings.Count(s, "\truntime/pprof.runtime_goroutineProfileWithLabels+") + return strings.Count(s, "\truntime/pprof.runtime_goroutineProfile+") } includesFinalizerOrCleanup := func(s string) bool {