Skip to content

Commit

Permalink
profiletool: Add subcommand to check for similarity between two profiles.
Browse files Browse the repository at this point in the history

This is meant to be used as part of a pipeline to refresh the profiles used
for PGO builds. This is used to quantify how similar the profiles are to
existing checked-in profiles. If they are similar enough, then the checked-in
profiles do not need to be updated.

PiperOrigin-RevId: 648888376
  • Loading branch information
EtiennePerot authored and gvisor-bot committed Nov 27, 2024
1 parent ac42faf commit d1fc19d
Show file tree
Hide file tree
Showing 2 changed files with 214 additions and 8 deletions.
2 changes: 1 addition & 1 deletion WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3051,7 +3051,7 @@ go_repository(
name = "com_github_google_pprof",
importpath = "github.com/google/pprof",
sum = "h1:wORs2YN3R3ona/CXYuTvLM31QlgoNKHvlCNuArCDDCU=",
version = "v0.0.0-20221219190121-3cb0bae90811",
version = "v0.0.0-20240710211743-f6c9dda6c6da",
)

go_repository(
Expand Down
220 changes: 213 additions & 7 deletions tools/profiletool/profiletool.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,30 @@ import (
"compress/gzip"
"errors"
"fmt"
"math"
"os"
"path/filepath"
"strings"

"github.com/google/pprof/profile"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/runsc/flag"
)

// Flag sets for each profiletool subcommand, together with the flags they
// accept. Every flag set uses flag.ContinueOnError.
var (
mergeCmd = flag.NewFlagSet("merge", flag.ContinueOnError)
mergeOut = mergeCmd.String("out", "/dev/stdout", "file to write the merged profile to")
compactCmd = flag.NewFlagSet("compact", flag.ContinueOnError)
compactOut = compactCmd.String("out", "/dev/stdout", "file to write the compacted profile to")
mergeCmd = flag.NewFlagSet("merge", flag.ContinueOnError)
mergeOut = mergeCmd.String("out", "/dev/stdout", "file to write the merged profile to")
compactCmd = flag.NewFlagSet("compact", flag.ContinueOnError)
compactOut = compactCmd.String("out", "/dev/stdout", "file to write the compacted profile to")
checkSimilarCmd = flag.NewFlagSet("check-similar", flag.ContinueOnError)
checkSimilarQuiet = checkSimilarCmd.Bool("quiet", false, "if set, do not print any output; comparison result is still provided as exit code")
checkSimilarThreshold = checkSimilarCmd.Float64("threshold", 0.7, "threshold (between 0.0 and 1.0) above which the profiles are considered similar")

// allCommands lists the flag sets of the available subcommands.
allCommands = []*flag.FlagSet{mergeCmd, compactCmd}
allCommands = []*flag.FlagSet{mergeCmd, compactCmd, checkSimilarCmd}
// commandSet maps each subcommand's flag set to a one-line description.
commandSet = map[*flag.FlagSet]string{
mergeCmd: "merge two or more profile files into one",
compactCmd: "minimize the size of a profile",
mergeCmd: "merge two or more profile files into one",
compactCmd: "minimize the size of a profile",
checkSimilarCmd: "check if two profiles are similar",
}
)

Expand Down Expand Up @@ -171,6 +177,202 @@ func writeMaxCompressionProfile(p *profile.Profile, out *os.File) error {
return nil
}

// comparisonKey identifies a code location for the purpose of comparing two
// profiles. It is a comparable struct and is used as a map key.
type comparisonKey struct {
// Filename is the source file name of the function.
Filename string
// FunctionName is the name of the function.
FunctionName string
// SystemName is the function's system name as recorded in the profile.
SystemName string
// LineFromFunctionStart is the line number relative to the function's
// start line, rather than the absolute line number in the file.
LineFromFunctionStart int64
// InlineTrace is a ";"-separated description of the other lines recorded
// at the same location; it is empty when the location has a single line.
InlineTrace string
}

// isGoRuntime reports whether the key's function name starts with the
// "runtime." package prefix.
func (k comparisonKey) isGoRuntime() bool {
	_, hasRuntimePrefix := strings.CutPrefix(k.FunctionName, "runtime.")
	return hasRuntimePrefix
}

// String renders the key as its five fields joined by ":", in the order
// file name, function name, system name, relative line, inline trace.
func (k comparisonKey) String() string {
	fields := []string{
		k.Filename,
		k.FunctionName,
		k.SystemName,
		fmt.Sprint(k.LineFromFunctionStart),
		k.InlineTrace,
	}
	return strings.Join(fields, ":")
}

// aggregateProfileData is the result of aggregating a profile by
// comparisonKey.
type aggregateProfileData struct {
// profile is the profile the data was aggregated from.
profile *profile.Profile
// keys maps each comparison key to its share of the total accumulated
// sample value (accumulated value for the key divided by the total).
keys map[comparisonKey]float64
}

// aggregateProfile reduces a profile to a map from comparisonKey to the
// fraction of the total accumulated "cpu" sample value attributed to that key.
//
// It returns an error if the profile does not contain exactly one sample type
// named "cpu", or if keys were collected but their values sum to zero (in
// which case no meaningful frequency can be computed). A profile that yields
// no keys at all produces an empty result and no error.
func aggregateProfile(p *profile.Profile) (aggregateProfileData, error) {
	// Locate the single "cpu" column among the profile's sample types.
	sampleValueIndex := -1
	for i, typ := range p.SampleType {
		if typ.Type != "cpu" {
			continue
		}
		if sampleValueIndex != -1 {
			return aggregateProfileData{}, errors.New("multiple cpu columns found in profile")
		}
		sampleValueIndex = i
	}
	if sampleValueIndex == -1 {
		return aggregateProfileData{}, errors.New("no cpu data found in profile")
	}

	keysCount := make(map[comparisonKey]int64)
	var lineKey strings.Builder
	// addLine appends a description of one line to lineKey, separating
	// consecutive entries with ";".
	addLine := func(line *profile.Line) {
		if lineKey.Len() > 0 {
			lineKey.WriteString(";")
		}
		if line.Function != nil {
			fmt.Fprintf(&lineKey, "%s:%s:%d:%d", line.Function.Filename, line.Function.Name, line.Line-line.Function.StartLine, line.Column)
			return
		}
		fmt.Fprintf(&lineKey, "<unknown>:%d:%d", line.Line, line.Column)
	}

	var total int64
	for _, s := range p.Sample {
		value := s.Value[sampleValueIndex]
		stackHeight := len(s.Location)
		// NOTE(review): the inner range excludes the last location of the
		// stack, and the location at index j is visited j+1 times across the
		// outer iterations, so locations are weighted by their index. This is
		// preserved as-is; confirm that the weighting is intentional.
		for i := stackHeight - 1; i >= 0; i-- {
			for _, loc := range s.Location[i : stackHeight-1] {
				if len(loc.Line) == 0 {
					continue
				}
				// The key is built from the last line of the location;
				// presumably this is the outermost (non-inlined) function,
				// per pprof's Location.Line ordering — TODO confirm.
				lastLine := loc.Line[len(loc.Line)-1]
				if lastLine.Function == nil {
					continue
				}
				key := comparisonKey{
					Filename:              lastLine.Function.Filename,
					FunctionName:          lastLine.Function.Name,
					SystemName:            lastLine.Function.SystemName,
					LineFromFunctionStart: lastLine.Line - lastLine.Function.StartLine,
				}
				// All lines other than the last one are folded into the
				// inline trace.
				if inlined := loc.Line[:len(loc.Line)-1]; len(inlined) > 0 {
					lineKey.Reset()
					for j := range inlined {
						addLine(&inlined[j])
					}
					key.InlineTrace = lineKey.String()
				}
				keysCount[key] += value
				total += value
			}
		}
	}

	// Dividing by a zero total would yield NaN or Inf frequencies, which
	// would silently poison any later comparison.
	if len(keysCount) > 0 && total == 0 {
		return aggregateProfileData{}, errors.New("cpu sample values sum to zero; cannot compute frequencies")
	}

	result := aggregateProfileData{
		profile: p,
		keys:    make(map[comparisonKey]float64, len(keysCount)),
	}
	for key, count := range keysCount {
		result.keys[key] = float64(count) / float64(total)
	}
	return result, nil
}

// computeSimilarityScore computes the similarity score between two profiles.
// For non-negative frequencies, this score is between 0.0 (profiles are
// completely different) and 1.0 (profiles are identical).
//
// It returns an error if either profile cannot be aggregated, if either
// aggregated profile has no keys, or if no key outside of the Go runtime
// carries any weight (in which case the score is undefined).
func computeSimilarityScore(a, b *profile.Profile) (float64, error) {
	aggA, err := aggregateProfile(a)
	if err != nil {
		return 0.0, fmt.Errorf("cannot aggregate profile A: %w", err)
	}
	aggB, err := aggregateProfile(b)
	if err != nil {
		return 0.0, fmt.Errorf("cannot aggregate profile B: %w", err)
	}
	if len(aggA.keys) == 0 || len(aggB.keys) == 0 {
		return 0.0, errors.New("one or both profiles are empty")
	}

	// The scoring algorithm is as follows:
	// Compute the union of all comparison keys for both profiles, ignoring
	// keys that belong to the Go runtime.
	// For each such key, look at the frequency in A and in B.
	// If a key is not found in a profile, its frequency is assumed to be
	// zero.
	// The error score for a key is the absolute difference between the
	// frequencies in A and B.
	// The total error is the sum of these differences across all keys,
	// divided by the sum over all keys of the larger of the two
	// frequencies; effectively a weighted average of the non-overlap of
	// samples, weighted by their frequency.
	// This is a number between 0.0 and 1.0, with 1.0 meaning completely
	// different profiles. We flip this score at the very end to convert it
	// from a measure of difference to a measure of similarity.
	var totalFreq float64
	var sum float64
	for key, freqA := range aggA.keys {
		if key.isGoRuntime() {
			continue
		}
		if freqB, inB := aggB.keys[key]; inB {
			log.Debugf("%v is in both profiles: %.2f%% vs %.2f%%", key, freqA*100.0, freqB*100.0)
			sum += math.Abs(freqA - freqB)
			totalFreq += max(freqA, freqB)
		} else {
			log.Debugf("%v is in A only: %.2f%%", key, freqA*100.0)
			sum += freqA
			totalFreq += freqA
		}
	}
	// Keys present in both profiles were already handled above; only pick up
	// the ones that exist in B alone.
	for key, freqB := range aggB.keys {
		if key.isGoRuntime() {
			continue
		}
		if _, inA := aggA.keys[key]; !inA {
			log.Debugf("%v is in B only: %.2f%%", key, freqB*100.0)
			sum += freqB
			totalFreq += freqB
		}
	}
	// Without this guard the division below yields NaN, and NaN compares as
	// "not below" any threshold, so callers would treat it as similar.
	if totalFreq == 0 {
		return 0.0, errors.New("profiles contain no comparable samples outside of the Go runtime")
	}
	return 1.0 - sum/totalFreq, nil
}

func checkSimilarProfiles() error {
if err := checkSimilarCmd.Parse(os.Args[2:]); err != nil {
return fmt.Errorf("invalid flags: %w", err)
}
if len(checkSimilarCmd.Args()) != 2 {
return errors.New("must provide exactly two profile names as positional arguments")
}

// Open both profiles.
profileAPath := checkSimilarCmd.Args()[0]
profileAFile, err := os.Open(profileAPath)
if err != nil {
return fmt.Errorf("cannot open %q: %w", profileAPath, err)
}
defer profileAFile.Close()
profileA, err := profile.Parse(profileAFile)
if err != nil {
return fmt.Errorf("cannot parse %q: %w", profileAPath, err)
}
profileA = profileA.Compact()
profileBPath := checkSimilarCmd.Args()[1]
profileBFile, err := os.Open(profileBPath)
if err != nil {
return fmt.Errorf("cannot open %q: %w", profileBPath, err)
}
defer profileBFile.Close()
profileB, err := profile.Parse(profileBFile)
if err != nil {
return fmt.Errorf("cannot parse %q: %w", profileBPath, err)
}
profileB = profileB.Compact()

// Check similarity.
similarScore, err := computeSimilarityScore(profileA, profileB)
if err != nil {
return fmt.Errorf("cannot compute similarity score: %w", err)
}
if !*checkSimilarQuiet {
if similarScore < *checkSimilarThreshold {
fmt.Fprintf(os.Stderr, "The profiles are %.2f%% similar, which is under the threshold of %.2f%%.\n", 100.0*similarScore, 100.0**checkSimilarThreshold)
} else {
fmt.Fprintf(os.Stderr, "The profiles are %.2f%% similar, which is above the threshold of %.2f%%.\n", 100.0*similarScore, 100.0**checkSimilarThreshold)
}
}
if similarScore < *checkSimilarThreshold {
os.Exit(1)
}
return nil
}

func main() {
if len(os.Args) < 2 {
printUsage()
Expand All @@ -185,6 +387,10 @@ func main() {
if err := compactProfile(); err != nil {
fail(err.Error())
}
case checkSimilarCmd.Name():
if err := checkSimilarProfiles(); err != nil {
fail(err.Error())
}
default:
printUsage()
os.Exit(1)
Expand Down

0 comments on commit d1fc19d

Please sign in to comment.