-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SampleFDO] Improve stale profile matching by diff algorithm #87375
Changes from 4 commits
51c8adc
858b040
390efcc
40b03c9
10a9838
31dd272
d6978c4
5db6df8
30242b5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,59 @@ | |
|
||
namespace llvm { | ||
|
||
// Callsite location based matching anchor. | ||
// Holds a LineLocation together with a FunctionId. | ||
struct Anchor { | ||
LineLocation Loc; | ||
FunctionId FuncId; | ||
|
||
// Builds an anchor from a location and an already constructed FunctionId. | ||
Anchor(const LineLocation &Loc, const FunctionId &FuncId) | ||
: Loc(Loc), FuncId(FuncId) {} | ||
// Builds an anchor from a location and a name; FuncId is constructed from | ||
// FName. NOTE(review): FName is a non-const reference, so a temporary | ||
// StringRef cannot be passed here - confirm this is intended. | ||
Anchor(const LineLocation &Loc, StringRef &FName) : Loc(Loc), FuncId(FName) {} | ||
// Equality compares FuncId only; Loc does not take part in the comparison, | ||
// so two anchors at different locations with the same FuncId compare equal. | ||
bool operator==(const Anchor &Other) const { | ||
return this->FuncId == Other.FuncId; | ||
} | ||
}; | ||
|
||
// This class implements the Myers diff algorithm used for stale profile | ||
// matching. The algorithm provides a simple and efficient way to find the | ||
// Longest Common Subsequence (LCS) or the Shortest Edit Script (SES) of two | ||
// sequences. For more details, refer to the paper 'An O(ND) Difference | ||
// Algorithm and Its Variations' by Eugene W. Myers. | ||
// In the scenario of profile fuzzy matching, the two sequences are the IR | ||
// callsite anchors and profile callsite anchors. The equal subsequence | ||
// parts from the resulting SES are used to remap the IR locations to the | ||
// profile locations. As the number of function callsites is usually not big, | ||
// we currently just implement the basic greedy version (page 6 of the paper). | ||
class MyersDiff { | ||
public: | ||
struct DiffResult { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I hope we can simplify this. The abstractions here seem unnecessary -- 1) […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sounds good, removed this class. |
||
LocToLocMap EqualLocations; | ||
#ifndef NDEBUG | ||
// New IR locations that are inserted in the new version. | ||
std::vector<LineLocation> Insertions; | ||
wlei-llvm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Old Profile locations that are deleted in the new version. | ||
std::vector<LineLocation> Deletions; | ||
#endif | ||
void addEqualLocations(const LineLocation &IRLoc, | ||
const LineLocation &ProfLoc) { | ||
EqualLocations.insert({IRLoc, ProfLoc}); | ||
} | ||
#ifndef NDEBUG | ||
void addInsertion(const LineLocation &IRLoc) { | ||
Insertions.push_back(IRLoc); | ||
} | ||
void addDeletion(const LineLocation &ProfLoc) { | ||
Deletions.push_back(ProfLoc); | ||
} | ||
#endif | ||
}; | ||
|
||
DiffResult shortestEditScript(const std::vector<Anchor> &A, | ||
const std::vector<Anchor> &B) const; | ||
}; | ||
|
||
using AnchorMap = std::map<LineLocation, Anchor>; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So this […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removed the duplicated location, now the value is only the […] |
||
|
||
// Sample profile matching - fuzzy match. | ||
class SampleProfileMatcher { | ||
Module &M; | ||
|
@@ -27,8 +80,8 @@ class SampleProfileMatcher { | |
const ThinOrFullLTOPhase LTOPhase; | ||
SampleProfileMap FlattenedProfiles; | ||
// For each function, the matcher generates a map, of which each entry is a | ||
// mapping from the source location of current build to the source location in | ||
// the profile. | ||
// mapping from the source location of current build to the source location | ||
// in the profile. | ||
StringMap<LocToLocMap> FuncMappings; | ||
|
||
// Match state for an anchor/callsite. | ||
|
@@ -95,18 +148,13 @@ class SampleProfileMatcher { | |
return nullptr; | ||
} | ||
void runOnFunction(Function &F); | ||
void findIRAnchors(const Function &F, | ||
std::map<LineLocation, StringRef> &IRAnchors); | ||
void findProfileAnchors( | ||
const FunctionSamples &FS, | ||
std::map<LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors); | ||
void findIRAnchors(const Function &F, AnchorMap &IRAnchors); | ||
void findProfileAnchors(const FunctionSamples &FS, AnchorMap &ProfileAnchors); | ||
// Record the callsite match states for profile staleness report, the result | ||
// is saved in FuncCallsiteMatchStates. | ||
void recordCallsiteMatchStates( | ||
const Function &F, const std::map<LineLocation, StringRef> &IRAnchors, | ||
const std::map<LineLocation, std::unordered_set<FunctionId>> | ||
&ProfileAnchors, | ||
const LocToLocMap *IRToProfileLocationMap); | ||
void recordCallsiteMatchStates(const Function &F, const AnchorMap &IRAnchors, | ||
const AnchorMap &ProfileAnchors, | ||
const LocToLocMap *IRToProfileLocationMap); | ||
|
||
bool isMismatchState(const enum MatchState &State) { | ||
return State == MatchState::InitialMismatch || | ||
|
@@ -143,11 +191,15 @@ class SampleProfileMatcher { | |
} | ||
void distributeIRToProfileLocationMap(); | ||
void distributeIRToProfileLocationMap(FunctionSamples &FS); | ||
void runStaleProfileMatching( | ||
const Function &F, const std::map<LineLocation, StringRef> &IRAnchors, | ||
const std::map<LineLocation, std::unordered_set<FunctionId>> | ||
&ProfileAnchors, | ||
LocToLocMap &IRToProfileLocationMap); | ||
LocToLocMap longestCommonSequence( | ||
const std::vector<Anchor> &IRCallsiteAnchors, | ||
const std::vector<Anchor> &ProfileCallsiteAnchors) const; | ||
void matchNonCallsiteLocsAndWriteResults(const LocToLocMap &AnchorMatchings, | ||
const AnchorMap &IRAnchors, | ||
LocToLocMap &IRToProfileLocationMap); | ||
void runStaleProfileMatching(const Function &F, const AnchorMap &IRAnchors, | ||
const AnchorMap &ProfileAnchors, | ||
LocToLocMap &IRToProfileLocationMap); | ||
void reportOrPersistProfileStats(); | ||
}; | ||
} // end namespace llvm | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we not compare `Loc` as well?
Except for this special `operator==` that excludes `Loc`, this is essentially just a `typedef pair<LineLocation, FunctionId> Anchor`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see, changed to `using Anchor = std::pair<LineLocation, FunctionId>;` and removed this struct.