StructuralHash: compute hashes with stable_hash

Replace the IRHash alias with stable_hash (llvm/ADT/StableHashing.h) in the
StructuralHash API and in MergeFunctions. StructuralHashImpl now stores the
DetailedHash flag as a member, its helpers return a stable_hash built with
stable_hash_combine instead of mutating the running hash, and the block,
function and global header magic numbers become named constants.
hashArbitaryType() still goes through hash_combine and is therefore not yet
stable (see the TODO comments added below).

The MergeFunc test moves @call_with_same_range ahead of
@invoke_with_same_range.

diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h
index 57fb45db84911..e2e192cc9501b 100644
--- a/llvm/include/llvm/IR/StructuralHash.h
+++ b/llvm/include/llvm/IR/StructuralHash.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_IR_STRUCTURALHASH_H
 #define LLVM_IR_STRUCTURALHASH_H
 
+#include "llvm/ADT/StableHashing.h"
 #include <cstdint>
 
 namespace llvm {
@@ -21,20 +22,18 @@ namespace llvm {
 class Function;
 class Module;
 
-using IRHash = uint64_t;
-
 /// Returns a hash of the function \p F.
 /// \param F The function to hash.
 /// \param DetailedHash Whether or not to encode additional information in the
 /// hash. The additional information added into the hash when this flag is set
 /// to true includes instruction and operand type information.
-IRHash StructuralHash(const Function &F, bool DetailedHash = false);
+stable_hash StructuralHash(const Function &F, bool DetailedHash = false);
 
 /// Returns a hash of the module \p M by hashing all functions and global
 /// variables contained within. \param M The module to hash. \param DetailedHash
 /// Whether or not to encode additional information in the function hashes that
 /// composed the module hash.
-IRHash StructuralHash(const Module &M, bool DetailedHash = false);
+stable_hash StructuralHash(const Module &M, bool DetailedHash = false);
 
 } // end namespace llvm
 
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index fb4f33a021a96..267a085c5af70 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -24,61 +24,93 @@ namespace {
 // by the MergeFunctions pass.
 
 class StructuralHashImpl {
-  uint64_t Hash = 4;
+  stable_hash Hash = 4;
 
-  void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
+  bool DetailedHash;
+
+  // This random value acts as a block header, as otherwise the partition of
+  // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
+  static constexpr stable_hash BlockHeaderHash = 45798;
+  static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
+  static constexpr stable_hash GlobalHeaderHash = 23456;
 
   // This will produce different values on 32-bit and 64-bit systens as
   // hash_combine returns a size_t. However, this is only used for
   // detailed hashing which, in-tree, only needs to distinguish between
   // differences in functions.
-  template <typename T> void hashArbitaryType(const T &V) {
-    hash(hash_combine(V));
+  // TODO: This is not stable.
+  template <typename T> stable_hash hashArbitaryType(const T &V) {
+    return hash_combine(V);
   }
 
-  void hashType(Type *ValueType) {
-    hash(ValueType->getTypeID());
+  stable_hash hashType(Type *ValueType) {
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(ValueType->getTypeID());
     if (ValueType->isIntegerTy())
-      hash(ValueType->getIntegerBitWidth());
+      Hashes.emplace_back(ValueType->getIntegerBitWidth());
+    return stable_hash_combine(Hashes);
   }
 
 public:
-  StructuralHashImpl() = default;
-
-  void updateOperand(Value *Operand) {
-    hashType(Operand->getType());
-
-    // The cases enumerated below are not exhaustive and are only aimed to
-    // get decent coverage over the function.
-    if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
-      hashArbitaryType(ConstInt->getValue());
-    } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
-      hashArbitaryType(ConstFP->getValue());
-    } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
-      hash(Arg->getArgNo());
-    } else if (Function *Func = dyn_cast<Function>(Operand)) {
+  StructuralHashImpl() = delete;
+  explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}
+
+  stable_hash hashConstant(Constant *C) {
+    SmallVector<stable_hash> Hashes;
+    // TODO: hashArbitaryType() is not stable.
+    if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
+      Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
+    } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
+      Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
+    } else if (Function *Func = dyn_cast<Function>(C)) {
       // Hashing the name will be deterministic as LLVM's hashing infrastructure
       // has explicit support for hashing strings and will not simply hash
       // the pointer.
-      hashArbitaryType(Func->getName());
+      Hashes.emplace_back(hashArbitaryType(Func->getName()));
     }
+
+    return stable_hash_combine(Hashes);
+  }
+
+  stable_hash hashValue(Value *V) {
+    // Check constant and return its hash.
+    Constant *C = dyn_cast<Constant>(V);
+    if (C)
+      return hashConstant(C);
+
+    // Hash argument number.
+    SmallVector<stable_hash> Hashes;
+    if (Argument *Arg = dyn_cast<Argument>(V))
+      Hashes.emplace_back(Arg->getArgNo());
+
+    return stable_hash_combine(Hashes);
   }
 
-  void updateInstruction(const Instruction &Inst, bool DetailedHash) {
-    hash(Inst.getOpcode());
+  stable_hash hashOperand(Value *Operand) {
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(hashType(Operand->getType()));
+    Hashes.emplace_back(hashValue(Operand));
+    return stable_hash_combine(Hashes);
+  }
+
+  stable_hash hashInstruction(const Instruction &Inst) {
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(Inst.getOpcode());
 
     if (!DetailedHash)
-      return;
+      return stable_hash_combine(Hashes);
 
-    hashType(Inst.getType());
+    Hashes.emplace_back(hashType(Inst.getType()));
 
     // Handle additional properties of specific instructions that cause
     // semantic differences in the IR.
     if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
-      hash(ComparisonInstruction->getPredicate());
+      Hashes.emplace_back(ComparisonInstruction->getPredicate());
 
     for (const auto &Op : Inst.operands())
-      updateOperand(Op);
+      Hashes.emplace_back(hashOperand(Op));
+
+    return stable_hash_combine(Hashes);
   }
 
   // A function hash is calculated by considering only the number of arguments
@@ -97,15 +129,17 @@ class StructuralHashImpl {
   // expensive checks for pass modification status). When modifying this
   // function, most changes should be gated behind an option and enabled
   // selectively.
-  void update(const Function &F, bool DetailedHash) {
+  void update(const Function &F) {
     // Declarations don't affect analyses.
     if (F.isDeclaration())
       return;
 
-    hash(0x62642d6b6b2d6b72); // Function header
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(Hash);
+    Hashes.emplace_back(FunctionHeaderHash);
 
-    hash(F.isVarArg());
-    hash(F.arg_size());
+    Hashes.emplace_back(F.isVarArg());
+    Hashes.emplace_back(F.arg_size());
 
     SmallVector<const BasicBlock *, 8> BBs;
     SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
@@ -118,17 +152,17 @@ class StructuralHashImpl {
     while (!BBs.empty()) {
       const BasicBlock *BB = BBs.pop_back_val();
 
-      // This random value acts as a block header, as otherwise the partition of
-      // opcodes into BBs wouldn't affect the hash, only the order of the
-      // opcodes
-      hash(45798);
+      Hashes.emplace_back(BlockHeaderHash);
       for (auto &Inst : *BB)
-        updateInstruction(Inst, DetailedHash);
+        Hashes.emplace_back(hashInstruction(Inst));
 
       for (const BasicBlock *Succ : successors(BB))
         if (VisitedBBs.insert(Succ).second)
           BBs.push_back(Succ);
     }
+
+    // Update the combined hash in place.
+    Hash = stable_hash_combine(Hashes);
   }
 
   void update(const GlobalVariable &GV) {
@@ -137,15 +171,20 @@ class StructuralHashImpl {
     // we ignore anything with the `.llvm` prefix
     if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
       return;
-    hash(23456); // Global header
-    hash(GV.getValueType()->getTypeID());
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(Hash);
+    Hashes.emplace_back(GlobalHeaderHash);
+    Hashes.emplace_back(GV.getValueType()->getTypeID());
+
+    // Update the combined hash in place.
+    Hash = stable_hash_combine(Hashes);
   }
 
-  void update(const Module &M, bool DetailedHash) {
+  void update(const Module &M) {
     for (const GlobalVariable &GV : M.globals())
       update(GV);
     for (const Function &F : M)
-      update(F, DetailedHash);
+      update(F);
   }
 
   uint64_t getHash() const { return Hash; }
@@ -153,14 +192,14 @@ class StructuralHashImpl {
 
 } // namespace
 
-IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
-  StructuralHashImpl H;
-  H.update(F, DetailedHash);
+stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
+  StructuralHashImpl H(DetailedHash);
+  H.update(F);
   return H.getHash();
 }
 
-IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
-  StructuralHashImpl H;
-  H.update(M, DetailedHash);
+stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
+  StructuralHashImpl H(DetailedHash);
+  H.update(M);
   return H.getHash();
 }
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index b50a700e09038..ad16b0b350149 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -172,14 +172,14 @@ namespace {
 
 class FunctionNode {
   mutable AssertingVH<Function> F;
-  IRHash Hash;
+  stable_hash Hash;
 
 public:
   // Note the hash is recalculated potentially multiple times, but it is cheap.
   FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}
 
   Function *getFunc() const { return F; }
-  IRHash getHash() const { return Hash; }
+  stable_hash getHash() const { return Hash; }
 
   /// Replace the reference to the function F by the function G, assuming their
   /// implementations are equal.
@@ -420,7 +420,7 @@ bool MergeFunctions::runOnModule(Module &M) {
 
   // All functions in the module, ordered by hash. Functions with a unique
   // hash value are easily eliminated.
-  std::vector<std::pair<IRHash, Function *>> HashedFuncs;
+  std::vector<std::pair<stable_hash, Function *>> HashedFuncs;
   for (Function &Func : M) {
     if (isEligibleForMerging(Func)) {
       HashedFuncs.push_back({StructuralHash(Func), &Func});
diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
index e7718ca84d316..0ceb363a67b1f 100644
--- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
+++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
@@ -63,6 +63,14 @@ lpad:
   resume { ptr, i32 } zeroinitializer
 }
 
+define i8 @call_with_same_range() {
+; CHECK-LABEL: @call_with_same_range
+; CHECK: tail call i8 @call_with_range
+  bitcast i8 0 to i8
+  %out = call i8 @dummy(), !range !0
+  ret i8 %out
+}
+
 define i8 @invoke_with_same_range() personality ptr undef {
 ; CHECK-LABEL: @invoke_with_same_range()
 ; CHECK: tail call i8 @invoke_with_range()
@@ -76,15 +84,6 @@ lpad:
   resume { ptr, i32 } zeroinitializer
 }
 
-define i8 @call_with_same_range() {
-; CHECK-LABEL: @call_with_same_range
-; CHECK: tail call i8 @call_with_range
-  bitcast i8 0 to i8
-  %out = call i8 @dummy(), !range !0
-  ret i8 %out
-}
-
-
 declare i8 @dummy();
 declare i32 @__gxx_personality_v0(...)
 