diff --git a/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler.h b/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler.h similarity index 83% rename from include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler.h rename to include/retdec/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler.h index 008f556dfb..7112e93b09 100644 --- a/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler.h +++ b/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler.h @@ -1,11 +1,11 @@ /** -* @file include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler.h +* @file include/retdec/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler.h * @brief Handling of labels during conversion of LLVM IR to BIR. * @copyright (c) 2017 Avast Software, licensed under the MIT license */ -#ifndef RETDEC_LLVMIR2HLL_LLVM_LLVMIR2BIR_CONVERTERS_ORIG_LLVMIR2BIR_CONVERTER_LABELS_HANDLER_H -#define RETDEC_LLVMIR2HLL_LLVM_LLVMIR2BIR_CONVERTERS_ORIG_LLVMIR2BIR_CONVERTER_LABELS_HANDLER_H +#ifndef RETDEC_LLVMIR2HLL_LLVM_LLVMIR2BIR_CONVERTERS_NEW_LLVMIR2BIR_CONVERTER_LABELS_HANDLER_H +#define RETDEC_LLVMIR2HLL_LLVM_LLVMIR2BIR_CONVERTERS_NEW_LLVMIR2BIR_CONVERTER_LABELS_HANDLER_H #include diff --git a/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter.h b/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter.h deleted file mode 100644 index c94b1ceb8e..0000000000 --- a/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter.h +++ /dev/null @@ -1,172 +0,0 @@ -/** -* @file include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter.h -* @brief The original converter of LLVM IR into BIR. -* @copyright (c) 2017 Avast Software, licensed under the MIT license -*/ - -#ifndef RETDEC_LLVMIR2HLL_LLVM_LLVMIR2BIR_CONVERTERS_ORIG_LLVMIR2BIR_CONVERTER_H -#define RETDEC_LLVMIR2HLL_LLVM_LLVMIR2BIR_CONVERTERS_ORIG_LLVMIR2BIR_CONVERTER_H - -#include -#include - -#include - -#include "retdec/llvmir2hll/llvm/llvmir2bir_converter.h" -#include "retdec/llvmir2hll/support/smart_ptr.h" -#include "retdec/llvmir2hll/support/types.h" - -namespace llvm { - -class Loop; -class Pass; - -} // namespace llvm - -namespace retdec { -namespace llvmir2hll { - -class GotoStmt; -class LLVMBranchInfo; -class LLVMConverter; -class LabelsHandler; -class Statement; -class Value; -class VarsHandler; - -/** -* @brief The original converter of LLVM IR into BIR. -* -* Instances of this class have reference object semantics. -*/ -class OrigLLVMIR2BIRConverter: public LLVMIR2BIRConverter, - private llvm::InstVisitor> { -public: - static ShPtr create(llvm::Pass *basePass); - - virtual std::string getId() const override; - virtual ShPtr convert(llvm::Module *llvmModule, - const std::string &moduleName, ShPtr semantics, - ShPtr config, bool enableDebug) override; - -private: - /// Mapping of basic blocks to how many times they have been processed. - using BBProcessedMap = std::map; - - /// Goto statements to be patched into a basic block. - // Note: To make the converted deterministic, we need to use a vector - // instead of a map. - using GotoStmtsToPatch = std::vector, - llvm::BasicBlock *>>; - - /// Mapping of basic blocks to statements in BIR. - using BBStmtMap = std::map>; - -private: - OrigLLVMIR2BIRConverter(llvm::Pass *basePass); - - // Instruction visitation functions. - friend class llvm::InstVisitor>; - ShPtr visitReturnInst(llvm::ReturnInst &i); - ShPtr visitBranchInst(llvm::BranchInst &i); - ShPtr visitSwitchInst(llvm::SwitchInst &i); - ShPtr visitCallInst(llvm::CallInst &i); - ShPtr visitGetElementPtrInst(llvm::GetElementPtrInst &i); - ShPtr visitLoadInst(llvm::LoadInst &i); - ShPtr visitStoreInst(llvm::StoreInst &i); - ShPtr visitAllocaInst(llvm::AllocaInst &i); - ShPtr visitCastInst(llvm::CastInst &i); - ShPtr visitInsertValueInst(llvm::InsertValueInst &i); - ShPtr visitExtractValueInst(llvm::ExtractValueInst &i); - ShPtr visitUnreachableInst(llvm::UnreachableInst &i); - ShPtr visitInstruction(llvm::Instruction &i); - - void visitAndAddFunctions(); - void visitAndAddGlobalVariables(); - void visitAndAddFunctionDeclarations(); - void visitAndAddFunction(llvm::Function &f, bool onlyDeclaration = false); - - void makeIdentifiersValid(); - void generateMissingStatements(ShPtr funcBody); - bool isBBMissingStatements(llvm::BasicBlock *bb); - void generateMissingStatementsForBB(ShPtr funcBody, - llvm::BasicBlock *bb); - - void addGotoStmtToPatch(ShPtr gotoStmt, llvm::BasicBlock *bb); - void patchTargetsOfGotoStmts(); - void setGotoTargetLabel(ShPtr target, - const llvm::BasicBlock *targetBB); - - ShPtr visitBasicBlockOrLoop(llvm::BasicBlock *bb, - bool genTerm = true); - ShPtr visitBasicBlock(llvm::BasicBlock *bb, bool genTerm = true); - ShPtr visitLoop(llvm::Loop *l); - - VarVector getFunctionParams(llvm::Function &f); - ShPtr getFunctionBody(llvm::Function &f); - ShPtr getDefaultSwitchBlock(llvm::BasicBlock *bb, - llvm::BasicBlock *succ); - ShPtr getSwitchCaseExpression(llvm::Value *v); - ShPtr getLoopEnd(llvm::BasicBlock *currBB, - llvm::BasicBlock *loopHeader, llvm::BasicBlock *loopEnd, - llvm::Value *cond, bool isCondNegated = false, - bool justPHICopies = false); - llvm::Value *getInitialValueOfIndVar(const llvm::Loop *l) const; - ShPtr getPHICopiesForSuccessor(llvm::BasicBlock *currBB, - llvm::BasicBlock *succ) const; - ShPtr generateGotoForConditionalBranch(llvm::BasicBlock *bb1, - llvm::BasicBlock *bb2, llvm::BasicBlock *source, llvm::Value *cond, - bool negateCond = false); - - static void addStatementToStatementBlock(ShPtr stmtToAdd, - ShPtr &firstStmt, ShPtr &prevStmt); - static ShPtr addDebugCommentToStatement( - ShPtr stmt, std::string debugComment); - -private: - /// The input LLVM module. - llvm::Module *llvmModule; - - /// The resulting module in BIR. - ShPtr resModule; - - /// Should debugging messages be emitted? - bool enableDebug; - - /// Handler of variables created during decompilation. - ShPtr varsHandler; - - /// Type and values converter. - ShPtr converter; - - /// Supportive information about branches and loops. - ShPtr branchInfo; - - /// Handler of labels. - UPtr labelsHandler; - - /// Mapping of basic blocks to how many times they have been processed. - BBProcessedMap processedBBs; - - /// Mapping of basic blocks to the first statement in the - /// corresponding basic block in BIR. - BBStmtMap bbStmtMap; - - /// Goto statements whose target has to be patched after the current - /// function is converted. - GotoStmtsToPatch gotoStmtsToPatch; - - /// Basic block corresponding to the currently generated loop. - llvm::BasicBlock *currLoopBB; - - /// Basic block which should be generated after the current loop. - llvm::BasicBlock *lastLoopExitBB; - - /// Are we generating a switch statement? - bool generatingSwitchStmt; -}; - -} // namespace llvmir2hll -} // namespace retdec - -#endif diff --git a/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_branch_info.h b/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_branch_info.h deleted file mode 100644 index 9908b6825a..0000000000 --- a/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_branch_info.h +++ /dev/null @@ -1,143 +0,0 @@ -/** -* @file include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_branch_info.h -* @brief Supportive information about LLVM branches and loops. -* @copyright (c) 2017 Avast Software, licensed under the MIT license -*/ - -#ifndef RETDEC_LLVMIR2HLL_LLVM_LLVMIR2BIR_CONVERTERS_ORIG_LLVMIR2BIR_CONVERTER_LLVM_BRANCH_INFO_H -#define RETDEC_LLVMIR2HLL_LLVM_LLVMIR2BIR_CONVERTERS_ORIG_LLVMIR2BIR_CONVERTER_LLVM_BRANCH_INFO_H - -#include -#include -#include -#include -#include -#include - -#include "retdec/llvmir2hll/support/smart_ptr.h" -#include "retdec/utils/non_copyable.h" - -namespace llvm { - -class BasicBlock; -class Loop; -class LoopInfo; -class Module; -class SwitchInst; - -} // namespace llvm - -namespace retdec { -namespace llvmir2hll { - -class ConstInt; -class LLVMConverter; -class Module; -class Statement; -class VarsHandler; - -/** -* @brief Supportive information about LLVM branches and loops. -* -* This class contains supportive information about LLVM branches and loops. For -* example, it can compute the common branch destination of two basic blocks, -* which comes handy when generating if statements. It also provides several -* functions regarding loops. -* -* Before an instance of this class can be used, the init() member function has -* to be called. More specifically, this member function has to be called -* whenever a new function is being decompiled. -* -* Instances of this class have reference object semantics. This class is not -* meant to be subclassed. -*/ -class LLVMBranchInfo final: private retdec::utils::NonCopyable { -public: - LLVMBranchInfo(ShPtr converter, - ShPtr varsHandler); - ~LLVMBranchInfo(); - - void init(llvm::LoopInfo *loopInfo); - - /// @name Manipulation with branch stack. - /// @{ - void branchStackPush(llvm::BasicBlock *bb); - llvm::BasicBlock *branchStackTop() const; - std::size_t branchStackSize() const; - void branchStackPop(); - /// @} - - /// @name Supportive information for loops. - /// @{ - bool generatingLoop() const; - void startGeneratingLoop(llvm::Loop *l); - void endGeneratingLoop(); - - bool isLoopHeader(llvm::BasicBlock *bb, llvm::Loop *loop) const; - bool isOptimizableToForLoop(const llvm::Loop *l) const; - bool isSuccHeaderOfInnerLoop(llvm::BasicBlock *bb, - llvm::BasicBlock *succ) const; - bool isSuccHeaderOfOuterLoop(llvm::BasicBlock *bb, - llvm::BasicBlock *succ) const; - - ShPtr getTripCount(const llvm::Loop *l) const; - llvm::Loop *getLoopFor(const llvm::BasicBlock *bb) const; - /// }@ - - /// @name Supportive information for branches. - /// @{ - llvm::BasicBlock *findCommonBranchDestination(llvm::BasicBlock *bb1, - llvm::BasicBlock *bb2) const; - llvm::BasicBlock *findCommonSwitchDestination(llvm::SwitchInst *si) const; - bool isGotoNecessary(llvm::BasicBlock *srcBB, - llvm::BasicBlock *dstBB) const; - bool isAccessible(llvm::BasicBlock *bb, llvm::BasicBlock *from) const; - /// }@ - -private: - /// Set of basic blocks. - using BBSet = std::set; - - /// Queue for BFS (breadth-first search) traversals. - using BFSQueue = std::queue; - - /// Stack of loops that are being generated (this is needed to properly - /// generate loop bodies). - // Note to developers: We use std::deque instead of std::stack because - // std::deque allows us to access the second topmost - // symbol in an easier way. - using LoopStack = std::deque; - - /// Stack of basic blocks to remember common branch destinations when - /// generating nested if-else if statements. - using BranchStack = std::stack; - -private: - void processAndPushBasicBlock(llvm::BasicBlock *poppedBB, - BFSQueue &bfsQueue, BBSet &bfsProcessedBBs) const; - bool isAccessibleImpl(llvm::BasicBlock *bb, llvm::BasicBlock *from, - BBSet &visitedBlocks) const; - -private: - /// Information about loops. - llvm::LoopInfo *loopInfo; - - /// Type and values converter. - ShPtr converter; - - /// Handler of variables created during decompilation. - ShPtr varsHandler; - - /// Stack of loops that are being generated (this is needed to properly - /// generate loop bodies). - LoopStack loopStack; - - /// Stack of basic blocks to remember common branch destinations when - /// generating nested if-else if statements. - BranchStack branchStack; -}; - -} // namespace llvmir2hll -} // namespace retdec - -#endif diff --git a/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_converter.h b/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_converter.h deleted file mode 100644 index 5c94ac5ddd..0000000000 --- a/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_converter.h +++ /dev/null @@ -1,139 +0,0 @@ -/** -* @file include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_converter.h -* @brief A converter from LLVM values to values in the backend IR. -* @copyright (c) 2017 Avast Software, licensed under the MIT license -*/ - -#ifndef RETDEC_LLVMIR2HLL_LLVM_LLVMIR2BIR_CONVERTERS_ORIG_LLVMIR2BIR_CONVERTER_LLVM_CONVERTER_H -#define RETDEC_LLVMIR2HLL_LLVM_LLVMIR2BIR_CONVERTERS_ORIG_LLVMIR2BIR_CONVERTER_LLVM_CONVERTER_H - -#include - -#include -#include -#include - -#include "retdec/llvmir2hll/support/smart_ptr.h" -#include "retdec/utils/non_copyable.h" - -namespace llvm { - -class ConstantArray; -class ConstantDataArray; -class ConstantDataSequential; -class FunctionType; -class Module; -class Type; -class Value; - -} // namespace llvm - -namespace retdec { -namespace llvmir2hll { - -class AssignStmt; -class CallStmt; -class ConstArray; -class ConstString; -class Constant; -class Expression; -class FunctionType; -class Module; -class Statement; -class TernaryOpExpr; -class Type; -class Value; -class VarsHandler; - -/** -* @brief A converter from LLVM values to values in the backend IR. -* -* Instances of this class have reference object semantics. This class is not -* meant to be subclassed. -*/ -class LLVMConverter final: private llvm::InstVisitor>, - private retdec::utils::NonCopyable { -public: - LLVMConverter(llvm::Module *module, ShPtr resModule, - ShPtr varsHandler); - ~LLVMConverter(); - - // The default value of llvmSigned determines whether integers are signed - // or unsigned by default. - ShPtr llvmTypeToType(llvm::Type *llvmType, bool llvmSigned = true); - ShPtr llvmFunctionTypeToFunctionType(llvm::FunctionType *llvmType); - ShPtr llvmConstantToExpression(llvm::Constant *c); - ShPtr llvmConstantDataSequentialToConstArray(llvm::ConstantDataSequential *cds); - ShPtr llvmValueToExpression(llvm::Value *v); - ShPtr llvmValueToExpressionDeref(llvm::Value *v); - ShPtr llvmConstantArrayToConstArray(llvm::ConstantArray *ca); - ShPtr llvmConstantArrayToExpression(llvm::ConstantArray *ca); - ShPtr llvmLoadInstToExpression(llvm::LoadInst &i); - ShPtr llvmStoreInstToAssignStmt(llvm::StoreInst &i); - ShPtr llvmSelectInstToTernaryOp(llvm::SelectInst &i); - ShPtr llvmBinaryOperatorToExpression(llvm::Instruction &i); - ShPtr llvmICmpInstToExpression(llvm::ICmpInst &i); - ShPtr llvmFCmpInstToExpression(llvm::FCmpInst &i); - ShPtr llvmInsertValueInstToStatement(llvm::InsertValueInst &i); - ShPtr llvmExtractValueInstToExpression(llvm::ExtractValueInst &i); - ShPtr generateAccessesToCompositeType(llvm::CompositeType *ct, - llvm::ArrayRef indices, ShPtr base); - ShPtr llvmCallInstToCallStmt(llvm::CallInst &i); - ShPtr llvmGEPInstToExpression(llvm::GetElementPtrInst &i); - ShPtr llvmAllocaInstToExpression(llvm::AllocaInst &i); - ShPtr llvmInstructionToValue(llvm::Instruction &i); - ShPtr llvmReturnInstToReturnStmt(llvm::ReturnInst &i); - - ShPtr getInitializer(llvm::GlobalVariable *v); - ShPtr getDefaultInitializer(llvm::Type *t); - - /// @name Options - /// @{ - void setOptionStrictFPUSemantics(bool strict = true); - /// @} - -private: - ShPtr llvmValueToExpressionInternal(llvm::Value *v); - ShPtr llvmGEPExpressionToExpressionInternal(llvm::Value *ptr, - llvm::gep_type_iterator i, llvm::gep_type_iterator e); - ShPtr llvmFCmpBinInstToExpressionStrictFPUSemantics(llvm::FCmpInst &i); - ShPtr llvmFCmpBinInstToExpressionNonStrictFPUSemantics(llvm::FCmpInst &i); - - // Instruction visitation functions. - friend class llvm::InstVisitor>; - ShPtr visitBinaryOperator(llvm::Instruction &i); - ShPtr visitCastInst(llvm::CastInst &i); - ShPtr visitCallInst(llvm::CallInst &i); - ShPtr visitAllocaInst(llvm::AllocaInst &i); - ShPtr visitGetElementPtrInst(llvm::GetElementPtrInst &i); - ShPtr visitInsertValueInst(llvm::InsertValueInst &i); - ShPtr visitExtractValueInst(llvm::ExtractValueInst &i); - ShPtr visitICmpInst(llvm::ICmpInst &i); - ShPtr visitFCmpInst(llvm::FCmpInst &i); - ShPtr visitLoadInst(llvm::LoadInst &i); - ShPtr visitStoreInst(llvm::StoreInst &i); - ShPtr visitSelectInst(llvm::SelectInst &i); - ShPtr visitInstruction(llvm::Instruction &i); - -private: - /// The currently processed LLVM module. - llvm::Module *module; - - /// The resulting module in our IR. - ShPtr resModule; - - /// Handler of variables created during decompilation. - ShPtr varsHandler; - - /// Mapping of an LLVM type into a type in our IR. - std::map> llvmTypeToTypeMap; - std::map> llvmTypeToSignedTypeMap; - - /// Use strict FPU semantics? - bool optionStrictFPUSemantics; -}; - -} // namespace llvmir2hll -} // namespace retdec - -#endif diff --git a/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/vars_handler.h b/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/vars_handler.h deleted file mode 100644 index b056500763..0000000000 --- a/include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/vars_handler.h +++ /dev/null @@ -1,85 +0,0 @@ -/** -* @file include/retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/vars_handler.h -* @brief Handling of variables created during decompilation. -* @copyright (c) 2017 Avast Software, licensed under the MIT license -*/ - -#ifndef RETDEC_LLVMIR2HLL_LLVM_LLVMIR2BIR_CONVERTERS_ORIG_LLVMIR2BIR_CONVERTER_VARS_HANDLER_H -#define RETDEC_LLVMIR2HLL_LLVM_LLVMIR2BIR_CONVERTERS_ORIG_LLVMIR2BIR_CONVERTER_VARS_HANDLER_H - -#include - -#include "retdec/llvmir2hll/support/smart_ptr.h" -#include "retdec/llvmir2hll/support/types.h" -#include "retdec/llvmir2hll/var_name_gen/var_name_gen.h" -#include "retdec/utils/non_copyable.h" - -namespace llvm { - -class Type; -class Value; - -} // namespace llvm - -namespace retdec { -namespace llvmir2hll { - -class Module; -class Variable; - -/** -* @brief Handler of variables created during decompilation. -* -* Instances of this class have reference object semantics. This class is not -* meant to be subclassed. -*/ -class VarsHandler final: private retdec::utils::NonCopyable { -public: - VarsHandler(ShPtr resModule, ShPtr varNameGen); - ~VarsHandler(); - - void startConvertingGlobalVars(); - void stopConvertingGlobalVars(); - void reset(); - - ShPtr getVariableByName(const std::string &varName); - std::string getValueName(const llvm::Value *v); - - void addLocalVar(ShPtr var); - bool localVarExists(const std::string &varName) const; - VarSet getLocalVars() const; - - void addAllocatedVarType(llvm::Value *var, llvm::Type *varType); - llvm::Type *getAllocatedVarType(llvm::Value *var) const; - -private: - /// Mapping of a string into a variable. - using StringVarMap = std::map>; - -private: - /// The resulting module in our IR. - ShPtr resModule; - - /// Variable names generator. - ShPtr varNameGen; - - /// Naming of unnamed local variables. - std::map anonVarNames; - - /// Mapping between a local variable's name and the actual variable (or - /// the null pointer if the variable hasn't been defined yet). Function - /// parameters are also included. - StringVarMap localVars; - - /// Mapping between a local variable allocated by an alloca instruction - /// and its type. - std::map allocatedVarTypes; - - /// Are we converting global variables? - bool convertingGlobalVars; -}; - -} // namespace llvmir2hll -} // namespace retdec - -#endif diff --git a/scripts/retdec-decompiler.py b/scripts/retdec-decompiler.py index 26c050b232..b498f395e6 100644 --- a/scripts/retdec-decompiler.py +++ b/scripts/retdec-decompiler.py @@ -166,7 +166,7 @@ def parse_args(args): parser.add_argument('--backend-llvmir2bir-converter', dest='backend_llvmir2bir_converter', default='new', - choices=['orig', 'new'], + choices=['new'], help='Name of the converter from LLVM IR to BIR.') parser.add_argument('--backend-no-compound-operators', diff --git a/src/llvmir2hll/CMakeLists.txt b/src/llvmir2hll/CMakeLists.txt index 94db86000f..3f9e40940c 100644 --- a/src/llvmir2hll/CMakeLists.txt +++ b/src/llvmir2hll/CMakeLists.txt @@ -138,6 +138,7 @@ set(LLVMIR2HLL_SOURCES llvm/llvmir2bir_converters/new_llvmir2bir_converter.cpp llvm/llvmir2bir_converters/new_llvmir2bir_converter/basic_block_converter.cpp llvm/llvmir2bir_converters/new_llvmir2bir_converter/cfg_node.cpp + llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler.cpp llvm/llvmir2bir_converters/new_llvmir2bir_converter/llvm_constant_converter.cpp llvm/llvmir2bir_converters/new_llvmir2bir_converter/llvm_fcmp_converter.cpp llvm/llvmir2bir_converters/new_llvmir2bir_converter/llvm_instruction_converter.cpp @@ -145,11 +146,6 @@ set(LLVMIR2HLL_SOURCES llvm/llvmir2bir_converters/new_llvmir2bir_converter/llvm_value_converter.cpp llvm/llvmir2bir_converters/new_llvmir2bir_converter/structure_converter.cpp llvm/llvmir2bir_converters/new_llvmir2bir_converter/variables_manager.cpp - llvm/llvmir2bir_converters/orig_llvmir2bir_converter.cpp - llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler.cpp - llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_branch_info.cpp - llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_converter.cpp - llvm/llvmir2bir_converters/orig_llvmir2bir_converter/vars_handler.cpp llvm/string_conversions.cpp obtainer/call_info_obtainer.cpp obtainer/call_info_obtainers/optim_call_info_obtainer.cpp @@ -308,29 +304,6 @@ set(LLVMIR2HLL_SOURCES var_renamer/var_renamers/unified_var_renamer.cpp ) -# Build of the original LLVMIR2BIR converter fails on GCC 6.1 with the -# following internal error: -# -# from orig_llvmir2bir_converter.cpp:57: -# include/llvm/IR/InstVisitor.h: In member function llvm::InstVisitor::visitInvokeInst(...): -# include/llvm/IR/InstVisitor.h:244:7: internal compiler error: in assign_temp, at function.c:961 -# DELEGATE(Instruction); -# ^ -# -# It seems to be related to the following bug report: -# -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69241 -# -# Although the bug report says that this issue has already been fixed, GCC -# 6.1.1 still fails (checked on 2016-05-03). A suggested workaround is to -# use -O1 instead of -O2 when building with GCC >= 6.1. -if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6) - set_source_files_properties( - "${CMAKE_CURRENT_SOURCE_DIR}/llvm/llvmir2bir_converters/orig_llvmir2bir_converter.cpp" - PROPERTIES COMPILE_FLAGS "-O1" - ) -endif() - add_library(retdec-llvmir2hll STATIC ${LLVMIR2HLL_SOURCES}) target_link_libraries(retdec-llvmir2hll retdec-config retdec-utils retdec-llvm-support llvm) target_include_directories(retdec-llvmir2hll PUBLIC ${PROJECT_SOURCE_DIR}/include/) diff --git a/src/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/basic_block_converter.cpp b/src/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/basic_block_converter.cpp index 08aafaf1b8..cab48497c5 100644 --- a/src/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/basic_block_converter.cpp +++ b/src/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/basic_block_converter.cpp @@ -17,8 +17,8 @@ #include "retdec/llvmir2hll/ir/variable.h" #include "retdec/llvmir2hll/llvm/llvm_support.h" #include "retdec/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/basic_block_converter.h" +#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler.h" #include "retdec/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/llvm_value_converter.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler.h" namespace retdec { namespace llvmir2hll { diff --git a/src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler.cpp b/src/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler.cpp similarity index 91% rename from src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler.cpp rename to src/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler.cpp index 74d67e921c..60cf8bc7a3 100644 --- a/src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler.cpp +++ b/src/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler.cpp @@ -1,5 +1,5 @@ /** -* @file src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler.cpp +* @file src/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler.cpp * @brief Implementation of LabelsHandler. * @copyright (c) 2017 Avast Software, licensed under the MIT license */ @@ -8,7 +8,7 @@ #include "retdec/llvmir2hll/ir/statement.h" #include "retdec/llvmir2hll/llvm/llvm_support.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler.h" +#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler.h" #include "retdec/llvmir2hll/utils/string.h" #include "retdec/utils/container.h" diff --git a/src/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/structure_converter.cpp b/src/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/structure_converter.cpp index 40a4213c6d..34a596a8c1 100644 --- a/src/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/structure_converter.cpp +++ b/src/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/structure_converter.cpp @@ -30,9 +30,9 @@ #include "retdec/llvmir2hll/ir/while_loop_stmt.h" #include "retdec/llvmir2hll/llvm/llvm_support.h" #include "retdec/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/basic_block_converter.h" +#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler.h" #include "retdec/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/llvm_value_converter.h" #include "retdec/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/structure_converter.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler.h" #include "retdec/llvmir2hll/support/debug.h" #include "retdec/llvmir2hll/support/expression_negater.h" #include "retdec/llvmir2hll/utils/ir.h" diff --git a/src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter.cpp b/src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter.cpp deleted file mode 100644 index 7b31a1c017..0000000000 --- a/src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter.cpp +++ /dev/null @@ -1,1999 +0,0 @@ -/** -* @file src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter.cpp -* @brief Implementation of OrigLLVMIR2BIRConverter. -* @copyright (c) 2017 Avast Software, licensed under the MIT license -*/ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "retdec/llvmir2hll/ir/add_op_expr.h" -#include "retdec/llvmir2hll/ir/array_type.h" -#include "retdec/llvmir2hll/ir/assign_stmt.h" -#include "retdec/llvmir2hll/ir/break_stmt.h" -#include "retdec/llvmir2hll/ir/call_expr.h" -#include "retdec/llvmir2hll/ir/call_stmt.h" -#include "retdec/llvmir2hll/ir/const_bool.h" -#include "retdec/llvmir2hll/ir/const_int.h" -#include "retdec/llvmir2hll/ir/const_string.h" -#include "retdec/llvmir2hll/ir/constant.h" -#include "retdec/llvmir2hll/ir/continue_stmt.h" -#include "retdec/llvmir2hll/ir/empty_stmt.h" -#include "retdec/llvmir2hll/ir/for_loop_stmt.h" -#include "retdec/llvmir2hll/ir/function.h" -#include "retdec/llvmir2hll/ir/global_var_def.h" -#include "retdec/llvmir2hll/ir/goto_stmt.h" -#include "retdec/llvmir2hll/ir/if_stmt.h" -#include "retdec/llvmir2hll/ir/int_type.h" -#include "retdec/llvmir2hll/ir/lt_eq_op_expr.h" -#include "retdec/llvmir2hll/ir/lt_op_expr.h" -#include "retdec/llvmir2hll/ir/module.h" -#include "retdec/llvmir2hll/ir/pointer_type.h" -#include "retdec/llvmir2hll/ir/statement.h" -#include "retdec/llvmir2hll/ir/switch_stmt.h" -#include "retdec/llvmir2hll/ir/type.h" -#include "retdec/llvmir2hll/ir/unknown_type.h" -#include "retdec/llvmir2hll/ir/unreachable_stmt.h" -#include "retdec/llvmir2hll/ir/value.h" -#include "retdec/llvmir2hll/ir/var_def_stmt.h" -#include "retdec/llvmir2hll/ir/variable.h" -#include "retdec/llvmir2hll/ir/while_loop_stmt.h" -#include "retdec/llvmir2hll/llvm/llvm_debug_info_obtainer.h" -#include "retdec/llvmir2hll/llvm/llvm_support.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converter_factory.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_branch_info.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_converter.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/vars_handler.h" -#include "retdec/llvmir2hll/llvm/string_conversions.h" -#include "retdec/llvmir2hll/support/debug.h" -#include "retdec/llvmir2hll/support/expression_negater.h" -#include "retdec/llvmir2hll/support/types.h" -#include "retdec/llvmir2hll/utils/ir.h" -#include "retdec/llvmir2hll/utils/string.h" -#include "retdec/llvmir2hll/var_name_gen/var_name_gens/num_var_name_gen.h" -#include "retdec/llvm-support/diagnostics.h" -#include "retdec/utils/container.h" -#include "retdec/utils/conversion.h" - -using namespace retdec::llvm_support; -using namespace std::string_literals; - -using retdec::utils::hasItem; -using retdec::utils::mapHasKey; - -namespace retdec { -namespace llvmir2hll { - -namespace { - -REGISTER_AT_FACTORY("orig", ORIG_LLVMIR2BIR_CONVERTER_ID, LLVMIR2BIRConverterFactory, - OrigLLVMIR2BIRConverter::create); - -/// Vector of PHI nodes. -using PHINodesVector = std::vector; - -/** -* @brief Returns @c true if @a phiNode1 depends on @a phiNode2, @c false -* otherwise. -* -* @e Depends means that @a phiNode2 sets a variable which is used in @a -* phiNode1. For example, in the following code, the second PHI node depends on -* the first one: -* @code -* %A = phi i32 [ %D, %label ], [ 10, %0 ] -* %B = phi i32 [ %A, %label ], [ 66, %0 ] -* @endcode -* So, dependsOn(first, second) returns @c false and -* dependsOn(second, first) returns @c true. -*/ -bool dependsOn(llvm::PHINode *phiNode1, llvm::PHINode *phiNode2) { - // We assume that every variable has a name. To ensure this, the -instnamer - // pass has to be used when optimizing the LLVM IR in bin2llvmir. - const std::string &var2Name(phiNode2->getName()); - for (unsigned i = 0, e = phiNode1->getNumIncomingValues(); i < e; ++i) { - llvm::Value *incValue(phiNode1->getIncomingValue(i)); - if (incValue->getName() == var2Name) { - return true; - } - } - return false; -} - -/** -* @brief Returns all PHI nodes in the given basic block. -*/ -PHINodesVector getPHINodes(llvm::BasicBlock &bb) { - PHINodesVector phiNodes; - // In LLVM IR, PHI nodes precede all other instructions. Therefore, we can - // stop when we reach the first non-PHI instruction. - for (auto i = bb.begin(); llvm::isa(i); ++i) { - phiNodes.push_back(llvm::cast(&*i)); - } - return phiNodes; -} - -/** -* @brief Returns @c true if the given PHI nodes can be ordered, @c false -* otherwise. -* -* Two PHI nodes can be ordered if and only if there are no distinct nodes @c A -* and @c B such that @c A (transitively) depends on @c B and @c B -* (transitively) depends on @c A. -*/ -bool canBeOrdered(const PHINodesVector &phiNodes) { - // For every PHI node... - for (std::size_t i = 0, e = phiNodes.size(); i != e; ++i) { - // Follow the dependency relation for the current PHI node and check - // that there is no cycle in the dependency graph. - std::set checkedNodes{phiNodes[i]}; - std::size_t k = i, j = 0, f = phiNodes.size(); - while (j != f) { - if (k != j && dependsOn(phiNodes[k], phiNodes[j])) { - if (hasItem(checkedNodes, phiNodes[j])) { - // We have encountered a PHI node that depends on a node - // that we have already checked. This means that there is a - // cycle in the dependency graph. - return false; - } - checkedNodes.insert(phiNodes[j]); - k = j; - j = 0; - } else { - ++j; - } - } - } - return true; -} - -/** -* @brief Returns @c true if @a srcNode is reachable from @a dstNode in the -* given basic block @a bb, @c false otherwise. -*/ -bool isReachable(llvm::PHINode *dstNode, llvm::PHINode *srcNode, - llvm::BasicBlock &bb) { - bool srcNodeFound(false); - // In LLVM IR, PHI nodes precede all other instructions. Therefore, we can - // stop when we reach the first non-PHI instruction. - for (auto i = bb.begin(); llvm::isa(i); ++i) { - if (&*i == srcNode) { - srcNodeFound = true; - } - if (srcNodeFound && &*i == dstNode) { - return true; - } - } - return false; -} - -/** -* @brief Performs the ordering of the given PHI nodes in the given basic block -* according to their dependencies. -* -* @par Preconditions -* - the PHI nodes can be ordered (see canBeOrdered()) -*/ -void performOrderingOfDependentPHINodes(const PHINodesVector &phiNodes, - llvm::BasicBlock &bb) { - bool changed; - do { - changed = false; - for (std::size_t i = 0, e = phiNodes.size(); i != e; ++i) { - for (std::size_t j = 0, f = phiNodes.size(); j != f; ++j) { - if (i != j && dependsOn(phiNodes[i], phiNodes[j]) && - !isReachable(phiNodes[j], phiNodes[i], bb)) { - phiNodes[i]->moveBefore(phiNodes[j]); - changed = true; - } - } - } - } while (changed); -} - -/** -* @brief Orders PHI nodes in the given basic block according to their -* dependencies. -* -* For a description of what this function does, see orderDependentPHINodes() -* for llvm::Module. -*/ -void orderDependentPHINodes(llvm::BasicBlock &bb) { - PHINodesVector phiNodes(getPHINodes(bb)); - if (phiNodes.size() < 2) { - return; - } - - if (!canBeOrdered(phiNodes)) { - printWarningMessage("Cannot order PHI nodes of basic block ", bb.getName(), - " in function ", bb.getParent()->getName(), "()."); - return; - } - - performOrderingOfDependentPHINodes(phiNodes, bb); -} - -/** -* @brief Orders PHI nodes in the given function according to their dependencies. -* -* For a description of what this function does, see orderDependentPHINodes() -* for llvm::Module. -*/ -void orderDependentPHINodes(llvm::Function &func) { - for (auto &bb : func) { - orderDependentPHINodes(bb); - } -} - -/** -* @brief Orders PHI nodes in the given LLVM module according to their -* dependencies. -* -* Before we convert LLVM IR to BIR, we need to order PHI nodes according to -* their interdependencies. Consider the following two PHI nodes: -* @code -* %A = phi i32 [ %D, %label ], [ 10, %0 ] -* %B = phi i32 [ %A, %label ], [ 66, %0 ] -* @endcode -* The semantics of PHI nodes dictates that both these instructions should be -* evaluated in parallel. Therefore, @c %B gets the old value of @c %A, not the -* updated one. However, in BIR, we want the evaluation to be sequential. To -* this end, in this function, we try to order the PHI nodes in such a way that -* allows easy conversion into BIR. In the example above, we swap the two PHI -* nodes. -* -* If the nodes cannot be ordered (i.e. there are two nodes that depend on each -* other), it prints an error message. -*/ -void orderDependentPHINodes(llvm::Module &module) { - // For every function in the module... - for (auto &func : module) { - orderDependentPHINodes(func); - } -} - -/** -* @brief Checks if the given global variable is internal. -* -* See Variable::isInternal() for more details. -*/ -bool isInternal(const llvm::GlobalVariable *gv) { - // Consider also private linkage as internal linkage. - return gv->hasInternalLinkage() || gv->hasPrivateLinkage(); -} - -/** -* @brief Checks if the given global variable is external. -* -* See Variable::isExternal() for more details. -*/ -bool isExternal(const llvm::GlobalVariable *gv) { - return !isInternal(gv); -} - -} // anonymous namespace - -/** -* @brief Constructs a new converter. -* -* See create() for the description of parameters. -*/ -OrigLLVMIR2BIRConverter::OrigLLVMIR2BIRConverter(llvm::Pass *basePass): - LLVMIR2BIRConverter(basePass), llvmModule(nullptr), resModule(), varsHandler(), - converter(), branchInfo(), labelsHandler(), processedBBs(), bbStmtMap(), - gotoStmtsToPatch(), currLoopBB(nullptr), lastLoopExitBB(nullptr), - generatingSwitchStmt(false) {} - -/** -* @brief Creates a new instance of LLVMIR2BIRConverter. -* -* @param[in] basePass Pass that instantiates this converter. -* -* @par Preconditions -* - @a basePass is non-null -*/ -ShPtr OrigLLVMIR2BIRConverter::create(llvm::Pass *basePass) { - PRECONDITION_NON_NULL(basePass); - - return ShPtr(new OrigLLVMIR2BIRConverter(basePass)); -} - -std::string OrigLLVMIR2BIRConverter::getId() const { - return ORIG_LLVMIR2BIR_CONVERTER_ID; -} - -ShPtr OrigLLVMIR2BIRConverter::convert(llvm::Module *llvmModule, - const std::string &moduleName, ShPtr semantics, - ShPtr config, bool enableDebug) { - PRECONDITION_NON_NULL(llvmModule); - PRECONDITION_NON_NULL(semantics); - - if (enableDebug) { - printSubPhase("ordering dependent PHI nodes"); - } - orderDependentPHINodes(*llvmModule); - - this->llvmModule = llvmModule; - this->enableDebug = enableDebug; - resModule = ShPtr(new Module(llvmModule, moduleName, semantics, config)); - varsHandler = ShPtr(new VarsHandler(resModule, - NumVarNameGen::create())); - converter = ShPtr(new LLVMConverter(llvmModule, - resModule, varsHandler)); - converter->setOptionStrictFPUSemantics(optionStrictFPUSemantics); - branchInfo = ShPtr(new LLVMBranchInfo( - converter, varsHandler)); - labelsHandler = std::make_unique(); - - // Function declarations have to be added to the module before any function - // or global variable definition. The reason is that if there is an - // assignment or call of a function whose declaration hasn't been added to - // the module yet, we'll run into troubles. - visitAndAddFunctionDeclarations(); - visitAndAddGlobalVariables(); - visitAndAddFunctions(); - - makeIdentifiersValid(); - - return resModule; -} - -/** -* @brief Visits and adds all functions in the module. -* -* Only function definitions are considered. -*/ -void OrigLLVMIR2BIRConverter::visitAndAddFunctions() { - for (auto &f : *llvmModule) { - // Do not convert 'available_externally' functions because they have - // definitions outside the translation unit. Also, skip function - // declarations since they've been already added into the resulting - // module. - if (f.hasAvailableExternallyLinkage() || f.isDeclaration()) { - continue; - } - - if (enableDebug) { - printSubPhase("converting "s + std::string(f.getName()) + "()"s); - } - - // Initialization of all per-function lists, maps, etc. - varsHandler->reset(); - branchInfo->init( - &basePass->getAnalysis(f).getLoopInfo() - ); - lastLoopExitBB = nullptr; - - // Generate the IR for the function. - visitAndAddFunction(f); - } -} - -/** -* @brief Visits global variables of the current module and stores them into -* the resulting module. -*/ -void OrigLLVMIR2BIRConverter::visitAndAddGlobalVariables() { - varsHandler->startConvertingGlobalVars(); - - // For each global variable... - for (auto i = llvmModule->global_begin(), e = llvmModule->global_end(); - i != e; ++i) { - // Ignore global constants storing string literals. - if (resModule->isGlobalVarStoringStringLiteral(i->getName()) || - stores8BitStringLiteral(&*i)) { - continue; - } - - // Create and add the variable. - ShPtr varType; - ShPtr varInit; - - if (i->hasInitializer()) { - // Check whether the variable represents a string, and if so, - // convert it into a string. - llvm::ConstantDataArray *cda = llvm::dyn_cast( - i->getInitializer()); - if (cda && cda->isString()) { - varType = ArrayType::create( - converter->llvmTypeToType(cda->getElementType()), - ArrayType::Dimensions(1, cda->getNumElements())); - varInit = toConstString(cda); - } else { - // Not a string, so just convert the initializer. - varInit = converter->llvmValueToExpression(i->getInitializer()); - } - } - - // Default conversion. - if (!varType) { - varType = converter->llvmTypeToType(i->getType()->getContainedType(0)); - } - if (!varInit) { - varInit = converter->getInitializer(&*i); - } - - std::string varName(varsHandler->getValueName(&*i)); - ShPtr var(varsHandler->getVariableByName(varName)); - var->setType(varType); - if (isExternal(&*i)) { - var->markAsExternal(); - } - resModule->addGlobalVar(var, varInit); - } - - varsHandler->stopConvertingGlobalVars(); -} - -/** -* @brief Visits function declarations of the current module and stores them into -* the resulting module. -*/ -void OrigLLVMIR2BIRConverter::visitAndAddFunctionDeclarations() { - // For each function... - for (auto &f : *llvmModule) { - // Add only a declaration. - visitAndAddFunction(f, true); - } -} - -/** -* @brief Visits the given function and stores it into the resulting module. -* -* @param[in] f Function to be visited. -* @param[in] onlyDeclaration If @c true, only the declaration of @a f is added. -*/ -void OrigLLVMIR2BIRConverter::visitAndAddFunction(llvm::Function &f, - bool onlyDeclaration) { - // Gather all the needed information about the function. - ShPtr funcRetType = converter->llvmTypeToType(f.getReturnType()); - std::string funcName = varsHandler->getValueName(&f); - VarVector funcParams = getFunctionParams(f); - ShPtr funcBody = onlyDeclaration ? - ShPtr() : getFunctionBody(f); - VarSet localVars = varsHandler->getLocalVars(); - bool isVarArg = llvm::cast(f.getFunctionType())->isVarArg(); - - // When the function already exists, we have to update it, not replace it - // (some code may already use the original function, e.g. via function - // pointers). - ShPtr func; - if ((func = resModule->getFuncByName(funcName))) { - func->setRetType(funcRetType); - func->setParams(funcParams); - func->setLocalVars(localVars); - func->setBody(funcBody); - func->setVarArg(isVarArg); - } else { - func = Function::create( - funcRetType, funcName, funcParams, localVars, funcBody, isVarArg - ); - } - - // If the function is a definition, insert VarDefStmts at the beginning of - // the function. - if (func->isDefinition()) { - // To produce a deterministic output, sort the local variables by their - // name. Indeed, recall that variables are stored in a set that orders - // them by their address which may differ from run to run. - VarSet localVarsSet(func->getLocalVars()); - VarVector localVarsVector(localVarsSet.begin(), localVarsSet.end()); - sortByName(localVarsVector); - - // Insert VarDefStmts at the beginning of the function. Notice that to - // sort the emitted VarDefStmts from A to Z, we have to do this in - // reverse. We use prependStatement() to do this. - for (auto i = localVarsVector.rbegin(), e = localVarsVector.rend(); - i != e; ++i) { - func->getBody()->prependStatement(VarDefStmt::create(*i)); - } - } - - resModule->addFunc(func); -} - -/** -* @brief Makes all identifiers valid by replacing invalid characters with valid -* characters. -* -* TODO This should probably be HLL specific. -*/ -void OrigLLVMIR2BIRConverter::makeIdentifiersValid() { - // For every global variable... - for (auto i = resModule->global_var_begin(), - e = resModule->global_var_end(); i != e; ++i) { - (*i)->getVar()->setName(makeIdentifierValid((*i)->getVar()->getName())); - } - - // For every function... - for (auto i = resModule->func_begin(), e = resModule->func_end(); - i != e; ++i) { - // Function name. - (*i)->setName(makeIdentifierValid((*i)->getName())); - - // Local variables, including parameters. - VarSet localVars((*i)->getLocalVars(true)); - for (auto &var : localVars) { - var->setName(makeIdentifierValid(var->getName())); - } - } -} - -/** -* @brief Generates missing statements into the given function's body. -*/ -void OrigLLVMIR2BIRConverter::generateMissingStatements(ShPtr funcBody) { - // We need to create a copy of gotoStmtsToPatch because it can be extended - // during the generation of the missing statements. - GotoStmtsToPatch gotoStmtsToPatchCopy; - do { - gotoStmtsToPatchCopy = gotoStmtsToPatch; - - // Generate statements for every goto target for which we do not have a - // corresponding statement yet. - for (const auto &p : gotoStmtsToPatchCopy) { - if (isBBMissingStatements(p.second)) { - generateMissingStatementsForBB(funcBody, p.second); - } - } - } while (gotoStmtsToPatch.size() != gotoStmtsToPatchCopy.size()); -} - -/** -* @brief Returns @c true if there are missing statements for @a bb. -*/ -bool OrigLLVMIR2BIRConverter::isBBMissingStatements(llvm::BasicBlock *bb) { - return !mapHasKey(bbStmtMap, bb); -} - -/** -* @brief Generates missing statements for the given basic block into the given -* function's body. -*/ -void OrigLLVMIR2BIRConverter::generateMissingStatementsForBB( - ShPtr funcBody, llvm::BasicBlock *bb) { - ShPtr missingStmts(visitBasicBlockOrLoop(bb)); - - // We put the generated statements after the current function's body. - Statement::mergeStatements(funcBody, missingStmts); -} - -/** -* @brief Adds a goto target to be patched. -*/ -void OrigLLVMIR2BIRConverter::addGotoStmtToPatch(ShPtr gotoStmt, - llvm::BasicBlock *bb) { - // Add it only if it has not yet been added. - auto p = std::make_pair(gotoStmt, bb); - if (!hasItem(gotoStmtsToPatch, p)) { - gotoStmtsToPatch.push_back(std::move(p)); - } -} - -/** -* @brief Patches the targets of goto statements. -*/ -void OrigLLVMIR2BIRConverter::patchTargetsOfGotoStmts() { - for (const auto &p : gotoStmtsToPatch) { - auto targetStmtIter = bbStmtMap.find(p.second); - ASSERT_MSG(targetStmtIter != bbStmtMap.end(), - "There is no BIR statement for LLVM basic block `" << - p.second->getName() << "`."); - p.first->setTarget(targetStmtIter->second); - setGotoTargetLabel(targetStmtIter->second, p.second); - } - gotoStmtsToPatch.clear(); -} - -/** -* @brief Sets a proper label of a goto target that is in the given basic block. -*/ -void OrigLLVMIR2BIRConverter::setGotoTargetLabel(ShPtr target, - const llvm::BasicBlock *targetBB) { - labelsHandler->setGotoTargetLabel(target, targetBB); -} - -/** -* @brief Returns all the parameters of the given function. -* -* It doesn't matter whether @a f is a declaration or a definition. -* -* If @a f is a definition, this function also stores all @a f's parameters -* into @c varHandlers' local variables list. -*/ -VarVector OrigLLVMIR2BIRConverter::getFunctionParams(llvm::Function &f) { - VarVector funcParams; - // For each parameter... - for (auto i = f.arg_begin(), e = f.arg_end(); i != e; ++i) { - std::string varName = varsHandler->getValueName(&*i); - ShPtr var(Variable::create(varName, - converter->llvmTypeToType(i->getType()))); - varsHandler->addLocalVar(var); - funcParams.push_back(var); - } - return funcParams; -} - -/** -* @brief Returns the body of the given function. -* -* If the function is a declaration, the null pointer is returned. -*/ -ShPtr OrigLLVMIR2BIRConverter::getFunctionBody(llvm::Function &f) { - if (f.isDeclaration()) { - return ShPtr(); - } - - // Store the types of all local variables allocated by allocas. This is - // needed to properly generate the function's body. - for (auto i = inst_begin(f), e = inst_end(f); i != e; ++i) { - if (const llvm::AllocaInst *ai = LLVMSupport::isDirectAlloca(&*i)) { - varsHandler->addAllocatedVarType(&*i, ai->getAllocatedType()); - } - } - - ShPtr funcBody(visitBasicBlockOrLoop(&f.front())); - generateMissingStatements(funcBody); - patchTargetsOfGotoStmts(); - return funcBody; -} - -/** -* @brief Generates the end of a loop. -* -* @param[in] currBB Current basic block, from which the emission of the loop -* end is done. -* @param[in] loopHeader Header of the loop. -* @param[in] loopEnd End of the loop. -* @param[in] cond `if` condition (if it is satisfied, do another loop -* iteration). -* @param[in] isCondNegated @c true if @a cond should be negated, @c false -* otherwise. -* @param[in] justPHICopies @c true if just PHI copies should be generated (no -* `if`s etc.), @c false otherwise. -* -* This function should be only called from visitBranchInst(). -* -* @par Preconditions -* - @a loopHeader, @a loopEnd, and @a cond are non-null -*/ -ShPtr OrigLLVMIR2BIRConverter::getLoopEnd(llvm::BasicBlock *currBB, - llvm::BasicBlock *loopHeader, llvm::BasicBlock *loopEnd, - llvm::Value *cond, bool isCondNegated, bool justPHICopies) { - PRECONDITION_NON_NULL(loopHeader); - PRECONDITION_NON_NULL(loopEnd); - PRECONDITION_NON_NULL(cond); - - // Store the information about the loop end (we need to generate it after - // the loop). - lastLoopExitBB = loopEnd; - - // We are going to generate the following structure: - // - // if [not] cond: // not iff isCondNegated - // PHI copies for loopEnd - // break - // PHI copies for loopHeader - // continue - // - - // If loopEnd contains just an unconditional branch to X, we will need to - // generate also PHI copies for X. This has to be done because of the - // simplification of loop exits in the IndVarSimplify pass. To this end, - // use the following variable loopEndSucc to determine whether additional - // PHI copies have to be generated (they have to be generated iff - // loopEndSucc is not the null pointer). - llvm::BasicBlock *loopEndSucc = nullptr; - if (llvm::BranchInst *bi = llvm::dyn_cast(&*(loopEnd->begin()))) { - if (bi->getNumSuccessors() == 1) { - loopEndSucc = bi->getSuccessor(0); - } - } - - ShPtr phiCopiesHeader(getPHICopiesForSuccessor(currBB, - loopHeader)); - ShPtr phiCopiesEnd; - if (loopEndSucc) { - phiCopiesEnd = getPHICopiesForSuccessor(loopEnd, loopEndSucc); - } else { - phiCopiesEnd = getPHICopiesForSuccessor(currBB, loopEnd); - } - - if (justPHICopies) { - // Generate just PHI copies without any additional code. - ShPtr phiCopies = Statement::mergeStatements( - phiCopiesHeader, phiCopiesEnd); - return addDebugCommentToStatement(phiCopies, "PHI copies at the loop end"); - } - - // `if` part - ShPtr ifCond(converter->llvmValueToExpression(cond)); - if (isCondNegated) { - ifCond = ExpressionNegater::negate(ifCond); - } - - // Depending on the current location (are we in a loop/switch and the - // target is another loop/switch?), we either generate `break` or `goto`. - ShPtr endOfIfPart; - std::string ifTargetAddress(labelsHandler->getLabel(loopEnd)); - if (loopHeader == currLoopBB && !generatingSwitchStmt) { - // Breaking out of the current loop while not being in a switch - // statement -> generate a `break` statement. - endOfIfPart = BreakStmt::create(); - endOfIfPart->setMetadata("break -> " + ifTargetAddress); - } else { - // Breaking out of some outer loop or we are in a switch statement -> - // generate a `goto` statement. - endOfIfPart = GotoStmt::create(EmptyStmt::create()); - addGotoStmtToPatch(ucast(endOfIfPart), loopEnd); - endOfIfPart->setMetadata("break (via goto) -> " + ifTargetAddress); - } - ShPtr ifBody(Statement::mergeStatements( - phiCopiesEnd, endOfIfPart)); - - // after `if` part - // The following `continue` or `goto` statements HAVE to be generated - // because there may be some code after them (after a previous - // if/else-branch). For example, consider the following code. - // - // if c1: - // if c2: - // ... - // break - // ... - // continue # This `continue` has to be generated. - // ... - // return 0 - // - // Of course, they may be removed in subsequent optimizations. - ShPtr afterIf; - std::string afterIfTargetAddress(labelsHandler->getLabel(loopHeader)); - if (loopHeader == currLoopBB) { - // Going to the beginning of the current loop -> generate a `continue` - // statement. - afterIf = ContinueStmt::create(); - afterIf->setMetadata("continue -> " + afterIfTargetAddress); - } else { - // Going to the beginning of some outer loop -> generate a `goto` - // statement. - // - // We cannot generate a `continue` statement because doing so would - // make the program go to the beginning of the current loop, not to the - // beginning of the current loop. - // - // Since the outer loop is not available at the time of generating this - // statement (we use recursion to do the conversion of LLVM IR to BIR), - // we need to use some stub target, and patch the target statement - // afterwards. - afterIf = GotoStmt::create(EmptyStmt::create()); - addGotoStmtToPatch(ucast(afterIf), loopHeader); - afterIf->setMetadata("continue (via goto) -> " + afterIfTargetAddress); - } - afterIf = Statement::mergeStatements(phiCopiesHeader, afterIf); - - return IfStmt::create(ifCond, ifBody, afterIf); -} - -/** -* @brief Returns PHI copies for the given basic block and its successor. -* -* @return @c true if some PHI copies were generated, @c false otherwise. -* -* PHI copies for induction variables (if there are any) are not generated -* because they are useless. This is, however, done only if -* isOptimizableToForLoop() returns @c true. -* -* @par Preconditions -* - both @a currBB and @a succ are non-null -*/ -ShPtr OrigLLVMIR2BIRConverter::getPHICopiesForSuccessor( - llvm::BasicBlock *currBB, llvm::BasicBlock *succ) const { - PRECONDITION_NON_NULL(currBB); - PRECONDITION_NON_NULL(succ); - - // First, check whether currBB is a predecessor of succ; otherwise, do - // not generate anything. - if (!LLVMSupport::isPredecessorOf(currBB, succ)) { - return ShPtr(); - } - - // Generate all the needed PHI copies. - ShPtr phiCopies; - ShPtr lastPHICopy; - // For each PHI node in succ... - for (auto i = succ->begin(); llvm::isa(i); ++i) { - llvm::PHINode *pn = llvm::cast(i); - llvm::Value *iv = pn->getIncomingValueForBlock(currBB); - if (llvm::isa(iv)) { - continue; - } - - // Do not generate PHI copies for induction variables. - if (llvm::Loop *l = branchInfo->getLoopFor(succ)) { - if (branchInfo->isOptimizableToForLoop(l) && - l->getCanonicalInductionVariable() == pn) { - continue; - } - } - - ShPtr lhs(varsHandler->getVariableByName( - varsHandler->getValueName(&*i))); - ShPtr rhs(converter->llvmValueToExpression(iv)); - ShPtr phiCopy(AssignStmt::create(lhs, rhs)); - if (!phiCopies) { - phiCopies = phiCopy; - lastPHICopy = phiCopy; - } else { - lastPHICopy->setSuccessor(phiCopy); - lastPHICopy = phiCopy; - } - } - return phiCopies; -} - -/** -* @brief Generates an `if` statement for a conditional branch to @a bb1 and @a -* bb2, depending on @a cond, including a `goto` statement(s). -* -* @param[in] bb1 First target of the branch instruction. -* @param[in] bb2 Second target of the branch instruction. -* @param[in] source Source basic block from which we are branching to @a bb1 -* and @a bb2. -* @param[in] cond Branch condition. -* @param[in] negateCond If @c true, it negates @a cond. -* -* If a `goto` to @a bb2 is also required, it is generated instead of generating -* @a bb2 directly. -* -* @par Preconditions -* - mapHasKey(bbStmtMap, bb1) -* - branchInfo->isGotoNecessary(source, bb1) -*/ -ShPtr OrigLLVMIR2BIRConverter::generateGotoForConditionalBranch( - llvm::BasicBlock *bb1, llvm::BasicBlock *bb2, llvm::BasicBlock *source, - llvm::Value *cond, bool negateCond) { - // We are going to generate the following structure: - // - // if cond: // possibly negated, depending on negateCond - // PHI copies for bb1 - // goto bb1 - // PHI copies for bb2 - // bb2 - // - // If there should be a goto to bb2 instead of bb2, we generate a goto. - // - ShPtr ifCond(converter->llvmValueToExpression(cond)); - if (negateCond) { - ifCond = ExpressionNegater::negate(ifCond); - } - - // Generate code for bb1. - ShPtr phiCopiesBB1(getPHICopiesForSuccessor(source, bb1)); - auto gotoTarget = bbStmtMap[bb1]; - ShPtr ifBody(Statement::mergeStatements(phiCopiesBB1, - GotoStmt::create(gotoTarget))); - setGotoTargetLabel(gotoTarget, bb1); - ShPtr ifStmt(IfStmt::create(ifCond, ifBody)); - - // Generate code for bb2. - ShPtr phiCopiesBB2(getPHICopiesForSuccessor(source, bb2)); - ShPtr afterIf; - if (mapHasKey(bbStmtMap, bb2) && - branchInfo->isGotoNecessary(source, bb2)) { - // A goto is necessary for bb2, too. - auto gotoTarget = bbStmtMap[bb2]; - afterIf = Statement::mergeStatements(phiCopiesBB2, - GotoStmt::create(gotoTarget)); - setGotoTargetLabel(gotoTarget, bb2); - } else { - // No goto is needed for bb2, so generate it directly after the if - // statement. - afterIf = Statement::mergeStatements(phiCopiesBB2, - visitBasicBlockOrLoop(bb2)); - } - ifStmt->setSuccessor(afterIf); - - return ifStmt; -} - -/** -* @brief Returns the initial value of the induction variable of the given loop. -* -* If the loop does not have a unique induction variable, it returns the null -* pointer. -* -* @par Preconditions -* - @a l is non-null -*/ -llvm::Value *OrigLLVMIR2BIRConverter::getInitialValueOfIndVar(const llvm::Loop *l) const { - PRECONDITION_NON_NULL(l); - - if (!l->getCanonicalInductionVariable()) { - // No induction variable. - return nullptr; - } - - // Get the pre-header of l. - llvm::BasicBlock *preHeader = nullptr; - llvm::BasicBlock *header = l->getBlocks()[0]; - for (auto i = pred_begin(header), e = pred_end(header); i != e; ++i) { - if (branchInfo->getLoopFor(*i) != l) { - preHeader = *i; - break; - } - } - ASSERT_MSG(preHeader, "every loop has to have a pre-header"); - - // Get the initial value of the induction variable. - llvm::PHINode *pn = llvm::cast(l->getCanonicalInductionVariable()); - return pn->getIncomingValueForBlock(preHeader); -} - -/** -* @brief Generates the given basic block either as a loop (if it is a loop), or -* as a normal basic block (in the case it is not a loop). -* -* @param[in] bb Basic block. -* @param[in] genTerm If @c true, then it also generates the terminator. -* TODO What about loops? -* -* @par Preconditions -* - @a bb is non-null -*/ -ShPtr OrigLLVMIR2BIRConverter::visitBasicBlockOrLoop(llvm::BasicBlock *bb, - bool genTerm) { - PRECONDITION_NON_NULL(bb); - - // Check whether bb has been processed too many times. If so, then do not - // process it again to obviate a possible infinite loop. Even though the - // basic support of goto statements is done, it may work improperly in some - // cases. This is why there is the following check to avoid infinite - // recursion. The number 25 below has no greater meaning; it's just a - // number that popped into my mind (and all backend tests pass correctly). - if (++processedBBs[bb] > 25) { - ShPtr emptyStmt(EmptyStmt::create()); - addDebugCommentToStatement(emptyStmt, - "Detected a possible infinite recursion (goto support failed); quitting..."); - return emptyStmt; - } - - if (llvm::Loop *l = branchInfo->getLoopFor(bb)) { - if (l->getHeader() == bb) { - return visitLoop(l); - } - } - return visitBasicBlock(bb, genTerm); -} - -/** -* @brief Adds @a stmtToAdd to a block of statements starting with @a firstStmt. -* -* @param[in] stmtToAdd Statement to be added. -* @param[out] firstStmt First statement of the block. -* @param[in,out] prevStmt Predecessor of @a stmtToAdd. -* -* After this function is called, @a prevStmt and @a firstStmt are properly set, -* depending on whether @a prevStmt is the null pointer. -* -* If @a stmtToAdd has successors, they are also added to the block. -*/ -void OrigLLVMIR2BIRConverter::addStatementToStatementBlock(ShPtr stmtToAdd, - ShPtr &firstStmt, ShPtr &prevStmt) { - // Move to the end of stmtToAdd to properly set prevStmt (stmtToAdd may - // have successors). - ShPtr lastStmt(Statement::getLastStatement(stmtToAdd)); - - // Properly set prevStmt and firstStmt. - if (!prevStmt) { - firstStmt = stmtToAdd; - } else { - prevStmt->setSuccessor(stmtToAdd); - } - prevStmt = lastStmt; -} - -/** -* @brief Adds @a debugComment to @a stmt. -* -* @param[in] stmt Statement to which @a debugComment is added. -* @param[in] debugComment Debug comment to be added. -* -* @return A statement with @a debugComment attached to it. -* -* If @a stmt already has an attached debug message, it creates a new empty -* statement, attaches @a debugComment to it, and prepends the new statement to -* @a stmt. -* -* If @a stmt is the null pointer, it creates a new empty statement and attaches -* @a debugComment to it. -*/ -ShPtr OrigLLVMIR2BIRConverter::addDebugCommentToStatement( - ShPtr stmt, std::string debugComment) { - if (!stmt) { - ShPtr emptyStmt(EmptyStmt::create()); - emptyStmt->setMetadata(debugComment); - return emptyStmt; - } - - if (!stmt->getMetadata().empty()) { - // stmt already has a debug message. - ShPtr emptyStmt(EmptyStmt::create()); - emptyStmt->setMetadata(debugComment); - return Statement::mergeStatements(emptyStmt, stmt); - } - - // stmt doesn't have any attached debug message. - stmt->setMetadata(debugComment); - return stmt; -} - -/** -* @brief Returns the default switch block for @a bb. -* -* @param[in] bb Basic block representing the default switch block. -* @param[in] succ Successor of @a bb (if there is any), the null pointer -* otherwise. -* -* @par Preconditions -* - @a bb is non-null -*/ -ShPtr OrigLLVMIR2BIRConverter::getDefaultSwitchBlock( - llvm::BasicBlock *bb, llvm::BasicBlock *succ) { - PRECONDITION_NON_NULL(bb); - - // Is there a fall-through? - llvm::BranchInst *bi = llvm::dyn_cast(bb->getTerminator()); - if (bi && bi->getSuccessor(0) == succ) { - // There is a fall-through. However, if the successor begins with a PHI - // node and ends with a return/unreachable statement, do not generate the - // fall-through. - // - // The following example illustrates the problem. - // switch expr: - // default: - // ... - // result = tea // PHI copy. - // case 1: - // result = 0 // PHI copy. - // ... - // return result - // - // TODO What if the successor doesn't end with a return/unreachable - // statement? - if (llvm::isa(*succ->begin()) && LLVMSupport::endsWithRetOrUnreach( - succ, true)) { - ShPtr defaultBlock = visitBasicBlockOrLoop(bb); - return Statement::mergeStatements(defaultBlock, - BreakStmt::create()); - } - - // Generate a fall-through. - ShPtr defaultBlock = visitBasicBlockOrLoop(bb, false); - ShPtr phiCopies(getPHICopiesForSuccessor(bb, succ)); - return Statement::mergeStatements(defaultBlock, phiCopies); - } - - // There is no fall-through. - ShPtr defaultBlock = visitBasicBlockOrLoop(bb); - return Statement::mergeStatements(defaultBlock, - BreakStmt::create()); -} - -/** -* @brief Returns an expression for the given switch-case value. -*/ -ShPtr OrigLLVMIR2BIRConverter::getSwitchCaseExpression(llvm::Value *v) { - // Simple values, like integers, may be converted directly. This is new in - // LLVM 3.4; prior to LLVM 3.4, the value was always an array (see the - // comment below). - if (llvm::ConstantInt *ci = llvm::dyn_cast(v)) { - return converter->llvmValueToExpression(ci); - } - - // As of LLVM 3.3 (or 3.2, I don't know exactly, but in 3.1, this is not - // the case), the value is an array of the form - // - // [, ...] - // - // That is, a case value is composed of ranges. - // - if (llvm::ConstantAggregateZero *caz = llvm::dyn_cast(v)) { - // This indicates a zero. - llvm::IntegerType *intType(llvm::dyn_cast( - caz->getType()->getContainedType(0)->getContainedType(0))); - ASSERT_MSG(intType, "The type should be integral."); - return ConstInt::create(0, intType->getBitWidth(), intType->getSignBit()); - } - - // It is a range. - llvm::ConstantArray *ca = llvm::dyn_cast(v); - ASSERT_MSG(ca, "The value should be an array."); - if (ca->getNumOperands() != 1) { - // TODO Handle this case (more than one range in a case). - printWarningMessage("Found a switch instruction with more ranges in " - "one of its cases (", *ca, ")."); - } - llvm::ConstantDataSequential *switchCaseRange(llvm::dyn_cast( - ca->getOperand(0))); - ASSERT_MSG(switchCaseRange, "The case range should be an array."); - ShPtr lowerBound(converter->llvmConstantToExpression( - switchCaseRange->getElementAsConstant(0))); - if (switchCaseRange->getElementAsConstant(1)) { - ShPtr upperBound(converter->llvmConstantToExpression( - switchCaseRange->getElementAsConstant(1))); - if (!lowerBound->isEqualTo(upperBound)) { - // TODO Handle this case (the lower bound differs from the upper bound). - printWarningMessage("Found a switch case with a range having its lower " - "bound different from the upper bound (", *switchCaseRange, ")."); - } - } - return lowerBound; -} - -/** -* @brief Generates code for the given basic block @a bb. -* -* @param[in] bb Basic block. -* @param[in] genTerm If @c true, then it also generates the terminator. -* -* @par Preconditions -* - @a bb is non-null -*/ -ShPtr OrigLLVMIR2BIRConverter::visitBasicBlock(llvm::BasicBlock *bb, - bool genTerm) { - PRECONDITION_NON_NULL(bb); - - ShPtr firstStmt; // The first statement. - ShPtr prevStmt; // The previous statement. - ShPtr currStmt; // The current statement. - - // Generate all the instructions in the basic block. - for (auto i = bb->begin(), e = --bb->end(); i != e; ++i) { - // Skip PHI nodes. - if (llvm::isa(*i)) { - continue; - } - - // If the instruction accesses a local variable allocated by an alloca - // instruction which hasn't been defined yet, define it. - if (llvm::isa(i) || llvm::isa(i) || - llvm::isa(i) || llvm::isa(i)) { - // Check all operands of the instruction. - for (unsigned j = 0, e = i->getNumOperands(); j < e; ++j) { - llvm::Value *varLLVM = i->getOperand(j); - - // Skip global variables. - if (llvm::isa(varLLVM)) { - continue; - } - - // Skip temporary variables, functions, and possibly other - // types of variables. - if (!LLVMSupport::isDirectAlloca(varLLVM)) { - continue; - } - - // Skip already defined local variables. - std::string varName = varsHandler->getValueName(varLLVM); - if (varsHandler->localVarExists(varName)) { - continue; - } - - // We have a winner, so generate a variable-definition - // statement for it. - - // Get the variable's type. This needs to be obtained from the - // alloca instruction; otherwise, the type might not match. - llvm::Type *varType = varsHandler->getAllocatedVarType(varLLVM); - - // Define the variable. - ShPtr var(Variable::create(varName, - converter->llvmTypeToType(varType))); - varsHandler->addLocalVar(var); - - // Generate the statement. - // - // We create an AssignStmt instead of a VarDefStmt, and - // optimize assignments to definitions later - // (VarDefStmtOptimizer). This simplifies the conversion. - ShPtr init(converter->getDefaultInitializer( - varType)); - currStmt = AssignStmt::create(var, init); - - // We want to prevent optimization of variables used in - // volatile load/store operations, so mark such variables as - // external. - if (auto loadInst = llvm::dyn_cast(i)) { - if (loadInst->isVolatile()) { - var->markAsExternal(); - } - } - - // If we have just generated the first statement, map the - // currently processed basic block's label to it. - if (!firstStmt && !mapHasKey(bbStmtMap, bb)) { - bbStmtMap[bb] = currStmt; - } - - addStatementToStatementBlock(currStmt, firstStmt, prevStmt); - } - } - - // Skip inlinable instructions and direct allocas. - if (LLVMSupport::isInlinableInst(&*i) || LLVMSupport::isDirectAlloca(&*i)) { - continue; - } - - // If (1) we're generating the body of a loop with an induction - // variable and (2) the variable on the left-hand side is used - // only in the exit condition for this loop, do not generate any - // code. - if (llvm::Loop *l = branchInfo->getLoopFor(bb)) { - // TODO Is the following condition sufficient? - if (branchInfo->isOptimizableToForLoop(l) && !llvm::isa(i) && - !llvm::isa(i) && !llvm::isa(i) && !i->hasName()) { - unsigned usesExcludingExit = 0; - for (auto j = i->user_begin(), e = i->user_end(); j != e; ++j) { - if (*j == l->getCanonicalInductionVariable() || - // TODO HACK - varsHandler->getValueName(*j).substr(0, 8) == "exitcond") { - usesExcludingExit = 0; - break; - } - usesExcludingExit++; - } - if (usesExcludingExit == 0) { - continue; - } - } - } - - if (llvm::StoreInst *si = llvm::dyn_cast(i)) { - // Mark the accessed variable as defined because after this - // instruction, it has a value. - llvm::Value *var = si->getOperand(1); - std::string varName = varsHandler->getValueName(var); - if (!llvm::isa(var)) { - varsHandler->addLocalVar(Variable::create(varName, - converter->llvmTypeToType(var->getType()->getContainedType(0)))); - } - } - - // If there is an l-value which is used later in the code, generate an - // assignment statement; otherwise, generate whatever *i is. - // - // TODO - // However, if the current instruction is an insertvalue instruction, - // generate it separately; that is, use - // converter->llvmInstructionToValue() rather than - // converter->llvmValueToExpression(). This is because the visitation - // function for this instruction creates two assignment statements. - if (i->getType() != llvm::Type::getVoidTy(bb->getContext()) && - !LLVMSupport::isInlineAsm(&*i) && !llvm::isa(*i)) { - ShPtr lhs(converter->llvmValueToExpression(&*i)); - ShPtr rhs(converter->llvmInstructionToValue(*i)); - // Since rhs might be an instance of CallStmt, we need to check - // this. If this is the case, then we have to use the underlying - // call instead. - if (ShPtr callStmt = cast(rhs)) { - if (i->user_begin() != i->user_end()) { - // The left-hand side is used, so assign the result of the - // call statement to the left-hand side. - currStmt = AssignStmt::create(lhs, callStmt->getCall()); - } else { - // There are no uses of the left-hand side, so generate - // just the call statement, without the assignment. - // Otherwise, we would introduce an unused assignment. - currStmt = callStmt; - } - } else { - currStmt = AssignStmt::create(lhs, cast(rhs)); - } - } else { - currStmt = cast(visit(*i)); - } - - // If we have just generated the first statement, map the - // currently processed basic block's label to it. - if (!firstStmt && !mapHasKey(bbStmtMap, bb)) { - bbStmtMap[bb] = currStmt; - } - - addStatementToStatementBlock(currStmt, firstStmt, prevStmt); - } - - llvm::TerminatorInst *ti = bb->getTerminator(); - if (genTerm) { - currStmt = cast(visit(*ti)); - addStatementToStatementBlock(currStmt, firstStmt, prevStmt); - } else if (llvm::BranchInst *bi = llvm::dyn_cast(ti)) { - if (!bi->isConditional()) { - // Just for debugging purposes. - currStmt = EmptyStmt::create(); - currStmt->setMetadata("branch -> " + - labelsHandler->getLabel(bi->getSuccessor(0))); - addStatementToStatementBlock(currStmt, firstStmt, prevStmt); - } - } - - firstStmt = addDebugCommentToStatement(firstStmt, - labelsHandler->getLabel(bb)); - bbStmtMap[bb] = firstStmt; - - return firstStmt; -} - -/** -* @brief Generates code for the given loop @a l. -* -* @par Preconditions -* - @a l is non-null -*/ -ShPtr OrigLLVMIR2BIRConverter::visitLoop(llvm::Loop *l) { - PRECONDITION_NON_NULL(l); - - // Get the loop's body. - branchInfo->startGeneratingLoop(l); - llvm::BasicBlock *bb = l->getBlocks()[0]; - llvm::Loop *bbLoop = branchInfo->getLoopFor(bb); - ShPtr loopBody; - if (bbLoop == l) { - // Since there may be nested loops, store the original basic block of - // the currently generated loop. - llvm::BasicBlock *oldCurrLoopBB = currLoopBB; - - // Set the new one. - currLoopBB = bb; - - // Get the body. - loopBody = visitBasicBlock(bb); - - // Restore the original basic block of the currently generated loop. - currLoopBB = oldCurrLoopBB; - } else if (bb == bbLoop->getHeader() && bbLoop->getParentLoop() == l) { - loopBody = visitLoop(bbLoop); - } - branchInfo->endGeneratingLoop(); - - // Get the basic block(s) after the loop (if there are any). - ShPtr afterLoop; - if (lastLoopExitBB) { - // We need to zero it before the emission. - llvm::BasicBlock *exitBB = lastLoopExitBB; - lastLoopExitBB = nullptr; - afterLoop = visitBasicBlockOrLoop(exitBB); - } - - // Create a loop of a proper type and return it. - ShPtr generatedLoop; - if (branchInfo->isOptimizableToForLoop(l)) { - // Return a for loop. - llvm::PHINode *indVarLLVM = l->getCanonicalInductionVariable(); - std::string indVarName = varsHandler->getValueName(indVarLLVM); - ShPtr indVar(varsHandler->getVariableByName(indVarName)); - if (isa(indVar->getType())) { - indVar->setType(converter->llvmTypeToType(indVarLLVM->getType())); - } - - llvm::Value *iv = getInitialValueOfIndVar(l); - ShPtr startValue(converter->llvmValueToExpression(iv)); - ShPtr endCond(LtOpExpr::create(indVar, - AddOpExpr::create(startValue, branchInfo->getTripCount(l)))); - - // TODO Is the used number of bits (32) correct? - ShPtr step(ConstInt::create(1, 32)); - generatedLoop = ForLoopStmt::create(indVar, - startValue, endCond, step, loopBody, afterLoop); - } else { - // We know neither the induction variable nor the trip count, so return - // a general `while True` loop. It may be converted into a for loop in - // subsequent optimizations. - ShPtr cond(ConstBool::create(true)); - generatedLoop = WhileLoopStmt::create(cond, loopBody, afterLoop); - } - bbStmtMap[bb] = generatedLoop; - return generatedLoop; -} - -ShPtr OrigLLVMIR2BIRConverter::visitCallInst(llvm::CallInst &i) { - return converter->llvmCallInstToCallStmt(i); -} - -ShPtr OrigLLVMIR2BIRConverter::visitGetElementPtrInst(llvm::GetElementPtrInst &i) { - return converter->llvmGEPInstToExpression(i); -} - -ShPtr OrigLLVMIR2BIRConverter::visitLoadInst(llvm::LoadInst &i) { - return converter->llvmLoadInstToExpression(i); -} - -ShPtr OrigLLVMIR2BIRConverter::visitStoreInst(llvm::StoreInst &i) { - return converter->llvmStoreInstToAssignStmt(i); -} - -ShPtr OrigLLVMIR2BIRConverter::visitAllocaInst(llvm::AllocaInst &i) { - return converter->llvmAllocaInstToExpression(i); -} - -ShPtr OrigLLVMIR2BIRConverter::visitCastInst(llvm::CastInst &i) { - return converter->llvmInstructionToValue(i); -} - -ShPtr OrigLLVMIR2BIRConverter::visitInsertValueInst(llvm::InsertValueInst &i) { - return converter->llvmInsertValueInstToStatement(i); -} - -ShPtr OrigLLVMIR2BIRConverter::visitExtractValueInst(llvm::ExtractValueInst &i) { - return converter->llvmExtractValueInstToExpression(i); -} - -ShPtr OrigLLVMIR2BIRConverter::visitUnreachableInst(llvm::UnreachableInst &i) { - return UnreachableStmt::create(); -} - -ShPtr OrigLLVMIR2BIRConverter::visitInstruction(llvm::Instruction &i) { - printErrorMessage("OrigLLVMIR2BIRConverter does not know about:", i); - llvm_unreachable(0); - return ShPtr(); -} - -ShPtr OrigLLVMIR2BIRConverter::visitBranchInst(llvm::BranchInst &i) { - if (!i.isConditional()) { - // An unconditional branch. - llvm::BasicBlock *succ = i.getSuccessor(0); - ShPtr phiCopies = getPHICopiesForSuccessor(i.getParent(), succ); - - ShPtr debugCommentStmt(EmptyStmt::create()); - debugCommentStmt->setMetadata("branch -> " + - labelsHandler->getLabel(succ)); - phiCopies = Statement::mergeStatements(phiCopies, debugCommentStmt); - - // If the branch target is the header of the outer loop, generate just - // 'break'. - if (branchInfo->isSuccHeaderOfOuterLoop(i.getParent(), succ)) { - ShPtr breakStmt(BreakStmt::create()); - return Statement::mergeStatements(phiCopies, breakStmt); - } - - // If the branch target is the header of the current loop, generate just - // 'continue'. - if (branchInfo->isSuccHeaderOfInnerLoop(i.getParent(), succ)) { - ShPtr continueStmt(ContinueStmt::create()); - return Statement::mergeStatements(phiCopies, continueStmt); - } - - // If the branch target ends with a return statement and we're in a - // loop, generate it here. - if (LLVMSupport::endsWithRetOrUnreach(succ) && branchInfo->generatingLoop()) { - ShPtr block(visitBasicBlockOrLoop(succ)); - return Statement::mergeStatements(phiCopies, block); - } - - // If a goto statement is necessary, generate it. We also have to check - // that there is a statement corresponding to the target; otherwise, we - // just emit a fall-through. - if (mapHasKey(bbStmtMap, succ) && - branchInfo->isGotoNecessary(i.getParent(), succ)) { - auto gotoTarget = bbStmtMap[succ]; - ShPtr gotoStmt(GotoStmt::create(gotoTarget)); - setGotoTargetLabel(gotoTarget, succ); - return Statement::mergeStatements(phiCopies, gotoStmt); - } - - // If the branch target is not yet in a state of to be generated, - // generate it here. - if (branchInfo->branchStackTop() != succ) { - ShPtr block(visitBasicBlockOrLoop(succ)); - return Statement::mergeStatements(phiCopies, block); - } - - // There should be a fall-through. - return phiCopies; - } - - // The conditional branch instruction is of the following form: - // br cond, bb1, bb2 - llvm::Value *cond = i.getCondition(); - llvm::BasicBlock *bb1 = i.getSuccessor(0); - llvm::BasicBlock *bb2 = i.getSuccessor(1); - llvm::BasicBlock *cbd = branchInfo->findCommonBranchDestination(bb1, bb2); - - // First, check whether bb1 or bb2 is a jump to the header of a loop. If - // so, then bb2 or bb1, respectively, has to be a jump to the end of the - // loop. - llvm::Loop *currLoop = branchInfo->getLoopFor(i.getParent()); - if (currLoop && branchInfo->isLoopHeader(bb1, currLoop)) { - if (branchInfo->isOptimizableToForLoop(currLoop)) { - ShPtr loopEnd(getLoopEnd(i.getParent(), bb1, - bb2, cond, true, true)); - - // Put a continue statement, including a debug comment, after the - // loop end. - ShPtr debugComment(ContinueStmt::create()); - debugComment->setMetadata("loop " + - labelsHandler->getLabel(bb1) + " end"); - return Statement::mergeStatements(loopEnd, debugComment); - } else { - return getLoopEnd(i.getParent(), bb1, bb2, cond, true); - } - } - // Likewise for bb2. - else if (currLoop && branchInfo->isLoopHeader(bb2, currLoop)) { - if (branchInfo->isOptimizableToForLoop(currLoop)) { - ShPtr loopEnd(getLoopEnd(i.getParent(), bb2, bb1, - cond, false, true)); - - // Put a continue statement, including a debug comment, after the - // loop end. - ShPtr debugComment(ContinueStmt::create()); - debugComment->setMetadata("loop " + - labelsHandler->getLabel(bb2) + " end"); - return Statement::mergeStatements(loopEnd, debugComment); - } else { - return getLoopEnd(i.getParent(), bb2, bb1, cond); - } - } - - // Check whether a goto statement is necessary for some of the branches. - if (mapHasKey(bbStmtMap, bb1) && - branchInfo->isGotoNecessary(i.getParent(), bb1)) { - // A goto is necessary for bb1. - return generateGotoForConditionalBranch(bb1, bb2, i.getParent(), cond); - } else if (mapHasKey(bbStmtMap, bb2) && - branchInfo->isGotoNecessary(i.getParent(), bb2)) { - // A goto is necessary for bb2. - return generateGotoForConditionalBranch(bb2, bb1, i.getParent(), cond, true); - } - - // Handle special cases. - - // Special case (1) - // ---------------- - // if cond: - // bb1 - // bb2 == commonBranchDest from the previous branch - if (bb2 == branchInfo->branchStackTop()) { - // Generate only the body of bb1 since bb2 will be generated later. - ShPtr phiCopies(getPHICopiesForSuccessor(i.getParent(), bb1)); - - ShPtr ifBody(visitBasicBlockOrLoop(bb1)); - ifBody = Statement::mergeStatements(phiCopies, ifBody); - - ShPtr ifCond(converter->llvmValueToExpression(cond)); - ShPtr ifStmt(IfStmt::create(ifCond, ifBody)); - - // If there are some PHI nodes in the successor, generate PHI copies - // for them into an else clause. - if (llvm::isa(bb2->begin())) { - ShPtr elseBody(getPHICopiesForSuccessor(i.getParent(), bb2)); - ifStmt->setElseClause(elseBody); - } - - return ifStmt; - } - // Special case (2) - // ---------------- - // if cond: - // bb2 - // bb1 == commonBranchDest from the previous branch - else if (bb1 == branchInfo->branchStackTop()) { - ShPtr ifCond(ExpressionNegater::negate( - converter->llvmValueToExpression(cond))); - - // Generate only the body of bb2 since bb1 will be generated later. - ShPtr phiCopies(getPHICopiesForSuccessor(i.getParent(), bb2)); - - ShPtr ifBody(visitBasicBlockOrLoop(bb2)); - ifBody = Statement::mergeStatements(phiCopies, ifBody); - - ShPtr ifStmt(IfStmt::create(ifCond, ifBody)); - - // If there are some PHI nodes in the successor, generate PHI copies - // for them into an else clause. - if (llvm::isa(bb1->begin())) { - ShPtr elseBody(getPHICopiesForSuccessor(i.getParent(), bb1)); - ifStmt->setElseClause(elseBody); - } - - // If the common branch destination is not in a state to be generated, - // generate it here. More specifically, generate it if the branch stack - // contains just cbd. - // TODO PHI copies? - ShPtr afterIf; - if (branchInfo->branchStackSize() == 1) { - afterIf = visitBasicBlockOrLoop(branchInfo->branchStackTop()); - } - ifStmt->setSuccessor(afterIf); - return ifStmt; - } - // Special case (3) - // ---------------- - // if cond: - // bb1 that ends with return/unreachable - // bb2 that ends with a branch to bb1 - else if (LLVMSupport::endsWithRetOrUnreach(bb1) && - llvm::isa(bb2->getTerminator()) && - !llvm::dyn_cast(bb2->getTerminator())->isConditional() && - llvm::dyn_cast(bb2->getTerminator())->getSuccessor(0) == bb1) { - // This pattern is rather irritating to decompile, so negate the - // condition and switch bb1 with bb2. - ShPtr ifCond(ExpressionNegater::negate( - converter->llvmValueToExpression(cond))); - - // Since we switched bb1 with bb2, generate bb2 first. - ShPtr ifBody(visitBasicBlockOrLoop(bb2, false)); - ShPtr phiCopies(getPHICopiesForSuccessor(bb2, bb1)); - if (phiCopies) { - // There are some PHI copies, so we need to move the last empty - // statement from ifBody after appended PHI copies. This way, - // instead of generating, e.g. - // - // tomato = lemon - // # branch -> block - // grape1 = tomato - // - // we generate - // - // tomato = lemon - // grape1 = tomato - // # branch -> block - // - ShPtr lastStmtFromIfBody(Statement::getLastStatement( - ifBody)); - if (isa(lastStmtFromIfBody)) { - // To prevent loops in the resulting BIR, move the empty - // statement only if there are also some other statements in - // the if's body. - if (ifBody != lastStmtFromIfBody) { - Statement::removeStatement(lastStmtFromIfBody); - ifBody = Statement::mergeStatements(ifBody, phiCopies); - ifBody = Statement::mergeStatements(ifBody, lastStmtFromIfBody); - } else { - ifBody = Statement::mergeStatements(ifBody, phiCopies); - } - } - } else { - ifBody = Statement::mergeStatements(ifBody, phiCopies); - } - - ShPtr ifStmt(IfStmt::create(ifCond, ifBody)); - - // If there are some PHI nodes in the successor, generate PHI copies - // for them into an else clause. - if (llvm::isa(bb1->begin())) { - ShPtr elseBody(getPHICopiesForSuccessor(i.getParent(), bb1)); - ifStmt->setElseClause(elseBody); - } - - // Now generate bb1. - ifStmt->setSuccessor(visitBasicBlockOrLoop(bb1)); - - return ifStmt; - } - // Special case (4) - // ---------------- - // if cond: - // bb1 that ends with return/unreachable - // bb2 - // - // The call to endWithSameUncondBranch() is done to generate less amount of - // redundant code. - else if (LLVMSupport::endsWithRetOrUnreach(bb1, false) && - !LLVMSupport::endWithSameUncondBranch(bb1, bb2)) { - ShPtr ifCond(converter->llvmValueToExpression(cond)); - - // Check whether bb2 starts with the same return statement - // as bb1 ends. If so, then the return statement in bb1 - // is redundant, so do not generate it. - bool genTerm = true; - llvm::ReturnInst *ri1 = llvm::dyn_cast(bb1->getTerminator()); - llvm::ReturnInst *ri2 = llvm::dyn_cast(bb2->begin()); - if (ri1 && ri2) { - llvm::Value *rv1 = ri1->getReturnValue(); - llvm::Value *rv2 = ri2->getReturnValue(); - if (rv1 && rv2 && rv1 == rv2) { - genTerm = false; - } - } - - // Generate the body of the if statement (first basic block). - ShPtr phiCopies(getPHICopiesForSuccessor(i.getParent(), bb1)); - - ShPtr ifBody(visitBasicBlockOrLoop(bb1, genTerm)); - ifBody = Statement::mergeStatements(phiCopies, ifBody); - - ShPtr ifStmt(IfStmt::create(ifCond, ifBody)); - - // If there are some PHI nodes in the successor, generate PHI copies - // for them into an else clause. - if (llvm::isa(bb2->begin())) { - ShPtr elseBody(getPHICopiesForSuccessor(i.getParent(), bb2)); - ifStmt->setElseClause(elseBody); - } - - // Generate the second basic block. - ShPtr afterIf(visitBasicBlockOrLoop(bb2)); - ifStmt->setSuccessor(afterIf); - return ifStmt; - } - // A general case - // -------------- - // if cond: - // bb1 - // ... (other basic blocks, possibly nested) - // else: - // bb2 - // ... (other basic blocks, possibly nested) - // commonBranchDest - else { - // No goto statement is necessary. - ShPtr phiCopiesCBD; - if (cbd) { - phiCopiesCBD = getPHICopiesForSuccessor(i.getParent(), cbd); - } - - ShPtr ifStmt; - - // if cond: - // bb1 ending with a non-return instruction - // else: - // bb2 ending with return - // cbd - if (cbd != bb1 && cbd != bb2 && llvm::isa(bb2->getTerminator()) && - !llvm::isa(bb1->getTerminator())) { - // Since bb2 ends with a return instruction, negate the condition - // and switch bb1 with bb2 to prevent emission of the following - // type of code: - // - // if c1: - // if c2: - // if c3: - // A - // else: - // B - // return - // else: - // C - // return - // else: - // D - // return - // - // Instead, we generate the following code: - // - // if not c1: - // D - // return - // if not c2: - // C - // return - // if not c3: - // B - // return - // A - // - // This is done to decrease the nesting of `if` statements. - ShPtr ifCond(ExpressionNegater::negate( - converter->llvmValueToExpression(cond))); - - ShPtr phiCopies(getPHICopiesForSuccessor( - i.getParent(), bb2)); - // Since bb2 ends with a return instruction, we don't need to push - // cbd onto the branch stack. - ShPtr ifBody(visitBasicBlockOrLoop(bb2)); - ifBody = Statement::mergeStatements(phiCopies, ifBody); - - ifStmt = IfStmt::create(ifCond, ifBody); - - // If there are some PHI nodes in the successor, generate PHI copies - // for them after the if clause. - ShPtr phiCopiesBB1; - if (llvm::isa(bb1->begin())) { - phiCopiesBB1 = getPHICopiesForSuccessor(i.getParent(), bb1); - } - - // Now generate bb1. - ShPtr afterIf(visitBasicBlockOrLoop(bb1)); - ifStmt->setSuccessor(Statement::mergeStatements(phiCopiesBB1, afterIf)); - } else if (cbd != bb1) { - ShPtr ifCond(converter->llvmValueToExpression(cond)); - - ShPtr phiCopiesBB1(getPHICopiesForSuccessor(i.getParent(), bb1)); - branchInfo->branchStackPush(cbd); - ShPtr ifBody(Statement::mergeStatements( - phiCopiesBB1, visitBasicBlockOrLoop(bb1))); - branchInfo->branchStackPop(); - - ifStmt = IfStmt::create(ifCond, ifBody); - - if (cbd != bb2) { - ShPtr phiCopiesBB2(getPHICopiesForSuccessor(i.getParent(), bb2)); - branchInfo->branchStackPush(cbd); - ShPtr elseBody(Statement::mergeStatements(phiCopiesBB2, - visitBasicBlockOrLoop(bb2))); - branchInfo->branchStackPop(); - - ifStmt->setElseClause(elseBody); - } - // cbd == bb1 - } else if (cbd != bb2) { - ShPtr ifCond(ExpressionNegater::negate( - converter->llvmValueToExpression(cond))); - - ShPtr phiCopies(getPHICopiesForSuccessor(i.getParent(), bb2)); - branchInfo->branchStackPush(cbd); - ShPtr ifBody(visitBasicBlockOrLoop(bb2)); - ifBody = Statement::mergeStatements(phiCopies, ifBody); - branchInfo->branchStackPop(); - - ifStmt = IfStmt::create(ifCond, ifBody); - // cbd == bb1 == bb2 - } else { - FAIL("cbd == bb1 == bb2, this should never happen"); - return ShPtr(); - } - - // Do not generate cbd if it is the header of the current loop. - ShPtr afterIf; - if (cbd && !branchInfo->isSuccHeaderOfInnerLoop(i.getParent(), cbd)) { - afterIf = visitBasicBlockOrLoop(cbd); - } - - return Statement::mergeStatements( - Statement::mergeStatements(phiCopiesCBD, ifStmt), - afterIf); - } -} - -ShPtr OrigLLVMIR2BIRConverter::visitSwitchInst(llvm::SwitchInst &si) { - // Find the common switch destination, i.e. which basic block is the - // successor of the switch, no matter which case is taken. This information - // is then used in the same way as when emitting nested if-else blocks. - llvm::BasicBlock *csd = branchInfo->findCommonSwitchDestination(&si); - branchInfo->branchStackPush(csd); - - ShPtr switchStmt(SwitchStmt::create( - converter->llvmValueToExpression(si.getCondition()))); - generatingSwitchStmt = true; - - // If the default branch has only a single predecessor, generate it as the - // first clause of the switch statement. If it has two predecessors, - // generate it after the second predecessor. Otherwise, if it has more than - // one predecessor, generate it after the switch instruction. - bool defaultBBGenerated = false; - llvm::BasicBlock *defaultBB = si.getDefaultDest(); - if (defaultBB->getUniquePredecessor()) { - switchStmt->addDefaultClause(getDefaultSwitchBlock( - defaultBB, si.getNumOperands() >= 4 ? - llvm::cast(si.getOperand(3)) : nullptr)); - defaultBBGenerated = true; - } - - // Generate all cases in the switch. For every case, there are two operands: - // i: the case condition - // i+1: the case body - for (unsigned i = 2, e = si.getNumOperands(); i < e; i += 2) { - // Case expression. - ShPtr caseExpr(getSwitchCaseExpression(si.getOperand(i))); - - // Case body. - llvm::BasicBlock *bb = llvm::cast(si.getOperand(i + 1)); - llvm::Instruction *bbTerm = bb->getTerminator(); - if (LLVMSupport::endsWithRetOrUnreach(bb, false)) { - // There is no need to generate a break statement. - ShPtr phiCopies(getPHICopiesForSuccessor(si.getParent(), bb)); - ShPtr block(Statement::mergeStatements(phiCopies, - visitBasicBlockOrLoop(bb))); - switchStmt->addClause(caseExpr, block); - } else if (!defaultBBGenerated && llvm::isa(bbTerm) && - llvm::cast(bbTerm)->getSuccessor(0) == defaultBB && - LLVMSupport::getNumberOfUniquePredecessors(defaultBB) == 2) { - // Fall through to the default case block. - - // Generate the case block. - ShPtr block(visitBasicBlockOrLoop(bb, false)); - ShPtr phiCopies(getPHICopiesForSuccessor(bb, defaultBB)); - switchStmt->addClause(caseExpr, - Statement::mergeStatements(block, phiCopies)); - - // Generate the default case block. - ShPtr defaultBlock(getDefaultSwitchBlock(defaultBB, (i + 2) < e ? - llvm::cast(si.getOperand(i + 3)) : nullptr)); - switchStmt->addDefaultClause(defaultBlock); - defaultBBGenerated = true; - } else if ((i + 2) < e && llvm::isa(bbTerm) && - llvm::cast(bbTerm)->getSuccessor(0) == si.getOperand(i + 3)) { - // Fall through to the next case (not the default one). - ShPtr block(visitBasicBlockOrLoop(bb, false)); - ShPtr phiCopies(getPHICopiesForSuccessor(bb, - llvm::cast(si.getOperand(i + 3)))); - switchStmt->addClause(caseExpr, - Statement::mergeStatements(block, phiCopies)); - } else { - // There is no fall through, so also generate a break statement; - // however, only if the instruction that bb ends with is not a - // return statement (otherwise, it would be redundant). - ShPtr phiCopies(getPHICopiesForSuccessor(si.getParent(), bb)); - ShPtr block(Statement::mergeStatements(phiCopies, - visitBasicBlockOrLoop(bb))); - if (!llvm::isa(bb->getTerminator())) { - block = Statement::mergeStatements(block, - BreakStmt::create()); - } - switchStmt->addClause(caseExpr, block); - } - } - - branchInfo->branchStackPop(); - - // Check whether the default basic block has been generated. - if (!defaultBBGenerated) { - // It hasn't been generated yet. There are two known situations where - // this may happen, (1) and (2), discussed next. - if (defaultBB == si.getParent()) { - // (1) The default basic block is the same basic block in which - // this switch statement is. A goto statement is necessary here. - ShPtr gotoStmt; - if (mapHasKey(bbStmtMap, si.getParent())) { - auto gotoTarget = bbStmtMap[si.getParent()]; - gotoStmt = GotoStmt::create(gotoTarget); - setGotoTargetLabel(gotoTarget, si.getParent()); - } else if (defaultBB->getFirstNonPHI() == &si) { - // The goto target is the switch statement itself. - bbStmtMap[si.getParent()] = switchStmt; - gotoStmt = GotoStmt::create(switchStmt); - setGotoTargetLabel(switchStmt, si.getParent()); - } else { - // We do not have a mapping of si.getParent() in bbStmtMap, so - // the goto statement will need to be patched. - gotoStmt = GotoStmt::create(EmptyStmt::create()); - addGotoStmtToPatch(ucast(gotoStmt), si.getParent()); - } - - ShPtr phiCopies(getPHICopiesForSuccessor(si.getParent(), - si.getParent())); - ShPtr defaultBlock(Statement::mergeStatements( - phiCopies, gotoStmt)); - switchStmt->addDefaultClause(defaultBlock); - } else { - // (2) It is a common branch destination of some cases in the switch. - // Therefore, generate it as the successor of the switch statement. - // TODO PHI copies? - switchStmt->setSuccessor(visitBasicBlockOrLoop(defaultBB)); - } - } else if (csd && csd != defaultBB) { - // TODO PHI copies? - // Generate the basic blocks after the switch statement (if there are - // any). - switchStmt->setSuccessor(visitBasicBlockOrLoop(csd)); - } - - generatingSwitchStmt = false; - - // The switch statement has to be preceded by PHI copies for the default - // clause. - ShPtr phiCopiesSwitch(getPHICopiesForSuccessor(si.getParent(), - si.getDefaultDest())); - return Statement::mergeStatements(phiCopiesSwitch, switchStmt); -} - -ShPtr OrigLLVMIR2BIRConverter::visitReturnInst(llvm::ReturnInst &i) { - return converter->llvmReturnInstToReturnStmt(i); -} - -} // namespace llvmir2hll -} // namespace retdec diff --git a/src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_branch_info.cpp b/src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_branch_info.cpp deleted file mode 100644 index fd9662a747..0000000000 --- a/src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_branch_info.cpp +++ /dev/null @@ -1,591 +0,0 @@ -/** -* @file src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_branch_info.cpp -* @brief Implementation of LLVMBranchInfo. -* @copyright (c) 2017 Avast Software, licensed under the MIT license -*/ - -#include - -#include -#include -#include -#include - -#include "retdec/llvmir2hll/ir/const_int.h" -#include "retdec/llvmir2hll/ir/expression.h" -#include "retdec/llvmir2hll/ir/module.h" -#include "retdec/llvmir2hll/ir/statement.h" -#include "retdec/llvmir2hll/ir/variable.h" -#include "retdec/llvmir2hll/llvm/llvm_support.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_branch_info.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_converter.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/vars_handler.h" -#include "retdec/llvmir2hll/support/debug.h" -#include "retdec/llvm-support/diagnostics.h" -#include "retdec/utils/container.h" - -using namespace retdec::llvm_support; - -using retdec::utils::clear; -using retdec::utils::hasItem; - -namespace retdec { -namespace llvmir2hll { - -namespace { - -// To produce deterministic results, we need to order the basic blocks -// by their name. -struct ByNameComparator { - bool operator()(const llvm::BasicBlock *b1, const llvm::BasicBlock *b2) const { - return b1->getName() < b2->getName(); - } -}; -using BBToCountMap = std::map; - -} // anonymous namespace - -/** -* @brief Constructs a new informer. -* -* @param[in] converter Converter from LLVM values to values in the backend IR. -* @param[in] varsHandler Handler of variables created during decompilation. -*/ -LLVMBranchInfo::LLVMBranchInfo(ShPtr converter, - ShPtr varsHandler): - loopInfo(nullptr), converter(converter), varsHandler(varsHandler), - loopStack(), branchStack() {} - -/** -* @brief Destructs the variables handler. -*/ -LLVMBranchInfo::~LLVMBranchInfo() {} - -/** -* @brief Initializes the informer. -* -* @param[in] li Information about loops from LLVM. -* -* This function has to be called before the informer is used. -*/ -void LLVMBranchInfo::init(llvm::LoopInfo *li) { - loopInfo = li; - loopStack.clear(); - clear(branchStack); - branchStack.push(nullptr); // A bottom marker. -} - -/** -* @brief Pushes @a bb onto the branch stack. -*/ -void LLVMBranchInfo::branchStackPush(llvm::BasicBlock *bb) { - branchStack.push(bb); -} - -/** -* @brief Returns the topmost basic block from the branch stack. -* -* If there are no items in the stack, it returns the null pointer. -*/ -llvm::BasicBlock *LLVMBranchInfo::branchStackTop() const { - return branchStack.top(); -} - -/** -* @brief Returns the size of the branch stack. -*/ -std::size_t LLVMBranchInfo::branchStackSize() const { - return branchStack.size() - 1; // Do not include the bottom marker. -} - -/** -* @brief Removes the topmost basic block from the branch stack. -* -* @par Preconditions -* - the stack is not empty -*/ -void LLVMBranchInfo::branchStackPop() { - // Do not include the bottom marker. - ASSERT_MSG(branchStack.size() > 1, "cannot pop from an empty stack"); - - branchStack.pop(); -} - -/** -* @brief Returns @c true if we are currently generating a loop, @c false -* otherwise. -*/ -bool LLVMBranchInfo::generatingLoop() const { - return !loopStack.empty(); -} - -/** -* @brief Marks the information that the loop @a l is being currently generated. -* -* This function has to be called whenever a new loop is started being -* generated; otherwise, some member functions might return incorrect -* information. -* -* @par Preconditions -* - @a l is non-null -*/ -void LLVMBranchInfo::startGeneratingLoop(llvm::Loop *l) { - PRECONDITION_NON_NULL(l); - - loopStack.push_front(l); -} - -/** -* @brief Marks the information that the currently generated loop has been -* generated. -* -* This function has to be called whenever a loop has been generated; otherwise, -* some member functions might return incorrect information. -*/ -void LLVMBranchInfo::endGeneratingLoop() { - loopStack.pop_front(); -} - -/** -* @brief Returns @c true if @a bb is the header of @a loop, @c false otherwise. -* -* @par Preconditions -* - @a bb and @a loop are non-null -*/ -bool LLVMBranchInfo::isLoopHeader(llvm::BasicBlock *bb, llvm::Loop *loop) const { - PRECONDITION_NON_NULL(bb); - PRECONDITION_NON_NULL(loop); - - return loopInfo->isLoopHeader(bb) && loopInfo->getLoopFor(bb) == loop; -} - -/** -* @brief Returns @c true if the given loop @a l can be optimized into a for -* loop, @c false otherwise. -* -* "Optimized" means that instead of "while True", we can generate a for loop. -* -* @par Preconditions -* - @a l is non-null -*/ -bool LLVMBranchInfo::isOptimizableToForLoop(const llvm::Loop *l) const { - PRECONDITION_NON_NULL(l); - - // We need to know the induction variable and the trip count. - return l->getCanonicalInductionVariable() && getTripCount(l); -} - -/** -* @brief Returns @c true if @a succ is the header of an inner loop in which @a -* bb is, @c false otherwise. -* -* @par Preconditions -* - both @a bb and @a succ are non-null -*/ -bool LLVMBranchInfo::isSuccHeaderOfInnerLoop(llvm::BasicBlock *bb, - llvm::BasicBlock *succ) const { - PRECONDITION_NON_NULL(bb); - PRECONDITION_NON_NULL(succ); - - if (loopStack.empty()) { - return false; - } - - auto innerLoop = loopStack.front(); - return innerLoop->getHeader() == succ; -} - -/** -* @brief Returns @c true if @a succ is the header of an outer (NOT inner) loop -* in which @a bb is, @c false otherwise. -* -* @par Preconditions -* - both @a bb and @a succ are non-null -*/ -bool LLVMBranchInfo::isSuccHeaderOfOuterLoop(llvm::BasicBlock *bb, - llvm::BasicBlock *succ) const { - PRECONDITION_NON_NULL(bb); - PRECONDITION_NON_NULL(succ); - - if (loopStack.size() < 2) { - return false; - } - - // We need the second topmost loop. - auto outerLoop = *(++loopStack.begin()); - return outerLoop->getHeader() == succ; -} - -/** -* @brief Returns the common branch destination of @a bb1 and @a bb2. -* -* Given two basic blocks, @a bb1 and @a bb2, this function returns the common -* branch destination of branches in @a bb1 and @a bb2. If there is no common -* branch destination, the null pointer is returned. -* -* For example, lets have the following piece of code. -* -* @code -* if (cond) { -* bb1: -* ... -* goto lab; -* } else { -* bb2: -* ... -* goto lab; -* } -* lab: -* ... -* @endcode -* -* Then, for @a bb1 and @a bb2, this function returns @c lab. -* -* @par Preconditions -* - both @a bb1 and @a bb2 are non-null -*/ -llvm::BasicBlock *LLVMBranchInfo::findCommonBranchDestination( - llvm::BasicBlock *bb1, llvm::BasicBlock *bb2) const { - PRECONDITION_NON_NULL(bb1); - PRECONDITION_NON_NULL(bb2); - - // We're going to perform two simultaneous BFSs (breadth-first searches), - // starting from bb1 and bb2, respectively. Therefore, we traverse the - // control-flow graph, level by level, until we find a basic block which is - // our common node. If we finish traversing the graph without encountering - // a common basic block, then there is no such block. - BFSQueue bfsQueue1, bfsQueue2; - BBSet bfsProcessedBBs1, bfsProcessedBBs2; - - bfsQueue1.push(bb1); - bfsQueue2.push(bb2); - while (!bfsQueue1.empty() || !bfsQueue2.empty()) { - llvm::BasicBlock *poppedBB1 = nullptr; - if (!bfsQueue1.empty()) { - poppedBB1 = bfsQueue1.front(); - bfsQueue1.pop(); - } - - llvm::BasicBlock *poppedBB2 = nullptr; - if (!bfsQueue2.empty()) { - poppedBB2 = bfsQueue2.front(); - bfsQueue2.pop(); - } - - // Check whether we've found the common branch destination. - if (poppedBB1 == poppedBB2 || - hasItem(bfsProcessedBBs2, poppedBB1)) { - return poppedBB1; - } else if (hasItem(bfsProcessedBBs1, poppedBB2)) { - return poppedBB2; - } - - if (poppedBB1) { - processAndPushBasicBlock(poppedBB1, bfsQueue1, bfsProcessedBBs1); - } - if (poppedBB2) { - processAndPushBasicBlock(poppedBB2, bfsQueue2, bfsProcessedBBs2); - } - } - - // The common branch destination was not found. - return nullptr; -} - -/** -* @brief Processes the given basic block in a BFS search. -* -* Checks whether @a poppedBB hasn't been traversed yet. If it hasn't, the -* function pushes its successors to @a bfsQueue and marks it as a processed -* basic block in @a bfsProcessedBBs. The only pushed successors are the ones -* that are not the header of a loop in which @a poppedBB is. This ensures -* proper code emission. -* -* @par Preconditions -* - @a poppedBB is non-null -*/ -void LLVMBranchInfo::processAndPushBasicBlock(llvm::BasicBlock *poppedBB, - BFSQueue &bfsQueue, BBSet &bfsProcessedBBs) const { - PRECONDITION_NON_NULL(poppedBB); - - // Is the popped basic block a new one, i.e. we have not traversed it - // yet? This ensures that the BFS algorithm will eventually end. - if (!hasItem(bfsProcessedBBs, poppedBB)) { - // It is, so add its successors (that are not the header of a loop in - // which poppedBB is) into the queue. - if (auto bi = llvm::dyn_cast(poppedBB->getTerminator())) { - auto numOfSuccessors = bi->getNumSuccessors(); - for (decltype(numOfSuccessors) i = 0; i < numOfSuccessors; ++i) { - auto succ = bi->getSuccessor(i); - if (!isSuccHeaderOfInnerLoop(poppedBB, succ)) { - bfsQueue.push(succ); - } - } - } - - bfsProcessedBBs.insert(poppedBB); - } -} - -/** -* @brief Returns @c true if a goto statement is necessary when branching from -* @a srcBB to @a dstBB. -* -* @param[in] srcBB Source basic block from which we are jumping. -* @param[in] dstBB Destination basic block to which we are jumping. -* -* @par Preconditions -* - both @a srcBB and @a dstBB are non-null -* - a statement in the backend IR corresponding to @a dstBB has already been -* emitted -*/ -bool LLVMBranchInfo::isGotoNecessary(llvm::BasicBlock *srcBB, - llvm::BasicBlock *dstBB) const { - PRECONDITION_NON_NULL(srcBB); - PRECONDITION_NON_NULL(dstBB); - - // If the source node is accessible form the destination node, we need a - // goto. - return isAccessible(srcBB, dstBB); -} - -/** -* @brief Returns @c true if @a bb is accessible from the basic block @a from. -* -* @par Preconditions -* - both @a bb and @a from are non-null -*/ -bool LLVMBranchInfo::isAccessible(llvm::BasicBlock *bb, llvm::BasicBlock *from) const { - PRECONDITION_NON_NULL(bb); - PRECONDITION_NON_NULL(from); - - BBSet visitedBlocks; - return isAccessibleImpl(bb, from, visitedBlocks); -} - -/** -* @brief Tries to find a common destination of branches in the given switch -* instruction. -* -* For example, the following switch statement has @c bb as the common -* destination of branches: -* -* @code -* switch (x) { -* case 1: -* ... -* break -* case 2: -* ... -* return -* case 3: -* ... -* break -* } -* bb -* @endcode -* -* If there is no common destination, the null pointer is returned. -* -* @par Preconditions -* - @a si is non-null -*/ -llvm::BasicBlock *LLVMBranchInfo::findCommonSwitchDestination( - llvm::SwitchInst *si) const { - PRECONDITION_NON_NULL(si); - - // Case (1): - // If the default switch branch has three or more predecessors, then it is - // a common switch destination. Note that if it has one or two successors, - // then it doesn't need to be a common switch destination. - auto defaultBB = si->getDefaultDest(); - if (LLVMSupport::getNumberOfUniquePredecessors(defaultBB) >= 3) { - return defaultBB; - } - - // Case (2): - // Get basic blocks of all cases. - std::vector switchCases; - for (unsigned i = 2, e = si->getNumOperands(); i < e; i += 2) { - switchCases.push_back(llvm::cast(si->getOperand(i+1))); - } - // Go through all the gathered basic blocks and check their terminators. If - // their terminator is a branch, and this branch is not to the next case, - // add all targets of the branch into a map. We then select a target with - // the most predecessors. - BBToCountMap primaryBranchTargets; - for (unsigned i = 2, e = si->getNumOperands(); i < e; i += 2) { - auto bb = llvm::cast(si->getOperand(i+1)); - auto bbTerm = bb->getTerminator(); - if (auto bi = llvm::dyn_cast(bbTerm)) { - for (unsigned j = 0; j < bi->getNumSuccessors(); ++j) { - if ((i + 2) < e && bi->getSuccessor(j) != si->getOperand(i + 3)) { - primaryBranchTargets[bi->getSuccessor(j)]++; - } - } - } - } - // Add also the primary target of the default branch; however, do this only - // if the target is not any of the case bodies. - if (auto bi = llvm::dyn_cast(defaultBB->getTerminator())) { - for (unsigned j = 0; j < bi->getNumSuccessors(); ++j) { - if (hasItem(switchCases, defaultBB)) { - primaryBranchTargets[bi->getSuccessor(j)]++; - } - } - } - // Check which target was counted the most times and return it. - llvm::BasicBlock *csd = nullptr; // common switch destination - unsigned csdNumOfOccurrences = 0; - for (auto &p: primaryBranchTargets) { - if (p.second > csdNumOfOccurrences) { - csd = p.first; - csdNumOfOccurrences = p.second; - } - } - return csd; -} - -/** -* @brief Returns the trip count of the given loop @a l. -* -* Returns a loop-invariant constant integer indicating the number of times the -* loop will be executed. Note that this means that the backedge of the loop -* executes N-1 times. If the trip count cannot be determined, the function -* returns 0. -* -* The IndVarSimplify pass transforms loops to have a form that this -* function easily understands. -*/ -ShPtr LLVMBranchInfo::getTripCount(const llvm::Loop *l) const { - // The implementation is based on the implementation of - // Loop::getTripCount() from LLVM 2.8; since LLVM 3.1, it has been removed. - // TODO Use SCEV for this purpose? - // https://llvm.org/viewvc/llvm-project?view=rev&revision=145262 says - // that this functionality has been moved to SCEV. - - // Canonical loops will end with a 'cmp ne i, v', where i is the incremented - // canonical induction variable and v is the trip count of the loop. - auto iv = l->getCanonicalInductionVariable(); - if (!iv || iv->getNumIncomingValues() != 2) { - return {}; - } - - bool p0InLoop = l->contains(iv->getIncomingBlock(0)); - auto inc = iv->getIncomingValue(!p0InLoop); - auto backedgeBlock = iv->getIncomingBlock(!p0InLoop); - - if (auto bi = llvm::dyn_cast(backedgeBlock->getTerminator())) { - if (bi->isConditional()) { - if (auto ici = llvm::dyn_cast(bi->getCondition())) { - if (ici->getOperand(0) == inc) { - if (bi->getSuccessor(0) == l->getHeader()) { - if (ici->getPredicate() == llvm::ICmpInst::ICMP_NE) { - if (auto ci = llvm::dyn_cast( - ici->getOperand(1))) { - return ConstInt::create(ci->getValue()); - } - } - } else if (ici->getPredicate() == llvm::ICmpInst::ICMP_EQ) { - if (auto ci = llvm::dyn_cast( - ici->getOperand(1))) { - return ConstInt::create(ci->getValue()); - } - } - } - } - } - } - - // The trip count cannot be determined. - return {}; -} - -/** -* @brief Returns the innermost loop that @a bb lives in. -* -* If @a bb is in no loop (for example, it is the entry node), the null pointer -* is returned. -* -* @par Preconditions -* - @a bb is non-null -*/ -llvm::Loop *LLVMBranchInfo::getLoopFor(const llvm::BasicBlock *bb) const { - PRECONDITION_NON_NULL(bb); - - return loopInfo->getLoopFor(bb); -} - -/** -* @brief Returns @c true if @a bb is accessible from the basic block @a from. -* -* @param[in] bb Searched basic block. -* @param[in] from Basic block from which we start the search. -* @param[in] visitedBlocks Set of already visited blocks. -* -* This function is an implementation of isAccessible(). It may recursively call -* itself. -* -* @par Preconditions -* - both @a bb and @a from are non-null -*/ -bool LLVMBranchInfo::isAccessibleImpl(llvm::BasicBlock *bb, llvm::BasicBlock *from, - BBSet &visitedBlocks) const { - PRECONDITION_NON_NULL(bb); - PRECONDITION_NON_NULL(from); - - // Check whether we have found the statement we are looking for. - if (bb == from) { - return true; - } - - // Check whether we have already checked this statement. If so, return to - // avoid infinite recursion. - if (hasItem(visitedBlocks, from)) { - return false; - } - visitedBlocks.insert(from); - - // - // Visit all successors of the source basic block. - // - - // BranchInst. - if (auto bi = llvm::dyn_cast(from->getTerminator())) { - auto numOfSuccessors = bi->getNumSuccessors(); - // It suffices if the basic block we are looking for is accessible - // from one of the successors. - for (decltype(numOfSuccessors) i = 0; i < numOfSuccessors; ++i) { - llvm::BasicBlock *succ = bi->getSuccessor(i); - if (isAccessibleImpl(bb, succ, visitedBlocks)) { - return true; - } - } - return false; - } - - // SwitchInst. - if (auto si = llvm::dyn_cast(from->getTerminator())) { - // It suffices if the basic block we are looking for is accessible - // from one of the switch cases. - - // First, check the default clause. - if (isAccessibleImpl(bb, si->getDefaultDest(), visitedBlocks)) { - return true; - } - - // Then, check all other cases. - for (unsigned i = 2, e = si->getNumOperands(); i < e; i += 2) { - if (isAccessibleImpl(bb, llvm::cast(si->getOperand(i + 1)), - visitedBlocks)) { - return true; - } - } - } - - // Other terminators, like the return or unreachable statements. - return false; -} - -} // namespace llvmir2hll -} // namespace retdec diff --git a/src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_converter.cpp b/src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_converter.cpp deleted file mode 100644 index f6d298cd9e..0000000000 --- a/src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_converter.cpp +++ /dev/null @@ -1,1413 +0,0 @@ -/** -* @file src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_converter.cpp -* @brief Implementation LLVMConverter. -* @copyright (c) 2017 Avast Software, licensed under the MIT license -*/ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "retdec/llvmir2hll/ir/add_op_expr.h" -#include "retdec/llvmir2hll/ir/address_op_expr.h" -#include "retdec/llvmir2hll/ir/and_op_expr.h" -#include "retdec/llvmir2hll/ir/array_index_op_expr.h" -#include "retdec/llvmir2hll/ir/array_type.h" -#include "retdec/llvmir2hll/ir/assign_stmt.h" -#include "retdec/llvmir2hll/ir/binary_op_expr.h" -#include "retdec/llvmir2hll/ir/bit_and_op_expr.h" -#include "retdec/llvmir2hll/ir/bit_cast_expr.h" -#include "retdec/llvmir2hll/ir/bit_or_op_expr.h" -#include "retdec/llvmir2hll/ir/bit_shl_op_expr.h" -#include "retdec/llvmir2hll/ir/bit_shr_op_expr.h" -#include "retdec/llvmir2hll/ir/bit_xor_op_expr.h" -#include "retdec/llvmir2hll/ir/call_expr.h" -#include "retdec/llvmir2hll/ir/call_stmt.h" -#include "retdec/llvmir2hll/ir/const_array.h" -#include "retdec/llvmir2hll/ir/const_bool.h" -#include "retdec/llvmir2hll/ir/const_float.h" -#include "retdec/llvmir2hll/ir/const_int.h" -#include "retdec/llvmir2hll/ir/const_null_pointer.h" -#include "retdec/llvmir2hll/ir/const_string.h" -#include "retdec/llvmir2hll/ir/const_struct.h" -#include "retdec/llvmir2hll/ir/deref_op_expr.h" -#include "retdec/llvmir2hll/ir/div_op_expr.h" -#include "retdec/llvmir2hll/ir/eq_op_expr.h" -#include "retdec/llvmir2hll/ir/expression.h" -#include "retdec/llvmir2hll/ir/ext_cast_expr.h" -#include "retdec/llvmir2hll/ir/float_type.h" -#include "retdec/llvmir2hll/ir/fp_to_int_cast_expr.h" -#include "retdec/llvmir2hll/ir/function_type.h" -#include "retdec/llvmir2hll/ir/gt_eq_op_expr.h" -#include "retdec/llvmir2hll/ir/gt_op_expr.h" -#include "retdec/llvmir2hll/ir/int_to_fp_cast_expr.h" -#include "retdec/llvmir2hll/ir/int_to_ptr_cast_expr.h" -#include "retdec/llvmir2hll/ir/int_type.h" -#include "retdec/llvmir2hll/ir/lt_eq_op_expr.h" -#include "retdec/llvmir2hll/ir/lt_op_expr.h" -#include "retdec/llvmir2hll/ir/mod_op_expr.h" -#include "retdec/llvmir2hll/ir/module.h" -#include "retdec/llvmir2hll/ir/mul_op_expr.h" -#include "retdec/llvmir2hll/ir/neg_op_expr.h" -#include "retdec/llvmir2hll/ir/neq_op_expr.h" -#include "retdec/llvmir2hll/ir/or_op_expr.h" -#include "retdec/llvmir2hll/ir/pointer_type.h" -#include "retdec/llvmir2hll/ir/ptr_to_int_cast_expr.h" -#include "retdec/llvmir2hll/ir/return_stmt.h" -#include "retdec/llvmir2hll/ir/string_type.h" -#include "retdec/llvmir2hll/ir/struct_index_op_expr.h" -#include "retdec/llvmir2hll/ir/struct_type.h" -#include "retdec/llvmir2hll/ir/sub_op_expr.h" -#include "retdec/llvmir2hll/ir/ternary_op_expr.h" -#include "retdec/llvmir2hll/ir/trunc_cast_expr.h" -#include "retdec/llvmir2hll/ir/type.h" -#include "retdec/llvmir2hll/ir/unknown_type.h" -#include "retdec/llvmir2hll/ir/var_def_stmt.h" -#include "retdec/llvmir2hll/ir/variable.h" -#include "retdec/llvmir2hll/ir/void_type.h" -#include "retdec/llvmir2hll/llvm/llvm_support.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/llvm_converter.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/vars_handler.h" -#include "retdec/llvmir2hll/llvm/string_conversions.h" -#include "retdec/llvmir2hll/support/debug.h" -#include "retdec/llvmir2hll/support/types.h" -#include "retdec/llvm-support/diagnostics.h" -#include "retdec/utils/container.h" - -using namespace retdec::llvm_support; - -using retdec::utils::mapHasKey; - -namespace retdec { -namespace llvmir2hll { -namespace { - -/** -* @brief Returns @c true if the specified LLVM value's name needs to have its -* address taken in order to get a value of the correct type. -* -* @par Preconditions -* - @a v is non-null -*/ -bool isAddressExposed(const llvm::Value *v) { - PRECONDITION_NON_NULL(v); - - return LLVMSupport::isDirectAlloca(v) || llvm::isa(v); -} - -/** -* @brief Creates a constant array from the given initializer. -*/ -ShPtr createInitializedConstArray(const ExprVector &values) { - // Do not use llvmTypeToType() because it considers all string types as - // char * (a pointer). We want to have StringType in such cases. - ArrayType::Dimensions dims{values.size()}; - ShPtr arrayType = ArrayType::create(values.front()->getType(), dims); - return ConstArray::create(values, arrayType); -} - -} // anonymous namespace - -/** -* @brief Constructs a new converter. -* -* See create() for more information. -*/ -LLVMConverter::LLVMConverter(llvm::Module *module, ShPtr resModule, - ShPtr varsHandler): - module(module), resModule(resModule), varsHandler(varsHandler), - llvmTypeToTypeMap(), optionStrictFPUSemantics(false) {} - -/** -* @brief Destructs the converter. -*/ -LLVMConverter::~LLVMConverter() {} - -/** -* @brief Converts the given LLVM constant @a c into an expression in the -* backend IR. -* -* @par Preconditions -* - @a c is non-null and corresponds to an expression -*/ -ShPtr LLVMConverter::llvmConstantToExpression(llvm::Constant *c) { - PRECONDITION_NON_NULL(c); - - // Expression - if (llvm::ConstantExpr *ce = llvm::dyn_cast(c)) { - ShPtr op(llvmValueToExpression(ce->getOperand(0))); - switch (ce->getOpcode()) { - // Casts - case llvm::Instruction::Trunc: - return TruncCastExpr::create(op, llvmTypeToType(ce->getType())); - - case llvm::Instruction::ZExt: - return ExtCastExpr::create(op, llvmTypeToType(ce->getType())); - - case llvm::Instruction::SExt: - return ExtCastExpr::create(op, llvmTypeToType(ce->getType(), true), - ExtCastExpr::Variant::SExt); - - case llvm::Instruction::FPTrunc: - return TruncCastExpr::create(op, llvmTypeToType(ce->getType())); - - case llvm::Instruction::FPExt: - return ExtCastExpr::create(op, llvmTypeToType(ce->getType()), - ExtCastExpr::Variant::FPExt); - - case llvm::Instruction::UIToFP: - return IntToFPCastExpr::create(op, llvmTypeToType(ce->getType())); - - case llvm::Instruction::SIToFP: - return IntToFPCastExpr::create(op, llvmTypeToType(ce->getType()), - IntToFPCastExpr::Variant::SIToFP); - - case llvm::Instruction::FPToUI: - return FPToIntCastExpr::create(op, llvmTypeToType(ce->getType())); - - case llvm::Instruction::FPToSI: - return FPToIntCastExpr::create(op, llvmTypeToType(ce->getType(), true)); - - case llvm::Instruction::PtrToInt: - return PtrToIntCastExpr::create(op, llvmTypeToType(ce->getType())); - - case llvm::Instruction::IntToPtr: - return IntToPtrCastExpr::create(op, llvmTypeToType(ce->getType())); - - case llvm::Instruction::BitCast: - case llvm::Instruction::AddrSpaceCast: - // TODO: Address space casts are treated like bit casts, there might - // be a better way to deal with them. - return BitCastExpr::create(op, llvmTypeToType(ce->getType())); - - case llvm::Instruction::GetElementPtr: - return llvmGEPExpressionToExpressionInternal(ce->getOperand(0), - gep_type_begin(c), gep_type_end(c)); - - // Select - case llvm::Instruction::Select: { - ShPtr cond(llvmValueToExpression(ce->getOperand(0))); - ShPtr trueValue(llvmValueToExpression(ce->getOperand(1))); - ShPtr falseValue(llvmValueToExpression(ce->getOperand(2))); - return TernaryOpExpr::create(cond, trueValue, falseValue); - } - - // Add - case llvm::Instruction::Add: - case llvm::Instruction::FAdd: { - ShPtr op1(llvmValueToExpression(ce->getOperand(0))); - ShPtr op2(llvmValueToExpression(ce->getOperand(1))); - return AddOpExpr::create(op1, op2); - } - - // Sub - case llvm::Instruction::Sub: - case llvm::Instruction::FSub: { - ShPtr op1(llvmValueToExpression(ce->getOperand(0))); - ShPtr op2(llvmValueToExpression(ce->getOperand(1))); - return SubOpExpr::create(op1, op2); - } - - // Mul - case llvm::Instruction::Mul: - case llvm::Instruction::FMul: { - ShPtr op1(llvmValueToExpression(ce->getOperand(0))); - ShPtr op2(llvmValueToExpression(ce->getOperand(1))); - return MulOpExpr::create(op1, op2); - } - - // Div - case llvm::Instruction::UDiv: - case llvm::Instruction::SDiv: - case llvm::Instruction::FDiv: { - ShPtr op1(llvmValueToExpression(ce->getOperand(0))); - ShPtr op2(llvmValueToExpression(ce->getOperand(1))); - return DivOpExpr::create(op1, op2); - } - - // Mod - case llvm::Instruction::URem: - case llvm::Instruction::SRem: - case llvm::Instruction::FRem: { - ShPtr op1(llvmValueToExpression(ce->getOperand(0))); - ShPtr op2(llvmValueToExpression(ce->getOperand(1))); - return ModOpExpr::create(op1, op2); - } - - // Cmp - case llvm::Instruction::ICmp: - case llvm::Instruction::FCmp: { - ShPtr op1(llvmValueToExpression(ce->getOperand(0))); - ShPtr op2(llvmValueToExpression(ce->getOperand(1))); - return EqOpExpr::create(op1, op2); - } - - // And - case llvm::Instruction::And: { - ShPtr op1(llvmValueToExpression(ce->getOperand(0))); - ShPtr op2(llvmValueToExpression(ce->getOperand(1))); - return BitAndOpExpr::create(op1, op2); - } - - // Or - case llvm::Instruction::Or: { - ShPtr op1(llvmValueToExpression(ce->getOperand(0))); - ShPtr op2(llvmValueToExpression(ce->getOperand(1))); - return BitOrOpExpr::create(op1, op2); - } - - // Xor - case llvm::Instruction::Xor: { - ShPtr op1(llvmValueToExpression(ce->getOperand(0))); - ShPtr op2(llvmValueToExpression(ce->getOperand(1))); - return BitXorOpExpr::create(op1, op2); - } - - // Shl - case llvm::Instruction::Shl: { - ShPtr op1(llvmValueToExpression(ce->getOperand(0))); - ShPtr op2(llvmValueToExpression(ce->getOperand(1))); - return BitShlOpExpr::create(op1, op2); - } - - // Shr - case llvm::Instruction::LShr: - case llvm::Instruction::AShr: { - ShPtr op1(llvmValueToExpression(ce->getOperand(0))); - ShPtr op2(llvmValueToExpression(ce->getOperand(1))); - return BitShrOpExpr::create(op1, op2, - ce->getOpcode() == llvm::Instruction::LShr ? - BitShrOpExpr::Variant::Logical : - BitShrOpExpr::Variant::Arithmetical - ); - } - - default: - printErrorMessage("Unknown constant expression type: ", *ce); - llvm_unreachable(0); - break; - } - } else if (llvm::isa(c) && c->getType()->isSingleValueType()) { - return getDefaultInitializer(c->getType()); - } - - // Int - if (llvm::ConstantInt *cInt = llvm::dyn_cast(c)) { - // If the constant is only on a single bit (i.e. it is of the i1 type), - // make it a boolean constant instead of an integer constant. - if (cInt->getBitWidth() == 1) { - return ConstBool::create(!cInt->isZero()); - } - return ConstInt::create(cInt->getValue()); - } - - // Float - if (llvm::ConstantFP *cFP = llvm::dyn_cast(c)) { - return ConstFloat::create(cFP->getValueAPF()); - } - - // Array - if (llvm::ConstantArray *ca = llvm::dyn_cast(c)) { - return llvmConstantArrayToExpression(ca); - } - - // Constant aggregate zero - if (llvm::ConstantAggregateZero *caz = llvm::dyn_cast(c)) { - return getDefaultInitializer(caz->getType()); - } - - // Undefined value - if (llvm::UndefValue *uv = llvm::dyn_cast(c)) { - return getDefaultInitializer(uv->getType()); - } - - // Constant data array - if (llvm::ConstantDataArray *cda = llvm::dyn_cast(c)) { - if (cda->isString()) { - return toConstString(cda); - } - return llvmConstantDataSequentialToConstArray(cda); - } - - // Constant data sequential - if (llvm::ConstantDataSequential *cds = llvm::dyn_cast(c)) { - return llvmConstantDataSequentialToConstArray(cds); - } - - // Address of a block. - if (llvm::isa(c)) { - // TODO Add support for this type of a constant. - printErrorMessage("Unsupported constant ", *c, " of type llvm::BlockAddress*."); - llvm_unreachable(0); - } - - // Other - switch (c->getType()->getTypeID()) { - case llvm::Type::FloatTyID: - case llvm::Type::DoubleTyID: - case llvm::Type::X86_FP80TyID: - case llvm::Type::PPC_FP128TyID: - case llvm::Type::FP128TyID: { - llvm::ConstantFP *cFP = llvm::cast(c); - return ConstFloat::create(cFP->getValueAPF()); - } - - case llvm::Type::PointerTyID: { - if (llvm::ConstantPointerNull *cpn = llvm::dyn_cast(c)) { - ShPtr pointerType(cast(llvmTypeToType(cpn->getType()))); - ASSERT_MSG(pointerType, "got a pointer which is of a non-pointer type"); - return ConstNullPointer::create(pointerType); - } - - llvm::GlobalVariable *gvar = llvm::dyn_cast(c); - if (gvar && gvar->hasInitializer()) { - if (llvm::ConstantArray *ca = llvm::dyn_cast( - gvar->getInitializer())) { - return llvmConstantArrayToExpression(ca); - } - } - - return llvmValueToExpression(c); - } - - case llvm::Type::StructTyID: - if (llvm::isa(c) || llvm::isa(c)) { - llvm::StructType *st = llvm::cast(c->getType()); - if (st->getNumElements()) { - ConstStruct::Type constValue; - for (unsigned i = 0, e = st->getNumElements(); i != e; ++i) { - // TODO Is the used number of bits (32) correct? - constValue.push_back(ConstStruct::Item( - ConstInt::create(i, 32), llvmConstantToExpression( - llvm::Constant::getNullValue(st->getElementType(i))))); - } - return ConstStruct::create(constValue, - cast(llvmTypeToType(st))); - } - } else { - ConstStruct::Type constValue; - for (unsigned i = 0, e = c->getNumOperands(); i != e; ++i) { - // TODO Is the used number of bits (32) correct? - constValue.push_back(ConstStruct::Item( - ConstInt::create(i, 32), llvmConstantToExpression( - llvm::cast(c->getOperand(i))))); - } - return ConstStruct::create(constValue, - cast(llvmTypeToType(c->getType()))); - } - break; - - default: - printErrorMessage("Unknown constant type: ", *c, " (ID: ", - c->getType()->getTypeID(), ")"); - llvm_unreachable(0); - break; - } - - FAIL("the constant `" << *c << - "` does not correspond to an expression; this should never happen"); - return ShPtr(); -} - -/** -* @brief Converts the given LLVM value @a v into an expression in the backend IR. -* -* @par Preconditions -* - @a v is non-null and corresponds to an expression -*/ -ShPtr LLVMConverter::llvmValueToExpression(llvm::Value *v) { - PRECONDITION_NON_NULL(v); - - ShPtr expr = llvmValueToExpressionInternal(v); - if (!isAddressExposed(v)) { - return expr; - } - - // If the expression is a string literal, then instead of &"string", return - // just "string". - if (isa(expr)) { - return expr; - } - - return ShPtr(AddressOpExpr::create(expr)); -} - -/** -* @brief Converts the given LLVM value @a v into an expression in the backend -* IR. -* -* @par Preconditions -* - @a v is non-null and corresponds to an expression -* -* TODO Highlight the difference between this function and -* llvmValueToExpression(). -*/ -ShPtr LLVMConverter::llvmValueToExpressionInternal(llvm::Value *v) { - PRECONDITION_NON_NULL(v); - - if (llvm::Instruction *i = llvm::dyn_cast(v)) { - // Should we inline this instruction? - if (LLVMSupport::isInlinableInst(i) && !LLVMSupport::isDirectAlloca(i)) { - return cast(visit(*i)); - } - } - - if (llvm::Constant *c = llvm::dyn_cast(v)) { - // The conversion depends on whether the constant is a global value or - // not. - if (llvm::isa(c)) { - // Check if the value is a string literal. If so, return it. - if (llvm::GlobalVariable *gv = llvm::dyn_cast(c)) { - if (resModule->isGlobalVarStoringStringLiteral(gv->getName()) || - stores8BitStringLiteral(gv)) { - return getInitializerAsConstString(gv); - } - } - } else { - return llvmConstantToExpression(c); - } - } - - // It is nothing from above, return a variable. - std::string operandName = varsHandler->getValueName(v); - ShPtr var = varsHandler->getVariableByName(operandName); - if (isa(var->getType())) { - llvm::Type *fixedType = v->getType(); - if (llvm::Type *allocatedType = varsHandler->getAllocatedVarType(v)) { - fixedType = allocatedType; - } - var->setType(llvmTypeToType(fixedType)); - } - return var; -} - -/** -* @brief Returns the result of dereferencing the specified operand with '*'. -* -* This is equivalent to generating a dereference operator and then using -* llvmValueToExpression(), but avoids excess syntax in some cases. -* -* @par Preconditions -* - @a v is non-null and has a default initializer -*/ -ShPtr LLVMConverter::llvmValueToExpressionDeref(llvm::Value *v) { - PRECONDITION_NON_NULL(v); - - if (isAddressExposed(v)) { - // Already something with its address exposed. - return llvmValueToExpressionInternal(v); - } - return DerefOpExpr::create(llvmValueToExpression(v)); -} - -/** -* @brief Converts the given LLVM GetElementPtr expression into an expression in -* the backend IR. -* -* @param[in] ptr GetElementPtr expression. -* @param[in] i Iterator to the first index. -* @param[in] e Iterator one past the last index. -* -* @par Preconditions -* - @a ptr is non-null and is a GetElementPtr expression -*/ -ShPtr LLVMConverter::llvmGEPExpressionToExpressionInternal( - llvm::Value *ptr, llvm::gep_type_iterator i, llvm::gep_type_iterator e) { - PRECONDITION_NON_NULL(ptr); - - // If there are no indices, just return the pointer. - if (i == e) { - return llvmValueToExpression(ptr); - } - - // If the expression is a global constant storing a string literal, return - // directly it, not an access to a variable that stores it (this makes e.g. - // printf() calls more readable). - llvm::GlobalVariable *gv = llvm::dyn_cast(ptr); - if (gv && (resModule->isGlobalVarStoringStringLiteral(gv->getName()) || - stores8BitStringLiteral(gv))) { - for (auto gi = module->global_begin(), ge = module->global_end(); - gi != ge; ++gi) { - if (gi->getName() != gv->getName()) { - continue; - } - - // We have found a matching global variable. - return getInitializerAsConstString(gv); - } - } - - // If the first index is 0 (very typical), we can do a number of - // simplifications to clean up the resulting expression. - llvm::Value *firstOp = i.getOperand(); - ShPtr currentOperand; - if (!llvm::isa(firstOp) || !llvm::cast(firstOp)->isNullValue()) { - // The first index isn't simple, transform it the hard way. - currentOperand = llvmValueToExpression(ptr); - } else { - ++i; // Skip the zero index. - - currentOperand = llvmValueToExpressionInternal(ptr); - if (i != e && (*i)->isStructTy()) { - currentOperand = StructIndexOpExpr::create(currentOperand, - ConstInt::create(llvm::cast(i.getOperand())->getValue())); - ++i; // Eat the struct index as well. - } - } - - for (; i != e; ++i) { - if ((*i)->isStructTy()) { - currentOperand = StructIndexOpExpr::create(currentOperand, - ConstInt::create(llvm::cast(i.getOperand())->getValue())); - } else { - currentOperand = ArrayIndexOpExpr::create(currentOperand, - llvmValueToExpression(i.getOperand())); - } - } - - return AddressOpExpr::create(currentOperand); -} - -/** -* @brief Returns the initializer of the given LLVM global variable @a v. -* -* If @a v doesn't have an initializer, it returns the null pointer. -* -* @par Preconditions -* - @a v is non-null -*/ -ShPtr LLVMConverter::getInitializer(llvm::GlobalVariable *v) { - PRECONDITION_NON_NULL(v); - - if (!v->hasInitializer()) { - // No initializer. - return ShPtr(); - } - - return llvmConstantToExpression(v->getInitializer()); -} - -/** -* @brief Returns the default initializer for the given LLVM type @a t. -* -* @par Preconditions -* - @a t is non-null and has a default initializer -*/ -ShPtr LLVMConverter::getDefaultInitializer(llvm::Type *t) { - PRECONDITION_NON_NULL(t); - - switch (t->getTypeID()) { - case llvm::Type::IntegerTyID: { - llvm::IntegerType *it = llvm::cast(t); - // If the bit width of the type is only on a single bit (i.e. i1), - // make it a boolean instead of an integer. - if (it->getBitWidth() == 1) { - return ConstBool::create(false); - } - return ConstInt::create(0, it->getBitWidth()); - } - - case llvm::Type::FloatTyID: - case llvm::Type::DoubleTyID: - case llvm::Type::X86_FP80TyID: - case llvm::Type::PPC_FP128TyID: - case llvm::Type::FP128TyID: - return ConstFloat::create(llvm::APFloat(0.0)); - - case llvm::Type::PointerTyID: { - ShPtr type(cast(llvmTypeToType(t))); - ASSERT_MSG(type, "got a pointer which is of a non-pointer type"); - return ConstNullPointer::create(type); - } - - case llvm::Type::StructTyID: { - // Recursively generate an initializer for a structure of the given - // type. - ConstStruct::Type constValue; - for (unsigned i = 0, n = t->getNumContainedTypes(); i < n; ++i) { - // TODO Is the used number of bits (32) correct? - constValue.push_back(ConstStruct::Item( - ConstInt::create(i, 32), - getDefaultInitializer(t->getContainedType(i)))); - } - return ConstStruct::create(constValue, - cast(llvmTypeToType(t))); - } - - case llvm::Type::ArrayTyID: { - llvm::ArrayType *arrayType = llvm::dyn_cast(t); - ShPtr birArrayType = ucast(llvmTypeToType(arrayType)); - return ConstArray::createUninitialized(birArrayType); - } - - case llvm::Type::VectorTyID: - // TODO Add support for this type. - printErrorMessage("The vector type is not supported"); - llvm_unreachable(0); - break; - - default: - printErrorMessage("Unknown initializer for a type with ID ", - t->getTypeID()); - llvm_unreachable(0); - break; - } -} - -/** -* @brief Converts the given LLVM constant array @a ca into a constant array in -* the backend IR. -* -* If @a ca is a null value (@c ca->isNullValue()), the empty array is returned. -*/ -ShPtr LLVMConverter::llvmConstantArrayToConstArray(llvm::ConstantArray *ca) { - PRECONDITION_NON_NULL(ca); - - // If the array has no initializer, return the empty array. - if (ca->isNullValue()) { - return ucast(getDefaultInitializer(ca->getType())); - } - - // It has an initializer. - ExprVector array; - for (unsigned i = 0, e = ca->getNumOperands(); i != e; ++i) { - array.push_back(llvmConstantToExpression(ca->getOperand(i))); - } - return createInitializedConstArray(array); -} - -/** -* @brief Converts the given LLVM constant array @a ca into an expression in the -* backend IR. -*/ -ShPtr LLVMConverter::llvmConstantArrayToExpression(llvm::ConstantArray *ca) { - PRECONDITION_NON_NULL(ca); - - if (is8BitStringLiteral(ca)) { - return toConstString(ca); - } - return llvmConstantArrayToConstArray(ca); -} - -/** -* @brief Converts the given LLVM constant data sequential @a cds into a constant -* array in the backend IR. -* -* If @a cds is a null value (@c cds->isNullValue()), the empty array is -* returned. -*/ -ShPtr LLVMConverter::llvmConstantDataSequentialToConstArray( - llvm::ConstantDataSequential *cds) { - PRECONDITION_NON_NULL(cds); - - // If cds has no initializer, return the empty array. - if (cds->isNullValue()) { - return ucast(getDefaultInitializer(cds->getType())); - } - - // It has an initializer. - ExprVector array; - for (unsigned i = 0, e = cds->getNumElements(); i != e; ++i) { - array.push_back(llvmConstantToExpression(cds->getElementAsConstant(i))); - } - return createInitializedConstArray(array); -} - -/** -* @brief Converts the given LLVM type @a llvmType into a type in the backend -* IR. -* -* If @a llvmType cannot be converted into a type, the @c UnknownType is -* returned. If @a llvmSigned is @c true, the created type will be signed, -* otherwise unsigned. -* -* @par Preconditions -* - @a llvmType is non-null -*/ -ShPtr LLVMConverter::llvmTypeToType(llvm::Type *llvmType, bool llvmSigned) { - PRECONDITION_NON_NULL(llvmType); - - // Since there may be recursive types (e.g. a structure contains a pointer - // to itself), before calling this function recursively, we: - // - check if llvmTypeToTypeMap[llvmType] already exists; if this is - // so, it is used instead of converting llvmType by recursively - // calling this function - // - if it doesn't exist, we create a type in our IR - // - we add a mapping of llvmType into it - // - we convert the type and return the result - // In this way, we avoid infinite recursion occurring when converting - // recursive data types. - // - // Since signed types differ from unsigned types, we use two maps: one for - // signed types, one for unsigned types. - // Signed types should be only integers. - if (llvmSigned && llvmType->isIntegerTy()) { - if (mapHasKey(llvmTypeToSignedTypeMap, llvmType)) { - return llvmTypeToSignedTypeMap[llvmType]; - } - } else { - if (mapHasKey(llvmTypeToTypeMap, llvmType)) { - return llvmTypeToTypeMap[llvmType]; - } - } - - // Function type. - if (llvmType->isFunctionTy()) { - return llvmTypeToTypeMap[llvmType] = llvmFunctionTypeToFunctionType( - llvm::dyn_cast(llvmType)); - } - - // Pointer. - if (llvm::PointerType *pt = llvm::dyn_cast(llvmType)) { - // First, we create a dummy pointer type. - ShPtr convertedType(PointerType::create( - IntType::create(1, false))); - - // Store a reference to it so it may be used in the nested - // llvmTypeToType call. - llvmTypeToTypeMap[llvmType] = convertedType; - - // Convert the nested type. - convertedType->setContainedType(llvmTypeToType(pt->getContainedType(0))); - return convertedType; - } - - // Signed integer. - if (llvmSigned && llvmType->isIntegerTy()) { - return llvmTypeToSignedTypeMap[llvmType] = IntType::create( - llvmType->getScalarSizeInBits(), true); - // Unsigned integer. - } else if (llvmType->isIntegerTy()) { - return llvmTypeToTypeMap[llvmType] = IntType::create( - llvmType->getScalarSizeInBits(), false); - } - - // Float. - if (llvmType->isFloatTy()) { - return llvmTypeToTypeMap[llvmType] = FloatType::create(32); - } else if (llvmType->isDoubleTy()) { - return llvmTypeToTypeMap[llvmType] = FloatType::create(64); - } else if (llvmType->isX86_FP80Ty()) { - return llvmTypeToTypeMap[llvmType] = FloatType::create(80); - } else if (llvmType->isFP128Ty() || llvmType->isPPC_FP128Ty()) { - return llvmTypeToTypeMap[llvmType] = FloatType::create(128); - } - - // Array. - if (llvmType->isArrayTy()) { - ArrayType::Dimensions arrayDims; - llvm::ArrayType *arrayType = llvm::dyn_cast(llvmType); - llvm::ArrayType *arrayTypeTmp; - do { - arrayDims.push_back(arrayType->getNumElements()); - arrayTypeTmp = arrayType; - } while ((arrayType = llvm::dyn_cast(arrayType->getContainedType(0)))); - return llvmTypeToTypeMap[llvmType] = ArrayType::create(llvmTypeToType( - arrayTypeTmp->getContainedType(0)), arrayDims); - } - - // Structure. - if (llvmType->isStructTy()) { - StructType::ElementTypes elementTypes; - llvm::StructType *structType = llvm::dyn_cast(llvmType); - for (unsigned i = 0; i < structType->getNumElements(); ++i) { - elementTypes.push_back(llvmTypeToType(structType->getElementType(i))); - } - // Because of structures containing other structures, we have to check - // whether we have already processed the structure also at this place - // (above, there are calls to llvmTypeToType). - if (mapHasKey(llvmTypeToTypeMap, llvmType)) { - return llvmTypeToTypeMap[llvmType]; - } - // StructType::getName() cannot be called on a literal, so we have to - // first check that the type has a name and if so, we use it. - return llvmTypeToTypeMap[llvmType] = StructType::create(elementTypes, - structType->hasName() ? structType->getName() : ""); - } - - // Void. - if (llvmType->isVoidTy()) - return llvmTypeToTypeMap[llvmType] = VoidType::create(); - - // Unknown type. - return llvmTypeToTypeMap[llvmType] = UnknownType::create(); -} - -/** -* @brief Converts the given LLVM function type into a type in the backend IR. -* -* @par Preconditions -* - @a llvmType is non-null -*/ -ShPtr LLVMConverter::llvmFunctionTypeToFunctionType( - llvm::FunctionType *llvmType) { - PRECONDITION_NON_NULL(llvmType); - - ShPtr funcType(FunctionType::create()); - - // Return type. - funcType->setRetType(llvmTypeToType(llvmType->getReturnType())); - - // Variable number of arguments. - funcType->setVarArg(llvmType->isVarArg()); - - // Parameters. - for (auto i = llvmType->param_begin(), e = llvmType->param_end(); - i != e; ++i) { - funcType->addParam(llvmTypeToType(*i)); - } - - return funcType; -} - -/** -* @brief Converts the given LLVM load instruction @a i into an expression in -* the backend IR. -*/ -ShPtr LLVMConverter::llvmLoadInstToExpression(llvm::LoadInst &i) { - return llvmValueToExpressionDeref(i.getOperand(0)); -} - -/** -* @brief Converts the given LLVM store instruction @a i into an assign -* statement in the backend IR. -*/ -ShPtr LLVMConverter::llvmStoreInstToAssignStmt(llvm::StoreInst &i) { - auto assignStmt = AssignStmt::create( - llvmValueToExpressionDeref(i.getPointerOperand()), - llvmValueToExpression(i.getOperand(0)) - ); - - // We want to prevent optimization of variables used in volatile load/store - // operations, so mark such variables as external. - if (i.isVolatile()) { - if (auto lhsVar = cast(assignStmt->getLhs())) { - lhsVar->markAsExternal(); - } - } - - return assignStmt; -} - -/** -* @brief Converts the given LLVM select instruction @a i into a ternary -* operator in the backend IR. -*/ -ShPtr LLVMConverter::llvmSelectInstToTernaryOp(llvm::SelectInst &i) { - ShPtr cond(llvmValueToExpression(i.getCondition())); - ShPtr trueValue(llvmValueToExpression(i.getTrueValue())); - ShPtr falseValue(llvmValueToExpression(i.getFalseValue())); - return TernaryOpExpr::create(cond, trueValue, falseValue); -} - -/** -* @brief Converts the given LLVM call instruction @a i into a call statement in -* the backend IR. -*/ -ShPtr LLVMConverter::llvmCallInstToCallStmt(llvm::CallInst &i) { - ShPtr calledExpr = llvmValueToExpression(i.getCalledValue()); - - // Obtain arguments. - ExprVector args; - unsigned argNo = 0; - llvm::CallSite cs(&i); - for (auto ai = cs.arg_begin(), ae = cs.arg_end(); ai != ae; ++ai) { - // Check if the argument is expected to be passed by value. - ShPtr arg = (i.paramHasAttr(argNo + 1, llvm::Attribute::ByVal)) ? - llvmValueToExpressionDeref(*ai) : llvmValueToExpression(*ai); - args.push_back(arg); - argNo++; - } - - ShPtr callExpr(CallExpr::create(calledExpr, args)); - return CallStmt::create(callExpr); -} - -/** -* @brief Converts the given LLVM GetElementPtr instruction @a i into an -* expression in the backend IR. -*/ -ShPtr LLVMConverter::llvmGEPInstToExpression(llvm::GetElementPtrInst &i) { - return llvmGEPExpressionToExpressionInternal(i.getPointerOperand(), - gep_type_begin(i), gep_type_end(i)); -} - -/** -* @brief Converts the given LLVM alloca instruction @a i into an initializer. -*/ -ShPtr LLVMConverter::llvmAllocaInstToExpression(llvm::AllocaInst &i) { - return getDefaultInitializer(i.getType()->getElementType()); -} - -/** -* @brief Converts the given LLVM instruction @a i into a value in the backend IR. -*/ -ShPtr LLVMConverter::llvmInstructionToValue(llvm::Instruction &i) { - return visit(i); -} - -/** -* @brief Converts the given LLVM return instruction into a return statement in -* the backend IR. -*/ -ShPtr LLVMConverter::llvmReturnInstToReturnStmt(llvm::ReturnInst &i) { - // NOTE: Do NOT try to eliminate the return statement here if this - // instruction is the last in a basic block. This approach may fail if the - // instruction is the last one prior to another case branch etc. Instead, - // use VoidReturnOptimizer. - - ShPtr retVal; - if (i.getNumOperands() > 0) { - retVal = llvmValueToExpression(i.getOperand(0)); - } - return ReturnStmt::create(retVal); -} - -/** -* @brief Converts the given LLVM binary operator @a i into an expression in -* the backend IR. -* -* @par Preconditions -* - @a i is a binary operation -*/ -ShPtr LLVMConverter::llvmBinaryOperatorToExpression(llvm::Instruction &i) { - // Binary instructions, shift instructions, setCond instructions. - PRECONDITION(!i.getType()->isPointerTy(), "it should not be a pointer"); - - // If this is a negation operation, generate it out as such. For - // floating-points, we don't want to generate "-0.0 - X". - - if (llvm::BinaryOperator::isNeg(&i)) { - ShPtr op(llvmValueToExpression( - llvm::BinaryOperator::getNegArgument(llvm::cast(&i)))); - return NegOpExpr::create(op); - } else if (llvm::BinaryOperator::isFNeg(&i)) { - ShPtr op(llvmValueToExpression( - llvm::BinaryOperator::getFNegArgument(llvm::cast(&i)))); - return NegOpExpr::create(op); - } else { - ShPtr op1(llvmValueToExpression(i.getOperand(0))); - ShPtr op2(llvmValueToExpression(i.getOperand(1))); - - switch (i.getOpcode()) { - case llvm::Instruction::Add: - case llvm::Instruction::FAdd: - return AddOpExpr::create(op1, op2); - - case llvm::Instruction::Sub: - case llvm::Instruction::FSub: - return SubOpExpr::create(op1, op2); - - case llvm::Instruction::Mul: - case llvm::Instruction::FMul: - return MulOpExpr::create(op1, op2); - - case llvm::Instruction::URem: - return ModOpExpr::create(op1, op2); - case llvm::Instruction::SRem: - return ModOpExpr::create(op1, op2, ModOpExpr::Variant::SMod); - case llvm::Instruction::FRem: - return ModOpExpr::create(op1, op2, ModOpExpr::Variant::FMod); - - case llvm::Instruction::UDiv: - return DivOpExpr::create(op1, op2); - case llvm::Instruction::SDiv: - return DivOpExpr::create(op1, op2, DivOpExpr::Variant::SDiv); - case llvm::Instruction::FDiv: - return DivOpExpr::create(op1, op2, DivOpExpr::Variant::FDiv); - - case llvm::Instruction::And: - return BitAndOpExpr::create(op1, op2); - - case llvm::Instruction::Or: - return BitOrOpExpr::create(op1, op2); - - case llvm::Instruction::Xor: - return BitXorOpExpr::create(op1, op2); - - case llvm::Instruction::Shl: - return BitShlOpExpr::create(op1, op2); - - case llvm::Instruction::AShr: - return BitShrOpExpr::create(op1, op2, BitShrOpExpr::Variant::Arithmetical); - - case llvm::Instruction::LShr: - return BitShrOpExpr::create(op1, op2, BitShrOpExpr::Variant::Logical); - - default: - llvm::errs() << "Invalid operator type: " << i << "\n"; - llvm_unreachable(0); - break; - } - } -} - -/** -* @brief Converts the given LLVM integer comparison instruction @a i into an -* expression in the backend IR. -*/ -ShPtr LLVMConverter::llvmICmpInstToExpression(llvm::ICmpInst &i) { - // Get both operands. - ShPtr op1 = llvmValueToExpression(i.getOperand(0)); - ShPtr op2 = llvmValueToExpression(i.getOperand(1)); - - // Get the operator. - ShPtr op; - switch (i.getPredicate()) { - case llvm::ICmpInst::ICMP_EQ: - return EqOpExpr::create(op1, op2); - case llvm::ICmpInst::ICMP_NE: - return NeqOpExpr::create(op1, op2); - case llvm::ICmpInst::ICMP_ULE: - return LtEqOpExpr::create(op1, op2); - case llvm::ICmpInst::ICMP_SLE: - return LtEqOpExpr::create(op1, op2, LtEqOpExpr::Variant::SCmp); - case llvm::ICmpInst::ICMP_UGE: - return GtEqOpExpr::create(op1, op2); - case llvm::ICmpInst::ICMP_SGE: - return GtEqOpExpr::create(op1, op2, GtEqOpExpr::Variant::SCmp); - case llvm::ICmpInst::ICMP_ULT: - return LtOpExpr::create(op1, op2); - case llvm::ICmpInst::ICMP_SLT: - return LtOpExpr::create(op1, op2, LtOpExpr::Variant::SCmp); - case llvm::ICmpInst::ICMP_UGT: - return GtOpExpr::create(op1, op2); - case llvm::ICmpInst::ICMP_SGT: - return GtOpExpr::create(op1, op2, GtOpExpr::Variant::SCmp); - default: - printErrorMessage("Invalid ICmp predicate ", i); - llvm_unreachable(0); - return ShPtr(); - } -} - -/** -* @brief Converts the given LLVM floating-point comparison instruction @a i -* into an expression in the backend IR. -*/ -ShPtr LLVMConverter::llvmFCmpInstToExpression(llvm::FCmpInst &i) { - if (i.getPredicate() == llvm::FCmpInst::FCMP_FALSE) { - return ConstBool::create(false); - } - if (i.getPredicate() == llvm::FCmpInst::FCMP_TRUE) { - return ConstBool::create(true); - } - - return optionStrictFPUSemantics ? - llvmFCmpBinInstToExpressionStrictFPUSemantics(i) : - llvmFCmpBinInstToExpressionNonStrictFPUSemantics(i); -} - -/** -* @brief Converts the given LLVM floating-point comparison instruction (binary -* predicate) @a i into an expression in the backend IR. -* -* Uses non-strict FPU semantics. -*/ -ShPtr LLVMConverter::llvmFCmpBinInstToExpressionNonStrictFPUSemantics( - llvm::FCmpInst &i) { - ASSERT_MSG(i.getNumOperands() >= 2, - "expected a binary predicate, got a unary predicate " << i); - - // The following switch is based on the on in - // llvmFCmpBinInstToExpressionStrictFPUSemantics() but creates a more - // simple expression. - ShPtr x(llvmValueToExpression(i.getOperand(0))); - ShPtr y(llvmValueToExpression(i.getOperand(1))); - switch (i.getPredicate()) { - case llvm::FCmpInst::FCMP_UNO: - return OrOpExpr::create( - NeqOpExpr::create(x, x), - NeqOpExpr::create(y, y)); - case llvm::FCmpInst::FCMP_ORD: - return AndOpExpr::create( - EqOpExpr::create(x, x), - EqOpExpr::create(y, y)); - case llvm::FCmpInst::FCMP_UEQ: - case llvm::FCmpInst::FCMP_OEQ: - return EqOpExpr::create(x, y); - case llvm::FCmpInst::FCMP_ULT: - case llvm::FCmpInst::FCMP_OLT: - return LtOpExpr::create(x, y); - case llvm::FCmpInst::FCMP_ULE: - case llvm::FCmpInst::FCMP_OLE: - return LtEqOpExpr::create(x, y); - case llvm::FCmpInst::FCMP_UGT: - case llvm::FCmpInst::FCMP_OGT: - return GtOpExpr::create(x, y); - case llvm::FCmpInst::FCMP_UGE: - case llvm::FCmpInst::FCMP_OGE: - return GtEqOpExpr::create(x, y); - case llvm::FCmpInst::FCMP_UNE: - case llvm::FCmpInst::FCMP_ONE: - return NeqOpExpr::create(x, y); - default: - printErrorMessage("Invalid FCmp predicate ", i); - llvm_unreachable(0); - break; - } -} - -/** -* @brief Converts the given LLVM floating-point comparison instruction (binary -* predicate) @a i into an expression in the backend IR. -* -* Uses strict FPU semantics. -*/ -ShPtr LLVMConverter::llvmFCmpBinInstToExpressionStrictFPUSemantics( - llvm::FCmpInst &i) { - ASSERT_MSG(i.getNumOperands() >= 2, - "expected a binary predicate, got a unary predicate " << i); - - // The following switch is created from functions introduced by CBackend. - ShPtr x(llvmValueToExpression(i.getOperand(0))); - ShPtr y(llvmValueToExpression(i.getOperand(1))); - switch (i.getPredicate()) { - case llvm::FCmpInst::FCMP_UNO: - // llvm_fcmp_uno(x,y) { return x != x || y != y; } - return OrOpExpr::create( - NeqOpExpr::create(x, x), - NeqOpExpr::create(y, y)); - case llvm::FCmpInst::FCMP_UEQ: - // llvm_fcmp_ueq(x,y) { return x == y || llvm_fcmp_uno(x, y); } - return OrOpExpr::create( - EqOpExpr::create(x, y), - OrOpExpr::create( - NeqOpExpr::create(x, x), - NeqOpExpr::create(y, y))); - case llvm::FCmpInst::FCMP_ULT: - // llvm_fcmp_ult(x,y) { return x < y || llvm_fcmp_uno(x, y); } - return OrOpExpr::create( - LtOpExpr::create(x, y), - OrOpExpr::create( - NeqOpExpr::create(x, x), - NeqOpExpr::create(y, y))); - case llvm::FCmpInst::FCMP_ULE: - // llvm_fcmp_ule(x,y) { return x <= y || llvm_fcmp_uno(x, y); } - return OrOpExpr::create( - LtEqOpExpr::create(x, y), - OrOpExpr::create( - NeqOpExpr::create(x, x), - NeqOpExpr::create(y, y))); - case llvm::FCmpInst::FCMP_UGT: - // llvm_fcmp_ugt(x,y) { return x > y || llvm_fcmp_uno(x, y); } - return OrOpExpr::create( - GtOpExpr::create(x, y), - OrOpExpr::create( - NeqOpExpr::create(x, x), - NeqOpExpr::create(y, y))); - case llvm::FCmpInst::FCMP_UGE: - // llvm_fcmp_uge(x,y) { return x >= y || llvm_fcmp_uno(x, y); } - return OrOpExpr::create( - GtEqOpExpr::create(x, y), - OrOpExpr::create( - NeqOpExpr::create(x, x), - NeqOpExpr::create(y, y))); - case llvm::FCmpInst::FCMP_ORD: - // llvm_fcmp_ord(x,y) { return x == x && y == y; } - return AndOpExpr::create( - EqOpExpr::create(x, x), - EqOpExpr::create(y, y)); - case llvm::FCmpInst::FCMP_ONE: - // llvm_fcmp_one(x,y) { return x != y && llvm_fcmp_ord(x, y); } - return AndOpExpr::create( - NeqOpExpr::create(x, y), - AndOpExpr::create( - EqOpExpr::create(x, x), - EqOpExpr::create(y, y))); - case llvm::FCmpInst::FCMP_UNE: - // llvm_fcmp_une(x,y) { return x != y; } - return NeqOpExpr::create(x, y); - case llvm::FCmpInst::FCMP_OEQ: - // llvm_fcmp_oeq(x,y) { return x == y ; } - return EqOpExpr::create(x, y); - case llvm::FCmpInst::FCMP_OLT: - // llvm_fcmp_olt(x,y) { return x < y ; } - return LtOpExpr::create(x, y); - case llvm::FCmpInst::FCMP_OLE: - // llvm_fcmp_ole(x,y) { return x <= y ; } - return LtEqOpExpr::create(x, y); - case llvm::FCmpInst::FCMP_OGT: - // llvm_fcmp_ogt(x,y) { return x > y ; } - return GtOpExpr::create(x, y); - case llvm::FCmpInst::FCMP_OGE: - // llvm_fcmp_oge(x,y) { return x >= y ; } - return GtEqOpExpr::create(x, y); - default: - printErrorMessage("Invalid FCmp predicate ", i); - llvm_unreachable(0); - break; - } -} - -/** -* @brief Converts the given LLVM insert value instruction @a i into a -* statement in the backend IR. -* -* The resulting statement are actually two statements. -*/ -ShPtr LLVMConverter::llvmInsertValueInstToStatement(llvm::InsertValueInst &i) { - ShPtr lhs, rhs; - - // Create the aggregate that is accessed by the instruction. - lhs = llvmValueToExpression(&i); - rhs = llvmValueToExpression(i.getOperand(0)); - ShPtr aggregateDefStmt(AssignStmt::create(lhs, rhs)); - - // Create the accesses. - lhs = generateAccessesToCompositeType( - llvm::dyn_cast(i.getOperand(0)->getType()), - i.getIndices(), - varsHandler->getVariableByName(varsHandler->getValueName(&i))); - rhs = llvmValueToExpression(i.getOperand(1)); - ShPtr aggregateAccessStmt(AssignStmt::create(lhs, rhs)); - - return Statement::mergeStatements(aggregateDefStmt, aggregateAccessStmt); -} - -/** -* @brief Converts the given LLVM extract value instruction @a i into an -* expression in the backend IR. -*/ -ShPtr LLVMConverter::llvmExtractValueInstToExpression( - llvm::ExtractValueInst &i) { - if (llvm::isa(i.getOperand(0))) { - return getDefaultInitializer(i.getType()); - } - return generateAccessesToCompositeType( - llvm::dyn_cast(i.getOperand(0)->getType()), - i.getIndices(), - varsHandler->getVariableByName( - varsHandler->getValueName(i.getOperand(0)))); -} - -/** -* @brief Generates accesses to the given composite type from the given array of -* indices, starting at the given base expression. -*/ -ShPtr LLVMConverter::generateAccessesToCompositeType( - llvm::CompositeType *ct, llvm::ArrayRef indices, - ShPtr base) { - ShPtr expr(base); - for (const auto index : indices) { - // TODO Is the used number of bits (32) correct? - ShPtr currIndex(ConstInt::create(index, 32)); - - if (llvm::isa(ct)) { - expr = StructIndexOpExpr::create(expr, currIndex); - } else { - expr = ArrayIndexOpExpr::create(expr, currIndex); - } - ct = llvm::dyn_cast(ct->getTypeAtIndex(index)); - } - return expr; -} - -ShPtr LLVMConverter::visitBinaryOperator(llvm::Instruction &i) { - return llvmBinaryOperatorToExpression(i); -} - -ShPtr LLVMConverter::visitCastInst(llvm::CastInst &i) { - ShPtr op(llvmValueToExpression(i.getOperand(0))); - switch (i.getOpcode()) { - case llvm::Instruction::Trunc: - return TruncCastExpr::create(op, llvmTypeToType(i.getType())); - case llvm::Instruction::ZExt: - return ExtCastExpr::create(op, llvmTypeToType(i.getType())); - case llvm::Instruction::SExt: - return ExtCastExpr::create(op, llvmTypeToType(i.getType(), true), - ExtCastExpr::Variant::SExt); - case llvm::Instruction::FPTrunc: - return TruncCastExpr::create(op, llvmTypeToType(i.getType())); - case llvm::Instruction::FPExt: - return ExtCastExpr::create(op, llvmTypeToType(i.getType()), - ExtCastExpr::Variant::FPExt); - case llvm::Instruction::UIToFP: - return IntToFPCastExpr::create(op, llvmTypeToType(i.getType())); - case llvm::Instruction::SIToFP: - return IntToFPCastExpr::create(op, llvmTypeToType(i.getType()), - IntToFPCastExpr::Variant::SIToFP); - case llvm::Instruction::FPToUI: - return FPToIntCastExpr::create(op, llvmTypeToType(i.getType())); - case llvm::Instruction::FPToSI: - return FPToIntCastExpr::create(op, llvmTypeToType(i.getType(), true)); - case llvm::Instruction::PtrToInt: - return PtrToIntCastExpr::create(op, llvmTypeToType(i.getType())); - case llvm::Instruction::IntToPtr: - return IntToPtrCastExpr::create(op, llvmTypeToType(i.getType())); - case llvm::Instruction::BitCast: - case llvm::Instruction::AddrSpaceCast: - // TODO: Address space casts are treated like bit casts, there might - // be a better way to deal with them. - return BitCastExpr::create(op, llvmTypeToType(i.getType())); - default: - return llvmValueToExpression(i.getOperand(0)); - } -} - -ShPtr LLVMConverter::visitCallInst(llvm::CallInst &i) { - return llvmCallInstToCallStmt(i); -} - -ShPtr LLVMConverter::visitAllocaInst(llvm::AllocaInst &i) { - return llvmAllocaInstToExpression(i); -} - -ShPtr LLVMConverter::visitInsertValueInst(llvm::InsertValueInst &i) { - return llvmInsertValueInstToStatement(i); -} - -ShPtr LLVMConverter::visitExtractValueInst(llvm::ExtractValueInst &i) { - return llvmExtractValueInstToExpression(i); -} - -ShPtr LLVMConverter::visitGetElementPtrInst(llvm::GetElementPtrInst &i) { - return llvmGEPExpressionToExpressionInternal(i.getPointerOperand(), - gep_type_begin(i), gep_type_end(i)); -} - -ShPtr LLVMConverter::visitICmpInst(llvm::ICmpInst &i) { - return llvmICmpInstToExpression(i); -} - -ShPtr LLVMConverter::visitFCmpInst(llvm::FCmpInst &i) { - return llvmFCmpInstToExpression(i); -} - -ShPtr LLVMConverter::visitLoadInst(llvm::LoadInst &i) { - return llvmLoadInstToExpression(i); -} - -ShPtr LLVMConverter::visitStoreInst(llvm::StoreInst &i) { - return llvmStoreInstToAssignStmt(i); -} - -ShPtr LLVMConverter::visitSelectInst(llvm::SelectInst &i) { - return llvmSelectInstToTernaryOp(i); -} - -ShPtr LLVMConverter::visitInstruction(llvm::Instruction &i) { - printErrorMessage("Unknown instruction:", i); // No space after ":". - llvm_unreachable(0); - return ShPtr(); -} - -/** -* @brief Enables/disables the use of strict FPU semantics. -* -* @param[in] strict If @c true, enables the use of strict FPU semantics. If @c -* false, disables the use of strict FPU semantics. -*/ -void LLVMConverter::setOptionStrictFPUSemantics(bool strict) { - optionStrictFPUSemantics = strict; -} - -} // namespace llvmir2hll -} // namespace retdec diff --git a/src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/vars_handler.cpp b/src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/vars_handler.cpp deleted file mode 100644 index 684d21d5e2..0000000000 --- a/src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/vars_handler.cpp +++ /dev/null @@ -1,197 +0,0 @@ -/** -* @file src/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/vars_handler.cpp -* @brief Implementation of VarsHandler. -* @copyright (c) 2017 Avast Software, licensed under the MIT license -*/ - -#include -#include -#include - -#include "retdec/llvmir2hll/ir/function.h" -#include "retdec/llvmir2hll/ir/module.h" -#include "retdec/llvmir2hll/ir/unknown_type.h" -#include "retdec/llvmir2hll/ir/variable.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/vars_handler.h" -#include "retdec/llvmir2hll/support/debug.h" -#include "retdec/llvm-support/diagnostics.h" -#include "retdec/utils/container.h" - -using namespace retdec::llvm_support; - -using retdec::utils::hasItem; -using retdec::utils::mapGetValueOrDefault; - -namespace retdec { -namespace llvmir2hll { - -/** -* @brief Constructs a new variables handler. -* -* @param[in] resModule Resulting module. -* @param[in] varNameGen Generator of variable names. -*/ -VarsHandler::VarsHandler(ShPtr resModule, ShPtr varNameGen): - resModule(resModule), varNameGen(varNameGen), anonVarNames(), - localVars(), allocatedVarTypes(), convertingGlobalVars(false) {} - -/** -* @brief Destructs the variables handler. -*/ -VarsHandler::~VarsHandler() {} - -/** -* @brief Remembers that we are going to convert global variables. -* -* This function has to be called before converting global variables. -*/ -void VarsHandler::startConvertingGlobalVars() { - convertingGlobalVars = true; -} - -/** -* @brief Remembers that we have stopped converting global variables. -* -* This function has to be called after converting global variables. -*/ -void VarsHandler::stopConvertingGlobalVars() { - convertingGlobalVars = false; -} - -/** -* @brief Resets containers and counters in the handler. -*/ -void VarsHandler::reset() { - localVars.clear(); - allocatedVarTypes.clear(); - varNameGen->restart(); - anonVarNames.clear(); -} - -/** -* @brief Returns the variable named by @a varName. -* -* @param[in] varName Name of the requested variable. -* -* If @a varName is the name of an existing global variable, local variable -* (including function parameters), or a function, this variable is returned. If -* there is no variable named @a varName, this function creates a new one, adds -* it either to resModule as a global variable (when @c convertingGlobalVars is -* @c true), or into @c localVars (when @c convertingGlobalVars is @c false), -* and returns it. -*/ -ShPtr VarsHandler::getVariableByName(const std::string &varName) { - // Try local variables (function parameters are included). - if (!convertingGlobalVars && hasItem(localVars, varName)) { - return localVars[varName]; - } - - // Try global variables (this should be done after checking local - // variables). - if (auto globVar = resModule->getGlobalVarByName(varName)) { - return globVar; - } - - // Try functions. - if (auto func = resModule->getFuncByName(varName)) { - return func->getAsVar(); - } - - // Create a new variable. - // Create the variable of UnknownType. A proper type will be set later. - auto var = Variable::create(varName, UnknownType::create()); - if (convertingGlobalVars) { - resModule->addGlobalVar(var); - } else { - localVars[varName] = var; - } - return var; -} - -/** -* @brief Returns a string representation of the given value @a v (it's name). -*/ -std::string VarsHandler::getValueName(const llvm::Value *v) { - PRECONDITION_NON_NULL(v); - - std::string varName(v->getName()); - - // If the variable does not have its original name, assign a new, unique - // name to it. - if (varName.empty()) { - if (hasItem(anonVarNames, v)) { - // This variable already has an assigned name, so use it. - varName = anonVarNames[v]; - } else { - // Generate a new name for this variable. - // varNameGen->getNextVarName() automatically resets itself when - // there are no available names left (this should not happen in - // practice, though). - anonVarNames[v] = varName = varNameGen->getNextVarName(); - } - } - - return varName; -} - -/** -* @brief Adds a new local variable. -* -* @param[in] var Variable to be added. -* -* If there already exists a local variable named @c var->getName(), this -* function does nothing. -*/ -void VarsHandler::addLocalVar(ShPtr var) { - if (!hasItem(localVars, var->getName())) { - localVars[var->getName()] = var; - } -} - -/** -* @brief Returns @c true if there is a local variable named @a varName, @c -* false otherwise. -* -* @param[in] varName Name of the local variable variable to be checked. -*/ -bool VarsHandler::localVarExists(const std::string &varName) const { - return hasItem(localVars, varName); -} - -/** -* @brief Returns all local variables, including parameters. -*/ -VarSet VarsHandler::getLocalVars() const { - VarSet result; - for (const auto &p : localVars) { - result.insert(p.second); - } - return result; -} - -/** -* @brief Adds a type for the given allocated LLVM variable. -* -* @param[in] var LLVM variable. -* @param[in] varType Type of @a var. -* -* If there already exists a var @a var, it replaces the originally stored type -* with @a varType. -*/ -void VarsHandler::addAllocatedVarType(llvm::Value *var, llvm::Type *varType) { - allocatedVarTypes[var] = varType; -} - -/** -* @brief Returns the type of @a var. -* -* @param[in] var LLVM variable. -* -* If there is no type corresponding to @a var, it returns the null pointer. -*/ -llvm::Type *VarsHandler::getAllocatedVarType(llvm::Value *var) const { - return mapGetValueOrDefault(allocatedVarTypes, var, nullptr); -} - -} // namespace llvmir2hll -} // namespace retdec diff --git a/tests/llvmir2hll/CMakeLists.txt b/tests/llvmir2hll/CMakeLists.txt index 954bb71602..8d672f4c4d 100644 --- a/tests/llvmir2hll/CMakeLists.txt +++ b/tests/llvmir2hll/CMakeLists.txt @@ -60,6 +60,7 @@ set(RETDEC_TESTS_LLVMIR2HLL_SOURCES llvm/llvmir2bir_converters/new_llvmir2bir_converter/llvm_value_converter_tests/llvm_constant_converter_tests.cpp llvm/llvmir2bir_converters/new_llvmir2bir_converter/llvm_value_converter_tests/llvm_constant_converter_tests_by_llvmir.cpp llvm/llvmir2bir_converters/new_llvmir2bir_converter/llvm_value_converter_tests/llvm_fcmp_converter_constants_tests.cpp + llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler_tests.cpp llvm/llvmir2bir_converters/new_llvmir2bir_converter/llvm_value_converter_tests/llvm_fcmp_converter_tests.cpp llvm/llvmir2bir_converters/new_llvmir2bir_converter/llvm_value_converter_tests/llvm_instruction_converter_constants_tests.cpp llvm/llvmir2bir_converters/new_llvmir2bir_converter/llvm_value_converter_tests/llvm_instruction_converter_tests.cpp @@ -70,8 +71,6 @@ set(RETDEC_TESTS_LLVMIR2HLL_SOURCES llvm/llvmir2bir_converters/new_llvmir2bir_converter_tests/base_tests.cpp llvm/llvmir2bir_converters/new_llvmir2bir_converter_tests/functions_tests.cpp llvm/llvmir2bir_converters/new_llvmir2bir_converter_tests/glob_vars_tests.cpp - llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler_tests.cpp - llvm/llvmir2bir_converters/orig_llvmir2bir_converter_tests.cpp llvm/string_conversions_tests.cpp optimizer/optimizers/auxiliary_variables_optimizer_tests.cpp optimizer/optimizers/bit_op_to_log_op_optimizer_tests.cpp diff --git a/tests/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler_tests.cpp b/tests/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler_tests.cpp similarity index 93% rename from tests/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler_tests.cpp rename to tests/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler_tests.cpp index 35a23d93c6..1f03354bc9 100644 --- a/tests/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler_tests.cpp +++ b/tests/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler_tests.cpp @@ -1,5 +1,5 @@ /** -* @file tests/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler_tests.cpp +* @file tests/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler_tests.cpp * @brief Tests for the @c labels_handler module. * @copyright (c) 2017 Avast Software, licensed under the MIT license */ @@ -9,7 +9,7 @@ #include #include "retdec/llvmir2hll/ir/empty_stmt.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter/labels_handler.h" +#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/new_llvmir2bir_converter/labels_handler.h" using namespace ::testing; diff --git a/tests/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter_tests.cpp b/tests/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter_tests.cpp deleted file mode 100644 index 0421e3a3a4..0000000000 --- a/tests/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter_tests.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/** -* @file tests/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter_tests.cpp -* @brief Tests for the @c orig_llvmir2bir_converter module. -* @copyright (c) 2017 Avast Software, licensed under the MIT license -*/ - -#include - -#include "retdec/llvmir2hll/ir/const_int.h" -#include "retdec/llvmir2hll/ir/int_type.h" -#include "retdec/llvmir2hll/ir/module.h" -#include "retdec/llvmir2hll/ir/variable.h" -#include "retdec/llvmir2hll/llvm/llvmir2bir_converters/orig_llvmir2bir_converter.h" -#include "llvmir2hll/llvm/llvmir2bir_converter_tests.h" -#include "retdec/llvmir2hll/support/smart_ptr.h" - -using namespace ::testing; - -namespace retdec { -namespace llvmir2hll { -namespace tests { - -/** -* @brief Tests for the @c orig_llvmir2bir_converter module. -*/ -class OrigLLVMIR2BIRConverterTests: public LLVMIR2BIRConverterTests { -protected: - ShPtr convertLLVMIR2BIR(const std::string &code); -}; - -ShPtr OrigLLVMIR2BIRConverterTests::convertLLVMIR2BIR( - const std::string &code) { - return LLVMIR2BIRConverterTests::convertLLVMIR2BIR(code); -} - -// -// Global variables. -// - -TEST_F(OrigLLVMIR2BIRConverterTests, -IntegralGlobalVariableWithInitializerIsConvertedCorrectly) { - auto module = convertLLVMIR2BIR(R"( - @g = global i32 0 - )"); - - auto g = module->getGlobalVarByName("g"); - ASSERT_TRUE(g); - auto gType = cast(g->getType()); - ASSERT_TRUE(gType); - ASSERT_EQ(32, gType->getSize()); - auto gInit = cast(module->getInitForGlobalVar(g)); - ASSERT_TRUE(gInit); - ASSERT_EQ(0, gInit->getValue()); -} - -} // namespace tests -} // namespace llvmir2hll -} // namespace retdec