Skip to content

Commit

Permalink
Add ground-truth as new reopt-explore mode of operation
Browse files Browse the repository at this point in the history
```
cabal run -v0 reopt:exe:reopt-explore --
    ground-truth [<paths>] --export-fn-results <export-dir>
```

Runs Reopt's analysis on all files in `[<paths>]` and exports the protobuf results
into the `<export-dir>` directory.
The protobufs follow the schema defined in `GroundTruth/schema/blocks.proto`
and are designed to be used to compare with ground-truth binaries
compiled using https://github.com/junxzm1990/x86-sok .
  • Loading branch information
staslyakhov committed Mar 8, 2024
1 parent 0553d34 commit 24a8e51
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 4 deletions.
29 changes: 27 additions & 2 deletions reopt-explore/CommandLine.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module CommandLine (getOptions, Options (..), Command (..), ResidualOptions (..), LLVMOptions (..)) where
module CommandLine (getOptions, Options (..), Command (..), ResidualOptions (..), LLVMOptions (..), GroundTruthOptions (..)) where

import Options.Applicative

Expand All @@ -9,7 +9,30 @@ data Options = Options
, optCommand :: Command
}

data Command = RunLLVM LLVMOptions | RunResidual ResidualOptions
data Command = RunLLVM LLVMOptions | RunResidual ResidualOptions | RunGroundTruth GroundTruthOptions

------------------------------------------------------------------------------------------
-- Ground Truth

-- | Options accepted by the @ground-truth@ sub-command.
data GroundTruthOptions = GroundTruthOptions
  { gtPaths :: ![FilePath]
  -- ^ Input paths that are searched for ELF files.
  , gtExportDir :: !(Maybe FilePath)
  -- ^ Argument of @--export-fn-results@; 'Nothing' when the flag is omitted.
  }

-- | Parser for the @ground-truth@ sub-command: one or more input paths and
-- an optional export directory, wrapped in 'RunGroundTruth'.
groundTruthP :: Parser Command
groundTruthP = RunGroundTruth <$> gtOptionsP
 where
  gtOptionsP = GroundTruthOptions <$> some argsP <*> optional exportProtoP

-- | Parser for the @--export-fn-results@ flag, whose argument names the
-- directory that receives the ground-truth protobuf files.
exportProtoP :: Parser FilePath
exportProtoP =
  strOption . mconcat $
    [ long "export-fn-results"
    , metavar "PATH"
    , help "Directory at which to write the ground truth protobufs."
    ]

------------------------------------------------------------------------------------------
-- Residual
Expand Down Expand Up @@ -146,6 +169,8 @@ commandP =
hsubparser $
command "residuals" (info residualP (progDesc "Print out the residuals in the given binary"))
<> command "llvm" (info llvmP (progDesc "Generate a binary via LLVM"))
<> command "ground-truth" (info groundTruthP (progDesc "Dump discovery information as protobuf to compare with ground truth"))


-- | Used to add a path at which to search for dynamic dependencies.
dynDepPathP :: Parser FilePath
Expand Down
132 changes: 132 additions & 0 deletions reopt-explore/GroundTruth.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
module GroundTruth where

import CommandLine (GroundTruthOptions (..), Options)
import Common (createLogger, findAllElfFilesInDirs)
import Data.ByteString.Char8 qualified as BSC
import Data.IORef (newIORef)
import Data.Macaw.Discovery.State
import Data.Map.Strict qualified as Map
import Data.Parameterized.Some (viewSome)
import Reopt (LoadOptions (..), ReoptOptions, emptyAnnDeclarations, parseElfHeaderInfo64, recoverX86Elf, runReoptM)
import Reopt.Events (initReoptSummary)
import Reopt.Utils.Exit (checkedReadFile, handleEitherStringWithExit, handleEitherWithExit)
import System.IO (hPutStrLn, stderr)

import Control.Lens
import Control.Lens.Internal.CTypes (Word64)
import Control.Monad (zipWithM_)
import Data.Macaw.CFG
import Data.Maybe (fromMaybe)
import Data.ProtoLens
import GroundTruth.Blocks as B
import GroundTruth.Blocks_Fields as B
import System.Directory (createDirectoryIfMissing)
import System.FilePath

-- | Run Reopt's recovery on a single ELF file and package the discovered
-- functions as a protobuf 'B.Module'.
--
-- @totalCount@ and the index paired with the path are used only for the
-- progress line written to stderr. Header-parsing and recovery results are
-- unwrapped with 'handleEitherStringWithExit' and 'handleEitherWithExit'.
exploreBinary :: ReoptOptions -> Int -> (Int, FilePath) -> IO B.Module
exploreBinary opts totalCount (idx, fPath) = do
  hPutStrLn stderr progressLine

  contents <- checkedReadFile fPath

  summaryRef <- newIORef (initReoptSummary fPath)
  statsRef <- newIORef mempty
  logger <- createLogger opts summaryRef statsRef

  elfHeader <- handleEitherStringWithExit (parseElfHeaderInfo64 fPath contents)

  -- Recovery yields five components. Only the Macaw discovery state is used
  -- for this analysis; Reopt's own recovered module (the "FnRep" form) and
  -- the remaining components are ignored, so data available only there is
  -- not exported.
  recovery <-
    runReoptM logger $
      recoverX86Elf loadOpts opts emptyAnnDeclarations funPrefix elfHeader
  (_os, discoveryState, _recovOut, _recoveredModule, _constraints) <-
    handleEitherWithExit recovery

  -- Encode results in protobuf schema
  pure $
    defMessage
      & B.fuc .~ [viewSome encodeFunction fn | fn <- exploredFunctions discoveryState]
 where
  progressLine =
    concat
      [ "["
      , show idx
      , " of "
      , show totalCount
      , "] Recovering protobuf of "
      , fPath
      , " ..."
      ]
  loadOpts = LoadOptions{loadOffset = Nothing}
  funPrefix = BSC.pack "reopt"

-- | Translate one discovered function into its protobuf form: the function's
-- address plus every block held in its parsed-block map.
encodeFunction :: ArchConstraints arch => DiscoveryFunInfo arch ids -> B.Function
encodeFunction funInfo =
  defMessage
    & B.va .~ entryVa
    & B.bb .~ blocks
 where
  entryVa = segmentOffsetAsInt (discoveredFunAddr funInfo)
  blocks = encodeBlock <$> Map.elems (view parsedBlocks funInfo)

-- | Translate a parsed block into a protobuf basic block carrying its start
-- address, its instruction addresses and the addresses of its successors.
encodeBlock :: ArchConstraints arch => ParsedBlock arch ids -> B.BasicBlock
encodeBlock blk =
  let startVa = segmentOffsetAsInt (pblockAddr blk) -- 0 stands in for an invalid address for now
      successors = encodeChild (pblockTermStmt blk)
   in defMessage
        & B.va .~ startVa
        & B.instructions .~ encodeInstruction blk startVa
        & B.child .~ successors

-- | Build one protobuf 'B.Child' per successor address reported by
-- 'parsedTermSucc' for the given block terminator.
encodeChild :: ArchConstraints arch => ParsedTermStmt arch ids -> [B.Child]
encodeChild term =
  [ defMessage & B.va .~ segmentOffsetAsInt succAddr
  | succAddr <- parsedTermSucc term
  ]

-- | List the address of every assembly instruction in a block.
--
-- A single assembly instruction can consist of multiple Macaw statements, so
-- only the 'InstructionStart' markers are used to identify unique
-- instructions. The offset carried by each marker is relative to the basic
-- block, hence @bbStart@ is added to obtain the full address.
encodeInstruction :: ArchConstraints arch => ParsedBlock arch ids -> Word64 -> [B.Instruction]
encodeInstruction block bbStart =
  [ defMessage & B.va .~ (bbStart + memWordValue offset)
  | InstructionStart offset _asm <- pblockStmts block
  ]

-- | Entry point of the @ground-truth@ sub-command.
--
-- Finds the ELF files under the requested paths, runs 'exploreBinary' on each
-- of them, pretty-prints every resulting message to stderr and finally writes
-- each message, protobuf-encoded, to @<output dir>/<input base name>.pb@.
-- The output directory is 'gtExportDir', or @ReoptExplorePB@ when that is
-- not set.
runGroundTruth :: Options -> GroundTruthOptions -> ReoptOptions -> IO ()
runGroundTruth _opts gtopts ropts = do
  -- Prepare the output directory before doing any analysis so that an
  -- unusable export path is reported up front rather than after every binary
  -- has been processed. Missing parent directories are created as well, so a
  -- nested export path works.
  let outputDir = fromMaybe "ReoptExplorePB" $ gtExportDir gtopts
  createDirectoryIfMissing True outputDir

  elfFiles <- findAllElfFilesInDirs (gtPaths gtopts)
  results <- mapM (exploreBinary ropts (length elfFiles)) elfFiles

  -- Pretty print results
  mapM_ (hPutStrLn stderr . showMessage) results

  -- Write protobuf results into files
  let outputFiles = map (prepFileName outputDir . snd) elfFiles
  zipWithM_ BSC.writeFile outputFiles (map encodeMessage results)
 where
  -- NOTE(review): the output name is derived from the input's base name only,
  -- so two inputs that share a base name (in different directories, or
  -- differing only in extension) map to the same output file.
  prepFileName dir f = dir </> replaceExtension (takeFileName f) "pb"

-- Address conversion helpers
--

-- | Absolute address of a segment offset as a 'Word64'. Yields 0 when
-- 'segoffAsAbsoluteAddr' has no absolute address for it (i.e. the memory is
-- a relocation).
segmentOffsetAsInt :: MemWidth w => MemSegmentOff w -> Word64
segmentOffsetAsInt = maybe 0 memWordValue . segoffAsAbsoluteAddr
6 changes: 4 additions & 2 deletions reopt-explore/Main_explore.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import Reopt
import CommandLine
import Residual
import LLVM
import GroundTruth

-- -- | Summary of results from parsing the debug info of an elf file.
-- data ExploreDebugResult =
Expand Down Expand Up @@ -119,8 +120,9 @@ main = do
roDynDepDebugPaths = optDynDepDebugPath opts ++ gdbDebugDirs
}
case optCommand opts of
RunResidual gopts -> runResidual opts gopts ropts
RunLLVM lopts -> runLLVM opts lopts ropts
RunResidual gopts -> runResidual opts gopts ropts
RunLLVM lopts -> runLLVM opts lopts ropts
RunGroundTruth gtopts -> runGroundTruth opts gtopts ropts

{-
args <- getCommandLineArgs
Expand Down
1 change: 1 addition & 0 deletions reopt.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ executable reopt-explore
main-is: Main_explore.hs
other-modules: Paths_reopt
, CommandLine
, GroundTruth
, GroundTruth.Blocks
, GroundTruth.Blocks_Fields
, Residual
Expand Down

0 comments on commit 24a8e51

Please sign in to comment.