Skip to content

Commit

Permalink
Index CoverageMap and SignatureMap by codehash (for performance) (#1160)
Browse files Browse the repository at this point in the history
* WIP implementation using codehash map

* compiles and runs; significantly faster than master

* use codehash for signaturemap

* remove metadata cache

* use codeContract rather than contract; use env rather than individual pieces of env

* refactor codehash helper functions

* hlint

* cleanup

* added test (don't know if it works yet)

* Add @arcz 's suggestions

* fallback on bytecode metadata if findSrc doesn't work

* fix tests

* rename Echidna.Types.CodehashMap to Echidna.SignatureMapping
  • Loading branch information
samalws-tob authored Jan 11, 2024
1 parent 9d502be commit e0d243a
Show file tree
Hide file tree
Showing 16 changed files with 203 additions and 123 deletions.
24 changes: 6 additions & 18 deletions lib/Echidna/Campaign.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

module Echidna.Campaign where

import Optics.Core hiding ((|>))

import Control.Concurrent (writeChan)
import Control.DeepSeq (force)
import Control.Monad (replicateM, when, void, forM_)
Expand All @@ -17,32 +15,32 @@ import Control.Monad.ST (RealWorld)
import Control.Monad.Trans (lift)
import Data.Binary.Get (runGetOrFail)
import Data.ByteString.Lazy qualified as LBS
import Data.IORef (readIORef, writeIORef, atomicModifyIORef')
import Data.IORef (readIORef, atomicModifyIORef')
import Data.Map qualified as Map
import Data.Map (Map, (\\))
import Data.Maybe (isJust, mapMaybe, fromMaybe, fromJust)
import Data.Maybe (isJust, mapMaybe, fromMaybe)
import Data.Set (Set)
import Data.Set qualified as Set
import Data.Text (Text)
import System.Random (mkStdGen)

import EVM (bytecode, cheatCode)
import EVM (cheatCode)
import EVM.ABI (getAbi, AbiType(AbiAddressType), AbiValue(AbiAddress))
import EVM.Types hiding (Env, Frame(state))

import Echidna.ABI
import Echidna.Exec
import Echidna.Mutator.Corpus
import Echidna.Shrink (shrinkTest)
import Echidna.Symbolic (forceBuf, forceAddr)
import Echidna.Symbolic (forceAddr)
import Echidna.Test
import Echidna.Transaction
import Echidna.Types (Gas)
import Echidna.Types.Campaign
import Echidna.Types.Corpus (Corpus, corpusSize)
import Echidna.Types.Coverage (scoveragePoints)
import Echidna.Types.Config
import Echidna.Types.Signature (makeBytecodeCache, FunctionName)
import Echidna.Types.Signature (FunctionName)
import Echidna.Types.Test
import Echidna.Types.Test qualified as Test
import Echidna.Types.Tx (TxCall(..), Tx(..), call)
Expand Down Expand Up @@ -86,12 +84,6 @@ runWorker
-> Int -- ^ Test limit for this worker
-> m (WorkerStopReason, WorkerState)
runWorker callback vm world dict workerId initialCorpus testLimit = do
metaCacheRef <- asks (.metadataCache)
fetchContractCacheRef <- asks (.fetchContractCache)
external <- liftIO $ Map.mapMaybe id <$> readIORef fetchContractCacheRef
let concretizeKeys = Map.foldrWithKey (Map.insert . forceAddr) mempty
liftIO $ writeIORef metaCacheRef (mkMemo (concretizeKeys vm.env.contracts <> external))

let
effectiveSeed = dict.defSeed + workerId
effectiveGenDict = dict { defSeed = effectiveSeed }
Expand Down Expand Up @@ -152,8 +144,6 @@ runWorker callback vm world dict workerId initialCorpus testLimit = do

continue = runUpdate (shrinkTest vm) >> lift callback >> run

mkMemo = makeBytecodeCache . map (forceBuf . fromJust . (^. bytecode)) . Map.elems

-- | Generate a new sequences of transactions, either using the corpus or with
-- randomly created transactions
randseq
Expand All @@ -163,18 +153,16 @@ randseq
-> m [Tx]
randseq deployedContracts world = do
env <- ask
memo <- liftIO $ readIORef env.metadataCache

let
mutConsts = env.cfg.campaignConf.mutConsts
txConf = env.cfg.txConf
seqLen = env.cfg.campaignConf.seqLen

-- TODO: include reproducer when optimizing
--let rs = filter (not . null) $ map (.testReproducer) $ ca._tests

-- Generate new random transactions
randTxs <- replicateM seqLen (genTx memo world txConf deployedContracts)
randTxs <- replicateM seqLen (genTx world deployedContracts)
-- Generate a random mutator
cmut <- if seqLen == 1 then seqMutatorsStateless (fromConsts mutConsts)
else seqMutatorsStateful (fromConsts mutConsts)
Expand Down
69 changes: 25 additions & 44 deletions lib/Echidna/Exec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ import Optics.State.Operators
import Control.Monad (when, forM_)
import Control.Monad.Catch (MonadThrow(..))
import Control.Monad.State.Strict (MonadState(get, put), execState, runStateT, MonadIO(liftIO), gets, modify', execStateT)
import Control.Monad.Reader (MonadReader, asks)
import Control.Monad.Reader (MonadReader, ask, asks)
import Control.Monad.ST (ST, stToIO, RealWorld)
import Data.Bits
import Data.ByteString qualified as BS
import Data.IORef (readIORef, atomicWriteIORef, atomicModifyIORef', newIORef, writeIORef, modifyIORef')
import Data.IORef (readIORef, atomicWriteIORef, newIORef, writeIORef, modifyIORef')
import Data.Map qualified as Map
import Data.Maybe (fromMaybe, fromJust)
import Data.Text qualified as T
Expand All @@ -31,12 +31,12 @@ import EVM.Types hiding (Env)

import Echidna.Events (emptyEvents)
import Echidna.RPC (safeFetchContractFrom, safeFetchSlotFrom)
import Echidna.SourceMapping (lookupUsingCodehashOrInsert)
import Echidna.Symbolic (forceBuf)
import Echidna.Transaction
import Echidna.Types (ExecException(..), Gas, fromEVM, emptyAccount)
import Echidna.Types.Config (Env(..), EConfig(..), UIConf(..), OperationMode(..), OutputFormat(Text))
import Echidna.Types.Coverage (CoverageInfo)
import Echidna.Types.Signature (getBytecodeMetadata, lookupBytecodeMetadata)
import Echidna.Types.Solidity (SolConf(..))
import Echidna.Types.Tx (TxCall(..), Tx, TxResult(..), call, dst, initialTimestamp, initialBlockNumber, getResult)
import Echidna.Utility (getTimestamp, timePrefix)
Expand Down Expand Up @@ -121,11 +121,6 @@ execTxWith executeTx tx = do
case ret of
-- TODO: fix hevm to not return an empty contract in case of an error
Just contract | contract.code /= RuntimeCode (ConcreteRuntimeCode "") -> do
metaCacheRef <- asks (.metadataCache)
metaCache <- liftIO $ readIORef metaCacheRef
let bc = forceBuf $ fromJust (contract ^. bytecode)
liftIO $ atomicWriteIORef metaCacheRef $ Map.insert bc (getBytecodeMetadata bc) metaCache

fromEVM (continuation contract)
liftIO $ atomicWriteIORef cacheRef $ Map.insert addr (Just contract) cache
_ -> do
Expand Down Expand Up @@ -246,13 +241,11 @@ execTxWithCov
=> Tx
-> m ((VMResult RealWorld, Gas), Bool)
execTxWithCov tx = do
covRef <- asks (.coverageRef)
metaCacheRef <- asks (.metadataCache)
cache <- liftIO $ readIORef metaCacheRef
env <- ask

covContextRef <- liftIO $ newIORef (False, Nothing)

r <- execTxWith (execCov covRef covContextRef cache) tx
r <- execTxWith (execCov env covContextRef) tx

(grew, lastLoc) <- liftIO $ readIORef covContextRef

Expand All @@ -270,7 +263,7 @@ execTxWithCov tx = do
pure (r, grew || grew')
where
-- the same as EVM.exec but collects coverage, will stop on a query
execCov covRef covContextRef cache = do
execCov env covContextRef = do
vm <- get
(r, vm') <- liftIO $ loop vm
put vm'
Expand All @@ -292,35 +285,25 @@ execTxWithCov tx = do
addCoverage :: VM RealWorld -> IO ()
addCoverage !vm = do
let (pc, opIx, depth) = currentCovLoc vm
meta = currentMeta vm
cov <- readIORef covRef
case Map.lookup meta cov of
Nothing -> do
let size = BS.length . forceBuf . fromJust . view bytecode . fromJust $
Map.lookup vm.state.contract vm.env.contracts
if size > 0 then do
vec <- VMut.new size
-- We use -1 for opIx to indicate that the location was not covered
forM_ [0..size-1] $ \i -> VMut.write vec i (-1, 0, 0)

vec' <- atomicModifyIORef' covRef $ \cm ->
-- this should reduce races
case Map.lookup meta cm of
Nothing -> (Map.insert meta vec cm, vec)
Just vec' -> (cm, vec')

VMut.write vec' pc (opIx, fromIntegral depth, 0 `setBit` fromEnum Stop)

writeIORef covContextRef (True, Just (vec', pc))
else do
-- TODO: should we collect the coverage here? Even if there is no
-- bytecode for external contract, we could have a "virtual" location
-- that PC landed at and record that.
pure ()
Just vec ->
contract = currentContract vm

maybeCovVec <- lookupUsingCodehashOrInsert env.codehashMap contract env.dapp env.coverageRef $ do
let size = BS.length . forceBuf . fromJust . view bytecode $ contract
if size == 0 then pure Nothing else do
-- IO for making a new vec
vec <- VMut.new size
-- We use -1 for opIx to indicate that the location was not covered
forM_ [0..size-1] $ \i -> VMut.write vec i (-1, 0, 0)
pure $ Just vec

case maybeCovVec of
Nothing -> pure ()
Just vec -> do
-- TODO: no-op when pc is out-of-bounds. This shouldn't happen but
-- we observed this in some real-world scenarios. This is likely a
-- bug in another place, investigate.
-- ... this should be fixed now, since we use `codeContract` instead
-- of `contract` for everything; it may be safe to remove this check.
when (pc < VMut.length vec) $
VMut.read vec pc >>= \case
(_, depths, results) | depth < 64 && not (depths `testBit` depth) -> do
Expand All @@ -332,11 +315,9 @@ execTxWithCov tx = do
-- | Get the VM's current execution location
currentCovLoc vm = (vm.state.pc, fromMaybe 0 $ vmOpIx vm, length vm.frames)

-- | Get the current contract's bytecode metadata
currentMeta vm = fromMaybe (error "no contract information on coverage") $ do
buffer <- vm ^? #env % #contracts % at vm.state.codeContract % _Just % bytecode
let bc = forceBuf $ fromJust buffer
pure $ lookupBytecodeMetadata cache bc
-- | Get the current contract being executed
currentContract vm = fromMaybe (error "no contract information on coverage") $
vm ^? #env % #contracts % at vm.state.codeContract % _Just

initialVM :: Bool -> ST s (VM s)
initialVM ffi = do
Expand Down
3 changes: 1 addition & 2 deletions lib/Echidna/Output/JSON.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import Data.Vector.Unboxed qualified as VU
import Numeric (showHex)

import EVM.Dapp (DappInfo)
import EVM.Types (keccak')

import Echidna.ABI (ppAbiValue, GenDict(..))
import Echidna.Events (Events, extractEvents)
Expand Down Expand Up @@ -110,7 +109,7 @@ encodeCampaign env workerStates = do
, _error = Nothing
, _tests = mapTest env.dapp <$> tests
, seed = worker0.genDict.defSeed
, coverage = Map.mapKeys (("0x" ++) . (`showHex` "") . keccak') $ VU.toList <$> frozenCov
, coverage = Map.mapKeys (("0x" ++) . (`showHex` "")) $ VU.toList <$> frozenCov
, gasInfo = Map.toList $ Map.unionsWith max ((.gasInfo) <$> workerStates)
}

Expand Down
3 changes: 1 addition & 2 deletions lib/Echidna/Output/Source.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import EVM.Solidity (SourceCache(..), SrcMap, SolcContract(..))

import Echidna.Types.Coverage (OpIx, unpackTxResults, CoverageMap)
import Echidna.Types.Tx (TxResult(..))
import Echidna.Types.Signature (getBytecodeMetadata)

saveCoverages
:: [CoverageFileType]
Expand Down Expand Up @@ -163,7 +162,7 @@ srcMapCov sc covMap contracts = do
where
linesCovered :: SolcContract -> IO (Map FilePath (Map Int [TxResult]))
linesCovered c =
case Map.lookup (getBytecodeMetadata c.runtimeCode) covMap of
case Map.lookup c.runtimeCodehash covMap of
Just vec -> VU.foldl' (\acc covInfo -> case covInfo of
(-1, _, _) -> acc -- not covered
(opIx, _stackDepths, txResults) ->
Expand Down
6 changes: 3 additions & 3 deletions lib/Echidna/Solidity.hs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ import Echidna.Symbolic (forceAddr)
import Echidna.Test (createTests, isAssertionMode, isPropertyMode, isDapptestMode)
import Echidna.Types.Config (EConfig(..), Env(..))
import Echidna.Types.Signature
(ContractName, SolSignature, SignatureMap, getBytecodeMetadata, FunctionName)
(ContractName, SolSignature, SignatureMap, FunctionName)
import Echidna.Types.Solidity
import Echidna.Types.Test (EchidnaTest(..))
import Echidna.Types.Tx
Expand Down Expand Up @@ -219,11 +219,11 @@ loadSpecified env name cs = do
let filtered = filterMethods contract.contractName
solConf.methodFilter
(abiOf solConf.prefix contract)
in (getBytecodeMetadata contract.runtimeCode,) <$> NE.nonEmpty filtered)
in (contract.runtimeCodehash,) <$> NE.nonEmpty filtered)
cs
else
case NE.nonEmpty fabiOfc of
Just ne -> Map.singleton (getBytecodeMetadata mainContract.runtimeCode) ne
Just ne -> Map.singleton mainContract.runtimeCodehash ne
Nothing -> mempty

-- Set up initial VM, either with chosen contract or Etheno initialization file
Expand Down
102 changes: 102 additions & 0 deletions lib/Echidna/SourceMapping.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
module Echidna.SourceMapping where

import Control.Applicative ((<|>))
import Data.ByteString (ByteString)
import Data.ByteString qualified as BS
import Data.IORef (IORef, readIORef, atomicModifyIORef')
import Data.List (find)
import Data.Map.Strict qualified as Map
import Data.Map.Strict (Map)
import Data.Maybe (mapMaybe)
import Data.Vector qualified as V
import Echidna.Symbolic (forceWord)
import EVM.Dapp (DappInfo(..), findSrc)
import EVM.Solidity (SolcContract(..))
import EVM.Types (Contract(..), ContractCode(..), RuntimeCode(..), W256, maybeLitByte)

-- | Map from contracts' codehashes to their compile-time codehash.
-- This is relevant when the immutables solidity feature is used;
-- when this feature is not used, the map will just end up being an identity map.
-- `CodehashMap` is used in signature map and coverage map lookups.
type CodehashMap = IORef (Map W256 W256)

-- | Lookup a codehash in the `CodehashMap`.
-- In the case that it's not found, find the compile-time codehash and add it to the map.
-- This is done using hevm's `findSrc` function.
lookupCodehash :: CodehashMap -> W256 -> Contract -> DappInfo -> IO W256
lookupCodehash chmap codehash contr dapp = do
chmapVal <- readIORef chmap
case Map.lookup codehash chmapVal of
Just val -> pure val
Nothing -> do
-- hevm's `findSrc` doesn't always work, since `SolcContract.immutableReferences` isn't always populated
let solcContract = findSrc contr dapp <|> findSrcByMetadata contr dapp
originalCodehash = maybe codehash (.runtimeCodehash) solcContract
atomicModifyIORef' chmap $ (, ()) . Map.insert codehash originalCodehash
pure originalCodehash

-- | Given a map from codehash to some values of type `a`, lookup a contract in the map using its codehash.
-- In current use, the `Map W256 a` will be either a `SignatureMap` or a `CoverageMap`.
-- Returns the compile-time codehash, and the map entry if it is found.
lookupUsingCodehash :: CodehashMap -> Contract -> DappInfo -> Map W256 a -> IO (W256, Maybe a)
lookupUsingCodehash chmap contr dapp mapVal =
ifNotFound codehash $ do
codehash' <- lookupCodehash chmap codehash contr dapp
ifNotFound codehash' $
pure (codehash', Nothing)
where
codehash = forceWord contr.codehash
ifNotFound key notFoundCase = case Map.lookup key mapVal of
Nothing -> notFoundCase
Just val -> pure (key, Just val)

-- | Same as `lookupUsingCodehash`, except we add to the map if we don't find anything.
-- The `make` argument is the IO to generate a new element;
-- it is only run if nothing is found in the map.
-- In the case that `make` returns `Nothing`, the map will be unchanged.
-- Returns the map entry, if it is found or generated.
lookupUsingCodehashOrInsert :: CodehashMap -> Contract -> DappInfo -> IORef (Map W256 a) -> IO (Maybe a) -> IO (Maybe a)
lookupUsingCodehashOrInsert chmap contr dapp mapRef make = do
mapVal <- readIORef mapRef
(key, valFound) <- lookupUsingCodehash chmap contr dapp mapVal
case valFound of
Just val -> pure (Just val)
Nothing -> applyModification key =<< make
where
applyModification _ Nothing = pure Nothing
applyModification key (Just val) = atomicModifyIORef' mapRef $ modifyFn key val

-- Take care of multithreaded edge case
modifyFn key val oldMap = case Map.lookup key oldMap of
Just val' -> (oldMap, Just val')
Nothing -> (Map.insert key val oldMap, Just val)

-- | Try to find a SolcContract with a matching bytecode metadata
findSrcByMetadata :: Contract -> DappInfo -> Maybe SolcContract
findSrcByMetadata contr dapp = find compareMetadata (snd <$> Map.elems dapp.solcByHash) where
compareMetadata solc = contrMeta == Just (getBytecodeMetadata solc.runtimeCode)
contrMeta = getBytecodeMetadata <$> contrCode
contrCode = case contr.code of
(UnknownCode _) -> Nothing
(InitCode c _) -> Just c
(RuntimeCode (ConcreteRuntimeCode c)) -> Just c
(RuntimeCode (SymbolicRuntimeCode c)) -> Just $ BS.pack $ mapMaybe maybeLitByte $ V.toList c

getBytecodeMetadata :: ByteString -> ByteString
getBytecodeMetadata bs =
let stripCandidates = flip BS.breakSubstring bs <$> knownBzzrPrefixes in
case find ((/= mempty) . snd) stripCandidates of
Nothing -> bs -- if no metadata is found, return the complete bytecode
Just (_, m) -> m

knownBzzrPrefixes :: [ByteString]
knownBzzrPrefixes =
-- a1 65 "bzzr0" 0x58 0x20 (solc <= 0.5.8)
[ BS.pack [0xa1, 0x65, 98, 122, 122, 114, 48, 0x58, 0x20]
-- a2 65 "bzzr0" 0x58 0x20 (solc >= 0.5.9)
, BS.pack [0xa2, 0x65, 98, 122, 122, 114, 48, 0x58, 0x20]
-- a2 65 "bzzr1" 0x58 0x20 (solc >= 0.5.11)
, BS.pack [0xa2, 0x65, 98, 122, 122, 114, 49, 0x58, 0x20]
-- a2 64 "ipfs" 0x58 0x22 (solc >= 0.6.0)
, BS.pack [0xa2, 0x64, 0x69, 0x70, 0x66, 0x73, 0x58, 0x22]
]
Loading

0 comments on commit e0d243a

Please sign in to comment.