Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement -find-nonatomic-writes filter #9

Merged
merged 7 commits into from
Apr 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,11 @@ It is recommended to file an issue when picking up one of the tasks to coordinat
* [ ] Support for setting options (for example enabling/disabling tracing into subprocesses, like `strace -f`)
* [ ] Equivalent to `strace -y` (tracking origin of file descriptors, printing paths)
* [ ] Equivalent to `strace -c` (keeping counts, summary statistics)
* [ ] Something similar to `strace -y` but telling which PID is which executable from `/proc/PID/exe`
* [ ] Extraction of `PTRACE_EVENT` detail information (see section `PTRACE_SETOPTIONS` in `man 2 ptrace`)
* [ ] Filtering based on string buffer contents
* [ ] Handling of `exit()` of the direct child (grand-child daemonisation)
* [ ] Re-using strace's test suite for per-syscall tests
* [ ] other TODOs in the code
* Use it to do specific investigations in other programs:
* [ ] investigate [big GHC linker speed differences](https://github.com/nh2/hatrace/pull/9#issuecomment-477573945)
1 change: 1 addition & 0 deletions hatrace.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ library
, conduit
, containers
, directory
, filepath
, linux-ptrace
, optparse-applicative
, posix-waitpid
Expand Down
174 changes: 171 additions & 3 deletions src/System/Hatrace.hs
Original file line number Diff line number Diff line change
Expand Up @@ -48,27 +48,39 @@ module System.Hatrace
, getSyscallEnterDetails
, syscallEnterDetailsOnlyConduit
, syscallExitDetailsOnlyConduit
, FileWriteEvent(..)
, fileWritesConduit
, FileWriteBehavior(..)
, atomicWritesSink
, SyscallStopType(..)
, TraceEvent(..)
, TraceState(..)
, Syscall(..)
, SyscallArgs(..)
, sendSignal
, doesProcessHaveChildren
, getFdPath
, getExePath
-- * Re-exports
, KnownSyscall(..)
) where

import Conduit (foldlC)
import Control.Arrow (second)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.IO.Unlift (MonadUnliftIO)
import Data.Bits ((.|.), shiftL, shiftR)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Internal as BS
import qualified Data.ByteString.Internal as BSI
import Data.Conduit
import qualified Data.Conduit.List as CL
import Data.Either (partitionEithers)
import Data.List (genericLength)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Data.Word (Word32, Word64)
import Foreign.C.Error (Errno(..), throwErrnoIfMinus1, throwErrnoIfMinus1_, getErrno, resetErrno, eCHILD, eINVAL)
import Foreign.C.String (peekCString)
Expand All @@ -80,15 +92,17 @@ import Foreign.Marshal.Utils (withMany)
import Foreign.Ptr (Ptr, nullPtr, wordPtrToPtr)
import Foreign.Storable (peekByteOff, sizeOf)
import GHC.Stack (HasCallStack, callStack, getCallStack, prettySrcLoc)
import System.Directory (doesFileExist, findExecutable)
import System.Directory (canonicalizePath, doesFileExist, findExecutable)
import System.Exit (ExitCode(..), die)
import System.FilePath ((</>))
import System.IO.Error (modifyIOError, ioeGetLocation, ioeSetLocation)
import System.Linux.Ptrace (TracedProcess(..), peekBytes, peekNullTerminatedBytes, peekNullWordTerminatedWords, detach)
import System.Linux.Ptrace.Syscall hiding (ptrace_syscall, ptrace_detach)
import qualified System.Linux.Ptrace.Syscall as Ptrace.Syscall
import System.Linux.Ptrace.Types (Regs(..))
import System.Linux.Ptrace.X86_64Regs (X86_64Regs(..))
import System.Linux.Ptrace.X86Regs (X86Regs(..))
import System.Posix.Files (readSymbolicLink)
import System.Posix.Internals (withFilePath)
import System.Posix.Signals (Signal, sigTRAP, sigSTOP, sigTSTP, sigTTIN, sigTTOU)
import qualified System.Posix.Signals as Signals
Expand Down Expand Up @@ -773,7 +787,7 @@ readPipeFds pid pipefd = do
let fdSize = sizeOf (undefined :: CInt)
sz = 2 * fdSize
bytes <- peekBytes (TracedProcess pid) pipefd sz
let (ptr, off, _size) = BS.toForeignPtr bytes
let (ptr, off, _size) = BSI.toForeignPtr bytes
withForeignPtr ptr $ \p -> do
(,) <$> peekByteOff p off <*> peekByteOff p (off + fdSize)

Expand Down Expand Up @@ -962,6 +976,160 @@ traceForkExecvFullPath args = do
return exitCode


-- | Like the partial `T.decodeUtf8`, with `HasCallStack`.
decodeUtf8OrError :: (HasCallStack) => ByteString -> Text
decodeUtf8OrError bs = case T.decodeUtf8' bs of
Left err -> error $ "Could not decode as UTF-8: " ++ show err ++ "; ByteString was : " ++ show bs
Right text -> text


getFdPath :: CPid -> CInt -> IO FilePath
getFdPath pid fd = do
let procFdPath = "/proc/" ++ show pid ++ "/fd/" ++ show fd
readSymbolicLink procFdPath


getExePath :: CPid -> IO FilePath
getExePath pid = do
let procExePath = "/proc/" ++ show pid ++ "/exe"
readSymbolicLink procExePath


data FileWriteEvent
= FileOpen ByteString -- ^ name used to open the file
| FileWrite
| FileClose
| FileRename ByteString -- ^ new (target) name
deriving (Eq, Ord, Show)

-- | Uses raw trace events to produce more focused events aimed at analysing file writes.
-- Output events are accompanied by corresponding absolute file paths.
--
-- NOTES:
-- * only calls to `write` are currently used as a marker for writes and syscalls
-- `pwrite`, `writev`, `pwritev` are not taken into account
fileWritesConduit :: (MonadIO m) => ConduitT (CPid, TraceEvent) (FilePath, FileWriteEvent) m ()
fileWritesConduit = go
where
go =
await >>= \case
Just (pid, SyscallStop (SyscallExit (KnownSyscall syscall, syscallArgs))) -> do
detailedSyscallExit <- liftIO $ getSyscallExitDetails syscall syscallArgs pid
case detailedSyscallExit of
Right (DetailedSyscallExit_open SyscallExitDetails_open
{ enterDetail = SyscallEnterDetails_open { pathnameBS }
, fd }) ->
yieldFdEvent pid fd (FileOpen pathnameBS)
Right (DetailedSyscallExit_openat SyscallExitDetails_openat
{ enterDetail = SyscallEnterDetails_openat { pathnameBS }
, fd }) ->
yieldFdEvent pid fd (FileOpen pathnameBS)
Right (DetailedSyscallExit_creat SyscallExitDetails_creat
{ enterDetail = SyscallEnterDetails_creat { pathnameBS }
, fd }) ->
yieldFdEvent pid fd (FileOpen pathnameBS)
_ -> return ()
go
Just (pid, SyscallStop (SyscallEnter (KnownSyscall syscall, syscallArgs))) -> do
detailedSyscallEnter <- liftIO $ getSyscallEnterDetails syscall syscallArgs pid
case detailedSyscallEnter of
DetailedSyscallEnter_write SyscallEnterDetails_write { fd } ->
yieldFdEvent pid fd FileWrite
DetailedSyscallEnter_close SyscallEnterDetails_close { fd } ->
yieldFdEvent pid fd FileClose
DetailedSyscallEnter_rename SyscallEnterDetails_rename { oldpathBS, newpathBS } -> do
path <- liftIO $ resolveToPidCwd pid (T.unpack $ decodeUtf8OrError oldpathBS)
yield (path, FileRename newpathBS)
_ -> return ()
go
Just _ ->
go -- ignore other events
Nothing ->
return ()
yieldFdEvent pid fd event = do
path <- liftIO $ getFdPath pid fd
yield (path, event)

resolveToPidCwd :: Show a => a -> FilePath -> IO FilePath
resolveToPidCwd pid path = do
let procFdPath = "/proc/" ++ show pid ++ "/cwd"
wd <- liftIO $ readSymbolicLink procFdPath
canonicalizePath $ wd </> path


data FileWriteBehavior
= NoWrites
| NonatomicWrite
| AtomicWrite FilePath
-- ^ path tells temporary file name that was used
| Unexpected String
deriving (Eq, Ord, Show)

-- uses state machine implemented as recursive functions
analyzeWrites :: [FileWriteEvent] -> FileWriteBehavior
analyzeWrites es = checkOpen es
where
checkOpen events =
case events of
[] -> NoWrites
-- we could see a `close` syscall for a pipe descriptor
-- with no `open` for it thus we just ignore it
FileClose : rest -> checkOpen rest
FileOpen _ : rest -> checkWrites rest
unexpected : _ -> unexpectedEvent "FileOpen" unexpected
checkWrites events =
case events of
[] -> Unexpected $ "FileClose was expected but not seen"
FileClose : rest -> checkOpen rest
FileWrite : rest -> checkAfterWrite rest
unexpected : _ -> unexpectedEvent "FileClose or FileWrite" unexpected
checkAfterWrite events =
case events of
[] -> Unexpected $ "FileClose was expected but not seen"
FileWrite : rest -> checkAfterWrite rest
FileClose : rest -> checkRename rest
unexpected : _ -> unexpectedEvent "FileClose or FileWrite" unexpected
-- when it happens that a path gets more than 1 sequence open-write-close
-- for it we need to check whether there was a `rename` after the 1st one
-- and then check the result of the next one and combine them accordingly
-- e.g. atomic + non-atomic -> non-atomic
checkRename events =
case events of
FileRename path : rest ->
case checkOpen rest of
NoWrites ->
-- we write original path here which swapped
-- with oldpath in `atomicWritesSink`
AtomicWrite (T.unpack $ decodeUtf8OrError path)
other ->
other
noRenames ->
case checkOpen noRenames of
NoWrites -> NonatomicWrite
other -> other
unexpectedEvent expected real =
Unexpected $ "expected " ++ expected ++ ", but " ++
show real ++ " was seen"

atomicWritesSink :: (MonadIO m) => ConduitT (CPid, TraceEvent) Void m (Map FilePath FileWriteBehavior)
atomicWritesSink =
extract <$> (fileWritesConduit .| foldlC collectWrite Map.empty)
where
collectWrite :: Map FilePath [FileWriteEvent] -> (FilePath, FileWriteEvent) -> Map FilePath [FileWriteEvent]
collectWrite m (fp, e) = Map.alter (Just . maybe [e] (e:)) fp m
extract :: Map FilePath [FileWriteEvent] -> Map FilePath FileWriteBehavior
extract m =
let (noRenames, renames) =
partitionEithers . map (analyzeWrites' . second reverse) $ Map.toList m
in Map.fromList noRenames <> Map.fromList (map (second AtomicWrite) renames)
-- this function (in addition to what `analyzeWrites` does) treats atomic writes
-- in a special way: those include a rename and we need to put atomic writes under
-- a path which is a target of a corresponding rename
-- so in the end we swap path in `AtomicWrite` and its corresponding map key
analyzeWrites' (src, es) = case analyzeWrites es of
AtomicWrite target -> Right (target, src)
other -> Left (src, other)

-- | Passes through all syscalls and signals that come by,
-- printing them, including details where available.
printSyscallOrSignalNameConduit :: (MonadIO m) => ConduitT (CPid, TraceEvent) (CPid, TraceEvent) m ()
Expand Down
48 changes: 43 additions & 5 deletions src/System/Hatrace/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,36 @@
module System.Hatrace.Main where

import Control.Applicative (many)
import Options.Applicative (Parser, argument, str, metavar)
import Control.Monad (forM_, unless)
import Data.Either (partitionEithers)
import qualified Data.Map as Map
import Options.Applicative (Parser, argument, str, metavar, flag', long, help, optional)
import qualified Options.Applicative as Opts
import System.Exit (exitWith)
import System.FilePath (splitPath)

import System.Hatrace (traceForkProcess)
import System.Hatrace

data Filter =
FilterAtomicWrites
deriving (Eq, Ord, Show)

-- | Command line arguments of this program.
data CLIArgs = CLIArgs
{ cliProgram :: FilePath
, cliArgs :: [String]
, cliFilter :: Maybe Filter
} deriving (Eq, Ord, Show)


cliArgsParser :: Parser CLIArgs
cliArgsParser = do
cliProgram <- argument str (metavar "PROGRAM")
cliArgs <- many (argument str (metavar "PROGRAM_ARG"))
pure $ CLIArgs{ cliProgram, cliArgs }
cliFilter <- optional $ flag' FilterAtomicWrites
( long "find-nonatomic-writes"
<> help "find file writes without a following rename to a persistent location" )
pure $ CLIArgs{ cliProgram, cliArgs, cliFilter }


-- | Parses the command line arguments for this program.
Expand All @@ -42,7 +53,34 @@ main = do
CLIArgs
{ cliProgram
, cliArgs
, cliFilter
} <- parseArgs

exitCode <- traceForkProcess cliProgram cliArgs
exitWith exitCode
case cliFilter of
Nothing -> do
exitCode <- traceForkProcess cliProgram cliArgs
exitWith exitCode
Just FilterAtomicWrites -> do
argv <- procToArgv cliProgram cliArgs
(exitCode, entries) <- sourceTraceForkExecvFullPathWithSink argv atomicWritesSink
let ignoredPath [] = error "paths are not supposed to be empty"
ignoredPath ('/':fromRoot) =
head (splitPath fromRoot) `elem` ["proc/", "dev/", "sys/"]
ignoredPath _ = True -- pipes and other special paths
(nonatomic, bad) = partitionEithers . Map.elems $
Map.mapMaybeWithKey maybeNonatomicOrBad $
Map.filterWithKey (\fp _ -> not $ ignoredPath fp) entries
unless (null nonatomic) $ do
putStrLn "The following files were written nonatomically by the program:"
forM_ nonatomic $ \p -> putStrLn $ " - " ++ show p
unless (null bad) $ do
putStrLn "The following files could not be properly analyzed:"
forM_ bad $ \(p, e) -> do
putStrLn $ " - " ++ show p ++ ": " ++ e
exitWith exitCode

maybeNonatomicOrBad :: FilePath -> FileWriteBehavior -> Maybe (Either FilePath (FilePath, String))
maybeNonatomicOrBad _ NoWrites = Nothing
maybeNonatomicOrBad _ (AtomicWrite _) = Nothing
maybeNonatomicOrBad fp NonatomicWrite = Just (Left fp)
maybeNonatomicOrBad fp (Unexpected err) = Just (Right (fp, err))
26 changes: 26 additions & 0 deletions test/HatraceSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import qualified Data.ByteString as BS
import Data.Conduit
import qualified Data.Conduit.Combinators as CC
import qualified Data.Conduit.List as CL
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import qualified Data.Text as T
Expand Down Expand Up @@ -236,13 +237,17 @@ spec = before_ assertNoChildren $ do
-- directly from the terminal, or want to give your own GHC (see below).
let program = "env"
let ghc = "ghc"
-- For my fixed GHC
-- let ghc = "/raid/src/ghc/ghc-atomic-writes/_build/stage1/bin/ghc"
-- So that a custom path can be given conveniently when testing a patch.
-- You probably want to set GHC_PACKAGE_PATH below when doing that so that
-- your custom GHC works even under `stack test`.
let isPatchedGhc = ghc /= "ghc"
let args =
[ ghc
-- For my fixed GHC
-- Note it's very important that GHC_PACKAGE_PATH does not end with a '/',
-- see https://gitlab.haskell.org/ghc/ghc/issues/16360
-- [ "GHC_PACKAGE_PATH=/raid/src/ghc/ghc-atomic-writes/_build/stage1/lib/package.conf.d", ghc
, "--make"
, "-outputdir", "example-programs-build/"
Expand Down Expand Up @@ -340,6 +345,27 @@ spec = before_ assertNoChildren $ do
it "can be used to check whether programs handle EINTR correctly" $ do
pendingWith "implement test that uses PTRACE_INTERRUPT in every syscall"

it "observes atomic write in a program" $ do
makeAtomicWriteExample
tmpFile <- emptySystemTempFile "test-output"
argv <- procToArgv "example-programs-build/atomic-write" ["atomic", "10", tmpFile]
(exitCode, writes) <-
sourceTraceForkExecvFullPathWithSink argv atomicWritesSink
exitCode `shouldBe` ExitSuccess
case Map.lookup tmpFile writes of
Just (AtomicWrite _) -> return ()
other -> error $ "atomic write for " ++ show tmpFile ++
" was expected but found " ++ show other

it "catches non-atomic write in a program" $ do
makeAtomicWriteExample
tmpFile <- emptySystemTempFile "test-output"
argv <- procToArgv "example-programs-build/atomic-write" ["non-atomic", "10", tmpFile]
(exitCode, writes) <-
sourceTraceForkExecvFullPathWithSink argv atomicWritesSink
exitCode `shouldBe` ExitSuccess
Map.lookup tmpFile writes `shouldBe` Just NonatomicWrite

describe "per-syscall tests" $ do

describe "read" $ do
Expand Down