Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

proc: changed windows backend to deal with simultaneous breakpoints #598

Merged
merged 5 commits into from
Oct 22, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions _fixtures/issue594.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package main

import (
"fmt"
"runtime"
)

func dontsegfault() {
var p *int
func() int {
defer func() {
recover()
}()
return *p
}()
}

func main() {
dontsegfault()
runtime.Breakpoint()
fmt.Println("should stop here")
}
2 changes: 1 addition & 1 deletion proc/disasm_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ var windowsPrologue3 = instrseq{x86asm.MOV, x86asm.MOV, x86asm.MOV, x86asm.CMP,
var unixPrologue = instrseq{x86asm.MOV, x86asm.LEA, x86asm.CMP, x86asm.JBE}
var unixPrologue2 = instrseq{x86asm.MOV, x86asm.CMP, x86asm.JBE}
var unixPrologue3 = instrseq{x86asm.MOV, x86asm.MOV, x86asm.CMP, x86asm.JE}
var prologues = []instrseq{windowsPrologue, windowsPrologue2, unixPrologue, unixPrologue2, unixPrologue3}
var prologues = []instrseq{windowsPrologue, windowsPrologue2, windowsPrologue3, unixPrologue, unixPrologue2, unixPrologue3}

// FirstPCAfterPrologue returns the address of the first instruction after the prologue for function fn
// If sameline is set FirstPCAfterPrologue will always return an address associated with the same line as fn.Entry
Expand Down
33 changes: 25 additions & 8 deletions proc/proc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -801,7 +801,10 @@ func TestStacktraceGoroutine(t *testing.T) {

for i, g := range gs {
locations, err := g.Stacktrace(40)
assertNoError(err, t, "GoroutineStacktrace()")
if err != nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs a comment / explanation / TODO about why we're ignoring errors.

// On windows we do not have frame information for goroutines doing system calls.
continue
}

if stackMatch(mainStack, locations, false) {
mainCount++
Expand Down Expand Up @@ -2084,8 +2087,8 @@ func TestStepConcurrentDirect(t *testing.T) {
}

func nextInProgress(p *Process) bool {
for _, th := range p.Threads {
if th.CurrentBreakpoint != nil && th.CurrentBreakpoint.Internal() {
for _, bp := range p.Breakpoints {
if bp.Internal() {
return true
}
}
Expand Down Expand Up @@ -2135,13 +2138,13 @@ func TestStepConcurrentPtr(t *testing.T) {
assertNoError(p.Continue(), t, "Continue()")
}

f, ln = currentLineNumber(p, t)
if ln != 13 {
t.Fatalf("Step did not step into function call (13): %s:%d (gid: %d)", f, ln, p.SelectedGoroutine.ID)
if p.SelectedGoroutine.ID != gid {
t.Fatalf("Step switched goroutines (wanted: %d got: %d)", gid, p.SelectedGoroutine.ID)
}

if p.SelectedGoroutine.ID != gid {
t.Fatalf("Step switched goroutines (%d %d)", gid, p.SelectedGoroutine.ID)
f, ln = currentLineNumber(p, t)
if ln != 13 {
t.Fatalf("Step did not step into function call (13): %s:%d", f, ln)
}

count++
Expand Down Expand Up @@ -2197,3 +2200,17 @@ func TestStepOnCallPtrInstr(t *testing.T) {
}
})
}

func TestIssue594(t *testing.T) {
// Exceptions that aren't caused by breakpoints should be propagated
// back to the target.
// In particular the target should be able to cause a nil pointer
// dereference panic and recover from it.
withTestProcess("issue594", t, func(p *Process, fixture protest.Fixture) {
assertNoError(p.Continue(), t, "Continue()")
f, ln := currentLineNumber(p, t)
if ln != 21 {
t.Fatalf("Program stopped at %s:%d, expected :21", f, ln)
}
})
}
154 changes: 117 additions & 37 deletions proc/proc_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,19 +91,24 @@ func Launch(cmd []string) (*Process, error) {
si.StdOutput = sys.Handle(fd[1])
si.StdErr = sys.Handle(fd[2])
pi := new(sys.ProcessInformation)
err = sys.CreateProcess(argv0, cmdLine, nil, nil, true, _DEBUG_ONLY_THIS_PROCESS, nil, nil, si, pi)

dbp := New(0)
dbp.execPtraceFunc(func() {
err = sys.CreateProcess(argv0, cmdLine, nil, nil, true, _DEBUG_ONLY_THIS_PROCESS, nil, nil, si, pi)
})
if err != nil {
return nil, err
}
sys.CloseHandle(sys.Handle(pi.Process))
sys.CloseHandle(sys.Handle(pi.Thread))

return newDebugProcess(int(pi.ProcessId), argv0Go)
dbp.Pid = int(pi.ProcessId)

return newDebugProcess(dbp, argv0Go)
}

// newDebugProcess prepares process pid for debugging.
func newDebugProcess(pid int, exepath string) (*Process, error) {
dbp := New(pid)
func newDebugProcess(dbp *Process, exepath string) (*Process, error) {
// It should not actually be possible for the
// call to waitForDebugEvent to fail, since Windows
// will always fire a CREATE_PROCESS_DEBUG_EVENT event
Expand All @@ -112,7 +117,7 @@ func newDebugProcess(pid int, exepath string) (*Process, error) {
var err error
var tid, exitCode int
dbp.execPtraceFunc(func() {
tid, exitCode, err = dbp.waitForDebugEvent()
tid, exitCode, err = dbp.waitForDebugEvent(waitBlocking)
})
if err != nil {
return nil, err
Expand All @@ -121,6 +126,22 @@ func newDebugProcess(pid int, exepath string) (*Process, error) {
dbp.postExit()
return nil, ProcessExitedError{Pid: dbp.Pid, Status: exitCode}
}
// Suspend all threads so that the call to _ContinueDebugEvent will
// not resume the target.
for _, thread := range dbp.Threads {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want to start process with all its threads suspended, you can pass CREATE_SUSPENDED into CreateProcess.

Copy link
Member Author

@aarzilli aarzilli Jul 21, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't work, the first WaitForDebugEvent call never returns (I'm not saying there is no way to make it work, just that I don't know how).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like maybe that is unnecessary (the first WaitForDebugEvent). I assume it wouldn't trigger an event as the process is simply created, but never executed any instructions. This is distinct from how we handle process creation on UNIX systems where we wait for that first PTRACE_TRACEME call, which triggers a signal that we wait for.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't work, the first WaitForDebugEvent call never returns

Fair enough. I don't know much about debugging to be useful here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to add a comment here as to why we're suspending and then continuing. It's hard to tell from here that we're trying to drain all events.

It's probably worth putting this logic into a well named function so it's apparent to the reader what's going on.

_, err := _SuspendThread(thread.os.hThread)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to wrap this in execPtraceFunc as well, correct? I assume it has the same origin thread restrictions.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lukehoban code didn't, their documentation doesn't mention it, and it doesn't seem to cause problems.

if err != nil {
return nil, err
}
}

dbp.execPtraceFunc(func() {
err = _ContinueDebugEvent(uint32(dbp.Pid), uint32(dbp.os.breakThread), _DBG_CONTINUE)
})
if err != nil {
return nil, err
}

return initializeDebugProcess(dbp, exepath, false)
}

Expand Down Expand Up @@ -169,7 +190,7 @@ func Attach(pid int) (*Process, error) {
if err != nil {
return nil, err
}
return newDebugProcess(pid, exepath)
return newDebugProcess(New(pid), exepath)
}

// Kill kills the process.
Expand Down Expand Up @@ -198,7 +219,7 @@ func (dbp *Process) updateThreadList() error {
return nil
}

func (dbp *Process) addThread(hThread syscall.Handle, threadID int, attach bool) (*Thread, error) {
func (dbp *Process) addThread(hThread syscall.Handle, threadID int, attach, suspendNewThreads bool) (*Thread, error) {
if thread, ok := dbp.Threads[threadID]; ok {
return thread, nil
}
Expand All @@ -212,6 +233,12 @@ func (dbp *Process) addThread(hThread syscall.Handle, threadID int, attach bool)
if dbp.CurrentThread == nil {
dbp.SwitchThread(thread.ID)
}
if suspendNewThreads {
_, err := _SuspendThread(thread.os.hThread)
if err != nil {
return nil, err
}
}
return thread, nil
}

Expand Down Expand Up @@ -402,12 +429,24 @@ func dwarfFromPE(f *pe.File) (*dwarf.Data, error) {
return dwarf.New(abbrev, nil, nil, info, line, nil, nil, str)
}

func (dbp *Process) waitForDebugEvent() (threadID, exitCode int, err error) {
type waitForDebugEventFlags int

const (
waitBlocking waitForDebugEventFlags = 1 << iota
waitSuspendNewThreads
)

func (dbp *Process) waitForDebugEvent(flags waitForDebugEventFlags) (threadID, exitCode int, err error) {
var debugEvent _DEBUG_EVENT
shouldExit := false
for {
continueStatus := uint32(_DBG_CONTINUE)
var milliseconds uint32 = 0
if flags&waitBlocking != 0 {
milliseconds = syscall.INFINITE
}
// Wait for a debug event...
err := _WaitForDebugEvent(&debugEvent, syscall.INFINITE)
err := _WaitForDebugEvent(&debugEvent, milliseconds)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wander what the err is when milliseconds is set to 0 and you have no debug events pending.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The semaphore timeout period has expired.

if err != nil {
return 0, 0, err
}
Expand All @@ -425,14 +464,14 @@ func (dbp *Process) waitForDebugEvent() (threadID, exitCode int, err error) {
}
}
dbp.os.hProcess = debugInfo.Process
_, err = dbp.addThread(debugInfo.Thread, int(debugEvent.ThreadId), false)
_, err = dbp.addThread(debugInfo.Thread, int(debugEvent.ThreadId), false, flags&waitSuspendNewThreads != 0)
if err != nil {
return 0, 0, err
}
break
case _CREATE_THREAD_DEBUG_EVENT:
debugInfo := (*_CREATE_THREAD_DEBUG_INFO)(unionPtr)
_, err = dbp.addThread(debugInfo.Thread, int(debugEvent.ThreadId), false)
_, err = dbp.addThread(debugInfo.Thread, int(debugEvent.ThreadId), false, flags&waitSuspendNewThreads != 0)
if err != nil {
return 0, 0, err
}
Expand All @@ -458,9 +497,14 @@ func (dbp *Process) waitForDebugEvent() (threadID, exitCode int, err error) {
case _RIP_EVENT:
break
case _EXCEPTION_DEBUG_EVENT:
tid := int(debugEvent.ThreadId)
dbp.os.breakThread = tid
return tid, 0, nil
exception := (*_EXCEPTION_DEBUG_INFO)(unionPtr)
if code := exception.ExceptionRecord.ExceptionCode; code == _EXCEPTION_BREAKPOINT || code == _EXCEPTION_SINGLE_STEP {
tid := int(debugEvent.ThreadId)
dbp.os.breakThread = tid
return tid, 0, nil
} else {
continueStatus = _DBG_EXCEPTION_NOT_HANDLED
}
case _EXIT_PROCESS_DEBUG_EVENT:
debugInfo := (*_EXIT_PROCESS_DEBUG_INFO)(unionPtr)
exitCode = int(debugInfo.ExitCode)
Expand All @@ -470,7 +514,7 @@ func (dbp *Process) waitForDebugEvent() (threadID, exitCode int, err error) {
}

// .. and then continue unless we received an event that indicated we should break into debugger.
err = _ContinueDebugEvent(debugEvent.ProcessId, debugEvent.ThreadId, _DBG_CONTINUE)
err = _ContinueDebugEvent(debugEvent.ProcessId, debugEvent.ThreadId, continueStatus)
if err != nil {
return 0, 0, err
}
Expand All @@ -485,7 +529,7 @@ func (dbp *Process) trapWait(pid int) (*Thread, error) {
var err error
var tid, exitCode int
dbp.execPtraceFunc(func() {
tid, exitCode, err = dbp.waitForDebugEvent()
tid, exitCode, err = dbp.waitForDebugEvent(waitBlocking)
})
if err != nil {
return nil, err
Expand All @@ -507,39 +551,75 @@ func (dbp *Process) wait(pid, options int) (int, *sys.WaitStatus, error) {
}

func (dbp *Process) setCurrentBreakpoints(trapthread *Thread) error {
// TODO: In theory, we should also be setting the breakpoints on other
// threads that happen to have hit this BP. But doing so leads to periodic
// failures in the TestBreakpointsCounts test with hit counts being too high,
// which can be traced back to occurrences of multiple threads hitting a BP
// at the same time.
// While the debug event that stopped the target was being propagated
// other target threads could generate other debug events.
// After this function we need to know about all the threads
// stopped on a breakpoint. To do that we first suspend all target
// threads and then repeatedly call _ContinueDebugEvent followed by
// waitForDebugEvent in non-blocking mode.
// We need to explicitly call SuspendThread because otherwise the
// call to _ContinueDebugEvent will resume execution of some of the
// target threads.

err := trapthread.SetCurrentBreakpoint()
if err != nil {
return err
}

// My guess is that Windows will correctly trigger multiple DEBUG_EVENT's
// in this case, one for each thread, so we should only handle the BP hit
// on the thread that the debugger was evented on.
for _, thread := range dbp.Threads {
thread.running = false
_, err := _SuspendThread(thread.os.hThread)
if err != nil {
return err
}
}

for {
var err error
var tid int
dbp.execPtraceFunc(func() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't like the idea of this function having the side effect of potentially causing the inferior to execute instructions. We continue and then immediately execute a non-blocking wait, which will return an error when we do not have any pending events. That means we never stop this thread, correct?

Is is not possible to just loop through WaitForDebugEvent in non-blocking mode until we have no events? I assume all events would be queued and we shouldn't have to continue the inferior.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The loop on dbp.Threads calls _SuspendTherad on every thread so the inferior shouldn't be able to execute any instructions.

WaitForDebugEvent doesn't seem to return anything unless we call ContinueDebugEvent in between.

err = _ContinueDebugEvent(uint32(dbp.Pid), uint32(dbp.os.breakThread), _DBG_CONTINUE)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the only thing that sticks out to me in this patch set. Here, we're continuing a thread and waiting for a debug event, however in the meantime it's possible that the thread will be executing instructions. This function should not have that side effect.

A non-blocking wait for debug event should suffice, yeah? If there's no events we simply move on. I don't understand the need to continue a thread in a function that is setting state on our thread objects.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we suspend all threads (which we did above this line) ContinueDebugEvent doesn't resume execution, it just acknowledges that we have processed the event we received. I couldn't figure out any other way of doing this that would work with the windows API. If we don't do this later on on the Continue procedure while we are singlestepping CurrentThread over its breakpoint instruction we receive events for other threads and we are not equipped to dealing with them there.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, so if I understand correctly, it behaves similar to the Darwin API, where you can suspend a thread multiple times, and have to resume an equal number of times for it to actually execute instructions. Is that correct, from your understanding? If so, could you add a comment / link to docs to make it clear we're not actually continuing execution.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's like the darwin API, however I don't actually ever suspend the thread more than once, when one of the threads generates a debug event windows will halt the execution of all threads but this state doesn't count as suspended, it's weird.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a comment describing what we are doing on that function.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I think maybe we should alias _DBG_CONTINUE to _DBG_EXCEPTION_HANDLED. When reading up on the Windows debug API the other day, IIRC, that existed for 32 bit, but not 64 bit. However, semantically it's more accurate to what we're trying to accomplish, and makes it more explicit from the callers point of view.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Googling DBG_EXCEPTION_HANDLED leads me to this blog post which suggests that at some point DBG_EXCEPTION_HANDLED and DBG_CONTINUE did different things and had different values, I think appropriating the name as a synonym muddles the water but if you insist I'll switch.

if err == nil {
tid, _, _ = dbp.waitForDebugEvent(waitSuspendNewThreads)
}
})
if err != nil {
return err
}
if tid == 0 {
break
}
err = dbp.Threads[tid].SetCurrentBreakpoint()
if err != nil {
return err
}
}

return trapthread.SetCurrentBreakpoint()
return nil
}

func (dbp *Process) exitGuard(err error) error {
return err
}

func (dbp *Process) resume() error {
// Only resume the thread that broke into the debugger
thread := dbp.Threads[dbp.os.breakThread]
// This relies on the same assumptions as dbp.setCurrentBreakpoints
if thread.CurrentBreakpoint != nil {
if err := thread.StepInstruction(); err != nil {
return err
for _, thread := range dbp.Threads {
if thread.CurrentBreakpoint != nil {
if err := thread.StepInstruction(); err != nil {
return err
}
thread.CurrentBreakpoint = nil
}
thread.CurrentBreakpoint = nil
}
// In case we are now on a different thread, make sure we resume
// the thread that is broken.
thread = dbp.Threads[dbp.os.breakThread]
if err := thread.resume(); err != nil {
return err

for _, thread := range dbp.Threads {
thread.running = true
_, err := _ResumeThread(thread.os.hThread)
if err != nil {
return err
}
}

return nil
}

Expand Down
19 changes: 19 additions & 0 deletions proc/syscall_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,20 @@ type _LOAD_DLL_DEBUG_INFO struct {
Unicode uint16
}

type _EXCEPTION_DEBUG_INFO struct {
ExceptionRecord _EXCEPTION_RECORD
FirstChance uint32
}

type _EXCEPTION_RECORD struct {
ExceptionCode uint32
ExceptionFlags uint32
ExceptionRecord *_EXCEPTION_RECORD
ExceptionAddress uintptr
NumberParameters uint32
ExceptionInformation [_EXCEPTION_MAXIMUM_PARAMETERS]uintptr
}

const (
_ThreadBasicInformation = 0

Expand All @@ -72,6 +86,11 @@ const (

// DEBUG_ONLY_THIS_PROCESS tracks https://msdn.microsoft.com/en-us/library/windows/desktop/ms684863(v=vs.85).aspx
_DEBUG_ONLY_THIS_PROCESS = 0x00000002

_EXCEPTION_BREAKPOINT = 0x80000003
_EXCEPTION_SINGLE_STEP = 0x80000004

_EXCEPTION_MAXIMUM_PARAMETERS = 15
)

func _NT_SUCCESS(x _NTSTATUS) bool {
Expand Down
Loading