From 4107515419625ff4581a189e882cd4ee97aee473 Mon Sep 17 00:00:00 2001 From: Ian Denhardt Date: Fri, 30 Sep 2022 18:51:50 -0400 Subject: [PATCH 1/2] Fix a deadlock. It is illegal to resolve promises while holding connection locks, and I was seeing TestHandleReturn_regression deadlock every 6th run or so; changing this fixes it. Note: since this makes the resolution itself and the manipulation of the questions table non-atomic, the ordering is critical: 1. First we remove the entry from the table. This means that if the remote vat sends us messages pertaining to that question id, they will simply be rejected instead being acted on. This shouldn't happen normally, but maybe a malicious vat could get us to panic or something. 2. Second, resolve the promise. After this is complete, we can be sure the client won't be used. 3. Then, and only then, we can return the question id to the pool for re-use. --- rpc/rpc.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/rpc/rpc.go b/rpc/rpc.go index 26732ac4..42b502bb 100644 --- a/rpc/rpc.go +++ b/rpc/rpc.go @@ -225,13 +225,14 @@ func (c *Conn) Bootstrap(ctx context.Context) (bc capnp.Client) { }, func(err error) { defer c.tasks.Done() - c.mu.Lock() - defer c.mu.Unlock() - if err != nil { - c.questions[q.id] = nil - c.questionID.remove(uint32(q.id)) + syncutil.With(&c.mu, func() { + c.questions[q.id] = nil + }) q.bootstrapPromise.Reject(exc.Annotate("rpc", "bootstrap", err)) + syncutil.With(&c.mu, func() { + c.questionID.remove(uint32(q.id)) + }) return } From d6d161780cebc7f72b72839b1b6a4dd1de336169 Mon Sep 17 00:00:00 2001 From: Ian Denhardt Date: Fri, 30 Sep 2022 19:09:53 -0400 Subject: [PATCH 2/2] Fix similar ordering issues. Unlike the previous commit, I haven't observed the consequences of these, but the same logic & ordering constraints apply. --- rpc/import.go | 4 +++- rpc/question.go | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/rpc/import.go b/rpc/import.go index 1bca6187..fa8b98dc 100644 --- a/rpc/import.go +++ b/rpc/import.go @@ -108,8 +108,10 @@ func (ic *importClient) Send(ctx context.Context, s capnp.Send) (*capnp.Answer, if err != nil { ic.c.questions[q.id] = nil + syncutil.Without(&ic.c.mu, func() { + q.p.Reject(rpcerr.Failedf("send message: %w", err)) + }) ic.c.questionID.remove(uint32(q.id)) - q.p.Reject(rpcerr.Failedf("send message: %w", err)) return } diff --git a/rpc/question.go b/rpc/question.go index 01997d69..8d3330e8 100644 --- a/rpc/question.go +++ b/rpc/question.go @@ -160,9 +160,11 @@ func (q *question) PipelineSend(ctx context.Context, transform []capnp.PipelineO if err != nil { syncutil.With(&q.c.mu, func() { q.c.questions[q2.id] = nil - q.c.questionID.remove(uint32(q2.id)) }) q2.p.Reject(rpcerr.Failedf("send message: %w", err)) + syncutil.With(&q.c.mu, func() { + q.c.questionID.remove(uint32(q2.id)) + }) return }