Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use event bus for monitoring peer connections and protocol updates #536

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 0 additions & 26 deletions comm.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,19 +98,6 @@ func (p *PubSub) handleNewStream(s network.Stream) {
}
}

// notifyPeerDead records pid in the pending-dead set and signals the event
// loop that there is work to do.
//
// Lock order is significant: peerDeadPrioLk is read-locked before peerDeadMx,
// mirroring the locking pattern used elsewhere in this file — presumably so a
// writer can take peerDeadPrioLk exclusively to drain the pending set without
// racing notifiers (NOTE(review): confirm against the consumer in processLoop).
func (p *PubSub) notifyPeerDead(pid peer.ID) {
	p.peerDeadPrioLk.RLock()
	p.peerDeadMx.Lock()
	p.peerDeadPend[pid] = struct{}{}
	p.peerDeadMx.Unlock()
	p.peerDeadPrioLk.RUnlock()

	// Non-blocking send: peerDead is a signal channel; if a signal is already
	// pending, the consumer will pick up this peer from peerDeadPend anyway.
	select {
	case p.peerDead <- struct{}{}:
	default:
	}
}

func (p *PubSub) handleNewPeer(ctx context.Context, pid peer.ID, outgoing <-chan *RPC) {
s, err := p.host.NewStream(p.ctx, pid, p.rt.Protocols()...)
if err != nil {
Expand All @@ -125,7 +112,6 @@ func (p *PubSub) handleNewPeer(ctx context.Context, pid peer.ID, outgoing <-chan
}

go p.handleSendingMessages(ctx, s, outgoing)
go p.handlePeerDead(s)
select {
case p.newPeerStream <- s:
case <-ctx.Done():
Expand All @@ -141,18 +127,6 @@ func (p *PubSub) handleNewPeerWithBackoff(ctx context.Context, pid peer.ID, back
}
}

// handlePeerDead blocks reading from the inbound stream until the read
// errors (peer went away or reset the stream), then resets the stream and
// marks the remote peer dead.
func (p *PubSub) handlePeerDead(s network.Stream) {
	remote := s.Conn().RemotePeer()

	// We never expect data on this stream; a successful read means the
	// remote is misbehaving, so log it before tearing the stream down.
	var oneByte [1]byte
	if _, err := s.Read(oneByte[:]); err == nil {
		log.Debugf("unexpected message from %s", remote)
	}

	s.Reset()
	p.notifyPeerDead(remote)
}

func (p *PubSub) handleSendingMessages(ctx context.Context, s network.Stream, outgoing <-chan *RPC) {
writeRpc := func(rpc *RPC) error {
size := uint64(rpc.Size())
Expand Down
132 changes: 87 additions & 45 deletions notify.go
Original file line number Diff line number Diff line change
@@ -1,75 +1,117 @@
package pubsub

import (
"fmt"

"github.com/libp2p/go-libp2p/core/event"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
ma "github.com/multiformats/go-multiaddr"
"github.com/libp2p/go-libp2p/p2p/host/eventbus"
)

var _ network.Notifiee = (*PubSubNotif)(nil)

type PubSubNotif PubSub

// OpenedStream is a no-op; it exists only to satisfy network.Notifiee.
func (p *PubSubNotif) OpenedStream(n network.Network, s network.Stream) {
}

// ClosedStream is a no-op; it exists only to satisfy network.Notifiee.
func (p *PubSubNotif) ClosedStream(n network.Network, s network.Stream) {
}

func (p *PubSubNotif) Connected(n network.Network, c network.Conn) {
// ignore transient connections
if c.Stat().Transient {
return
func (p *PubSubNotif) startMonitoring() error {
sub, err := p.host.EventBus().Subscribe([]interface{}{
new(event.EvtPeerConnectednessChanged),
new(event.EvtPeerProtocolsUpdated),
}, eventbus.Name("libp2p/pubsub/notify"))
if err != nil {
return fmt.Errorf("unable to subscribe to EventBus: %w", err)
}

go func() {
p.newPeersPrioLk.RLock()
p.newPeersMx.Lock()
p.newPeersPend[c.RemotePeer()] = struct{}{}
p.newPeersMx.Unlock()
p.newPeersPrioLk.RUnlock()
defer sub.Close()

for {
var e interface{}
select {
case <-p.ctx.Done():
return
case e = <-sub.Out():
}

select {
case p.newPeers <- struct{}{}:
default:
switch evt := e.(type) {
case event.EvtPeerConnectednessChanged:
switch evt.Connectedness {
case network.Connected:
go p.AddPeers(evt.Peer)
case network.NotConnected:
go p.RemovePeers(evt.Peer)
}
case event.EvtPeerProtocolsUpdated:
supportedProtocols := p.rt.Protocols()

protocol_loop:
for _, addedProtocol := range evt.Added {
for _, wantedProtocol := range supportedProtocols {
if wantedProtocol == addedProtocol {
go p.AddPeers(evt.Peer)
break protocol_loop
}
}
}
}
}
}()
}

func (p *PubSubNotif) Disconnected(n network.Network, c network.Conn) {
}

func (p *PubSubNotif) Listen(n network.Network, _ ma.Multiaddr) {
}

func (p *PubSubNotif) ListenClose(n network.Network, _ ma.Multiaddr) {
return nil
}

func (p *PubSubNotif) Initialize() {
isTransient := func(pid peer.ID) bool {
for _, c := range p.host.Network().ConnsToPeer(pid) {
if !c.Stat().Transient {
return false
}
Comment on lines -48 to -53
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We want to keep this code around; please don't remove it and run it right after initializing the bus and before starting the monitoring goroutine.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the initialization code is still there (below), but I believe it is racy.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comments below.

// isTransient reports whether every connection to pid is transient (e.g.
// relayed); a peer is non-transient as soon as one direct connection exists.
//
// The visible diff duplicated a `return true` inside the loop (which would
// wrongly classify peers after inspecting only their first connection);
// this is the corrected form matching the original inline closure.
func (p *PubSubNotif) isTransient(pid peer.ID) bool {
	for _, c := range p.host.Network().ConnsToPeer(pid) {
		if !c.Stat().Transient {
			return false
		}
	}

	return true
}

func (p *PubSubNotif) AddPeers(peers ...peer.ID) {
p.newPeersPrioLk.RLock()
p.newPeersMx.Lock()
for _, pid := range p.host.Network().Peers() {
if isTransient(pid) {
continue
}

p.newPeersPend[pid] = struct{}{}
for _, pid := range peers {
if !p.isTransient(pid) && p.host.Network().Connectedness(pid) == network.Connected {
p.newPeersPend[pid] = struct{}{}
}
}

// do we need to update ?
haveNewPeer := len(p.newPeersPend) > 0

p.newPeersMx.Unlock()
p.newPeersPrioLk.RUnlock()

select {
case p.newPeers <- struct{}{}:
default:
if haveNewPeer {
select {
case p.newPeers <- struct{}{}:
default:
}
}
}

// RemovePeers marks the given peers as pending-dead and signals the event
// loop. Peers that are transient-only, or that have reconnected in the
// meantime, are skipped.
func (p *PubSubNotif) RemovePeers(peers ...peer.ID) {
	p.peerDeadPrioLk.RLock()
	p.peerDeadMx.Lock()

	for _, pid := range peers {
		if p.isTransient(pid) {
			continue
		}
		if p.host.Network().Connectedness(pid) != network.NotConnected {
			continue
		}
		p.peerDeadPend[pid] = struct{}{}
	}

	// Only signal when there is actually something pending.
	shouldSignal := len(p.peerDeadPend) > 0

	p.peerDeadMx.Unlock()
	p.peerDeadPrioLk.RUnlock()

	if !shouldSignal {
		return
	}

	// Non-blocking signal; an already-pending signal covers these peers.
	select {
	case p.peerDead <- struct{}{}:
	default:
	}
}
76 changes: 76 additions & 0 deletions notify_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package pubsub

import (
"context"
"testing"
"time"

"github.com/libp2p/go-libp2p/p2p/protocol/identify"
)

// TestNotifyPeerProtocolsUpdated verifies that two pubsub nodes discover
// each other on a shared topic when identify propagates protocol updates.
func TestNotifyPeerProtocolsUpdated(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	hosts := getNetHosts(t, ctx, 2)

	// Start an identify service on each host so protocol lists propagate.
	for _, h := range hosts {
		ids, err := identify.NewIDService(h)
		if err != nil {
			t.Fatal(err)
		}
		ids.Start()
		defer ids.Close()
	}

	ps0 := getPubsub(ctx, hosts[0])
	connect(t, hosts[0], hosts[1])
	// Delay to make sure that peers are connected.
	time.Sleep(time.Millisecond * 100)
	ps1 := getPubsub(ctx, hosts[1])

	// joinTopic joins "test" on the given pubsub and subscribes to it.
	joinTopic := func(ps *PubSub) (*Topic, *Subscription) {
		topic, err := ps.Join("test")
		if err != nil {
			t.Fatal(err)
		}
		sub, err := topic.Subscribe()
		if err != nil {
			t.Fatal(err)
		}
		return topic, sub
	}

	topic0, sub0 := joinTopic(ps0)
	defer topic0.Close()
	defer sub0.Cancel()

	topic1, sub1 := joinTopic(ps1)
	defer topic1.Close()
	defer sub1.Cancel()

	// Delay before checking results (similar to most tests).
	time.Sleep(time.Millisecond * 100)

	if len(topic0.ListPeers()) == 0 {
		t.Fatalf("topic0 should at least have 1 peer")
	}

	if len(topic1.ListPeers()) == 0 {
		t.Fatalf("topic1 should at least have 1 peer")
	}
}
10 changes: 8 additions & 2 deletions pubsub.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,13 +327,19 @@ func NewPubSub(ctx context.Context, h host.Host, rt PubSubRouter, opts ...Option
h.SetStreamHandler(id, ps.handleNewStream)
}
}
h.Network().Notify((*PubSubNotif)(ps))

// start monitoring for new peers
notify := (*PubSubNotif)(ps)
if err := notify.startMonitoring(); err != nil {
return nil, fmt.Errorf("unable to start pubsub monitoring: %w", err)
}

ps.val.Start(ps)

go ps.processLoop(ctx)

(*PubSubNotif)(ps).Initialize()
// add current peers to notify system
notify.AddPeers(h.Network().Peers()...)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah ok, it is still here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is racy; it needs to happen inside startMonitoring, after we have initialized the bus but before we have spawned the monitoring goroutine.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the latest commit, the following test passes:

go test -v -run=TestSimpleDiscovery -count=1 .

As you suggest, we could move AddPeers to inside startMonitoring before the goroutine as p.AddPeers(p.host.Network().Peers()...) . But with this change, TestSimpleDiscovery hangs and times out. What do you think?


return ps, nil
}
Expand Down