diff --git a/config/internal.go b/config/internal.go
index a860c02c9cc..40070b11b52 100644
--- a/config/internal.go
+++ b/config/internal.go
@@ -2,9 +2,10 @@ package config

 type Internal struct {
 	// All marked as omitempty since we are expecting to make changes to all subcomponents of Internal
-	Bitswap                     *InternalBitswap `json:",omitempty"`
-	UnixFSShardingSizeThreshold *OptionalString  `json:",omitempty"`
-	Libp2pForceReachability     *OptionalString  `json:",omitempty"`
+	Bitswap                     *InternalBitswap  `json:",omitempty"`
+	UnixFSShardingSizeThreshold *OptionalString   `json:",omitempty"`
+	Libp2pForceReachability     *OptionalString   `json:",omitempty"`
+	BackupBootstrapInterval     *OptionalDuration `json:",omitempty"`
 }

 type InternalBitswap struct {
diff --git a/core/bootstrap/bootstrap.go b/core/bootstrap/bootstrap.go
index daa0a44d3d5..b566e0e978e 100644
--- a/core/bootstrap/bootstrap.go
+++ b/core/bootstrap/bootstrap.go
@@ -3,16 +3,16 @@ package bootstrap

 import (
 	"context"
 	"errors"
-	"fmt"
 	"io"
 	"math/rand"
 	"sync"
+	"sync/atomic"
 	"time"

 	logging "github.com/ipfs/go-log"
 	"github.com/jbenet/goprocess"
-	"github.com/jbenet/goprocess/context"
-	"github.com/jbenet/goprocess/periodic"
+	goprocessctx "github.com/jbenet/goprocess/context"
+	periodicproc "github.com/jbenet/goprocess/periodic"
 	"github.com/libp2p/go-libp2p/core/host"
 	"github.com/libp2p/go-libp2p/core/network"
 	"github.com/libp2p/go-libp2p/core/peer"
@@ -50,13 +50,26 @@ type BootstrapConfig struct {
 	// for the bootstrap process to use. This makes it possible for clients
 	// to control the peers the process uses at any moment.
 	BootstrapPeers func() []peer.AddrInfo
+
+	// BackupBootstrapInterval governs the periodic interval at which the node will
+	// attempt to save connected nodes to use as temporary bootstrap peers.
+	BackupBootstrapInterval time.Duration
+
+	// MaxBackupBootstrapSize controls the maximum number of peers we're saving
+	// as backup bootstrap peers.
+	MaxBackupBootstrapSize int
+
+	SaveBackupBootstrapPeers func(context.Context, []peer.AddrInfo)
+	LoadBackupBootstrapPeers func(context.Context) []peer.AddrInfo
 }

 // DefaultBootstrapConfig specifies default sane parameters for bootstrapping.
 var DefaultBootstrapConfig = BootstrapConfig{
-	MinPeerThreshold:  4,
-	Period:            30 * time.Second,
-	ConnectionTimeout: (30 * time.Second) / 3, // Perod / 3
+	MinPeerThreshold:        4,
+	Period:                  30 * time.Second,
+	ConnectionTimeout:       (30 * time.Second) / 3, // Period / 3
+	BackupBootstrapInterval: 1 * time.Hour,
+	MaxBackupBootstrapSize:  20,
 }

 func BootstrapConfigWithPeers(pis []peer.AddrInfo) BootstrapConfig {
@@ -90,6 +103,9 @@ func Bootstrap(id peer.ID, host host.Host, rt routing.Routing, cfg BootstrapConf
 			log.Debugf("%s bootstrap error: %s", id, err)
 		}

+		// Exit the first call (triggered independently by `proc.Go`, not `Tick`)
+		// only after the *single* Routing.Bootstrap call is done. Subsequent
+		// periodic calls (`Tick`) will not block on this.
 		<-doneWithRound
 	}

@@ -108,9 +124,100 @@ func Bootstrap(id peer.ID, host host.Host, rt routing.Routing, cfg BootstrapConf
 	doneWithRound <- struct{}{}
 	close(doneWithRound) // it no longer blocks periodic
+
+	startSavePeersAsTemporaryBootstrapProc(cfg, host, proc)
+
 	return proc, nil
 }

+// Aside from the main bootstrap process, we also run a secondary one that
+// saves connected peers as a backup measure in case we can't connect to any of
+// the official bootstrap ones. These peers will serve as *temporary* bootstrap
+// nodes.
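+//
+// A rough lifecycle sketch (names as in this file; timings are the defaults
+// from DefaultBootstrapConfig):
+//
+//	proc:          ticks every Period (30s) and tops up connections.
+//	savePeersProc: ticks every BackupBootstrapInterval (1h) and persists up to
+//	               MaxBackupBootstrapSize (20) connected peers as backups.
+//	closing proc also closes savePeersProc, so both stop together.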
+func startSavePeersAsTemporaryBootstrapProc(cfg BootstrapConfig, host host.Host, bootstrapProc goprocess.Process) {
+	savePeersFn := func(worker goprocess.Process) {
+		ctx := goprocessctx.OnClosingContext(worker)
+
+		if err := saveConnectedPeersAsTemporaryBootstrap(ctx, host, cfg); err != nil {
+			log.Debugf("saveConnectedPeersAsTemporaryBootstrap error: %s", err)
+		}
+	}
+	savePeersProc := periodicproc.Tick(cfg.BackupBootstrapInterval, savePeersFn)
+
+	// When the main bootstrap process ends, also terminate the 'save connected
+	// peers' one. Coupling the two seems the easiest way to handle this backup
+	// process without additional complexity.
+	go func() {
+		<-bootstrapProc.Closing()
+		savePeersProc.Close()
+	}()
+
+	// Run the first round now (after the initial bootstrap round has finished),
+	// as BackupBootstrapInterval can be much longer than the bootstrap Period.
+	savePeersProc.Go(savePeersFn)
+}
+
+func saveConnectedPeersAsTemporaryBootstrap(ctx context.Context, host host.Host, cfg BootstrapConfig) error {
+	// Randomize the list of connected peers; we don't prioritize anyone.
+	connectedPeers := randomizeList(host.Network().Peers())
+
+	bootstrapPeers := cfg.BootstrapPeers()
+	backupPeers := make([]peer.AddrInfo, 0, cfg.MaxBackupBootstrapSize)
+
+	// Choose peers to save, filtering out the ones that are already bootstrap nodes.
+	for _, p := range connectedPeers {
+		found := false
+		for _, bootstrapPeer := range bootstrapPeers {
+			if p == bootstrapPeer.ID {
+				found = true
+				break
+			}
+		}
+		if !found {
+			backupPeers = append(backupPeers, peer.AddrInfo{
+				ID:    p,
+				Addrs: host.Network().Peerstore().Addrs(p),
+			})
+		}
+
+		if len(backupPeers) >= cfg.MaxBackupBootstrapSize {
+			break
+		}
+	}
+
+	// If we didn't reach the target number, fall back to previously stored connected peers.
+	if len(backupPeers) < cfg.MaxBackupBootstrapSize {
+		oldSavedPeers := cfg.LoadBackupBootstrapPeers(ctx)
+		log.Debugf("missing %d peers to reach backup bootstrap target of %d, trying from previous list of %d saved peers",
+			cfg.MaxBackupBootstrapSize-len(backupPeers), cfg.MaxBackupBootstrapSize, len(oldSavedPeers))
+
+		// Add some of the old saved peers. Ensure we don't duplicate them.
+		for _, p := range oldSavedPeers {
+			found := false
+			for _, sp := range backupPeers {
+				if p.ID == sp.ID {
+					found = true
+					break
+				}
+			}
+
+			if !found {
+				backupPeers = append(backupPeers, p)
+			}
+
+			if len(backupPeers) >= cfg.MaxBackupBootstrapSize {
+				break
+			}
+		}
+	}
+
+	cfg.SaveBackupBootstrapPeers(ctx, backupPeers)
+	log.Debugf("saved %d peers (of %d target) as bootstrap backup in the config", len(backupPeers), cfg.MaxBackupBootstrapSize)
+	return nil
+}
+
+// Connect to as many peers as needed to reach BootstrapConfig.MinPeerThreshold.
+// Peers can be original bootstrap ones or temporary ones (drawn from a list of
+// persisted, previously connected peers).
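+//
+// A sketch of the round's flow (pseudocode of the function below):
+//
+//	needed := cfg.MinPeerThreshold - len(connected)              // done if <= 0
+//	needed -= peersConnect(cfg.BootstrapPeers(), permanent)      // original peers
+//	needed -= peersConnect(cfg.LoadBackupBootstrapPeers(ctx), temporary)
+//	if needed > 0 { return ErrNotEnoughBootstrapPeers }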
func bootstrapRound(ctx context.Context, host host.Host, cfg BootstrapConfig) error {

 	ctx, cancel := context.WithTimeout(ctx, cfg.ConnectionTimeout)
@@ -127,35 +234,58 @@ func bootstrapRound(ctx context.Context, host host.Host, cfg BootstrapConfig) er
 			id, len(connected), cfg.MinPeerThreshold)
 		return nil
 	}
-	numToDial := cfg.MinPeerThreshold - len(connected)
+	numToDial := cfg.MinPeerThreshold - len(connected) // numToDial > 0

-	// filter out bootstrap nodes we are already connected to
-	var notConnected []peer.AddrInfo
-	for _, p := range peers {
-		if host.Network().Connectedness(p.ID) != network.Connected {
-			notConnected = append(notConnected, p)
+	if len(peers) > 0 {
+		numToDial -= int(peersConnect(ctx, host, peers, numToDial, true))
+		if numToDial <= 0 {
+			return nil
 		}
 	}

-	// if connected to all bootstrap peer candidates, exit
-	if len(notConnected) < 1 {
-		log.Debugf("%s no more bootstrap peers to create %d connections", id, numToDial)
-		return ErrNotEnoughBootstrapPeers
+	log.Debugf("not enough bootstrap peers to fill the remaining target of %d connections, trying backup list", numToDial)
+
+	tempBootstrapPeers := cfg.LoadBackupBootstrapPeers(ctx)
+	if len(tempBootstrapPeers) > 0 {
+		numToDial -= int(peersConnect(ctx, host, tempBootstrapPeers, numToDial, false))
+		if numToDial <= 0 {
+			return nil
+		}
 	}

-	// connect to a random susbset of bootstrap candidates
-	randSubset := randomSubsetOfPeers(notConnected, numToDial)
+	log.Debugf("tried both original bootstrap peers and temporary ones but still missing target of %d connections", numToDial)

-	log.Debugf("%s bootstrapping to %d nodes: %s", id, numToDial, randSubset)
-	return bootstrapConnect(ctx, host, randSubset)
+	return ErrNotEnoughBootstrapPeers
 }

-func bootstrapConnect(ctx context.Context, ph host.Host, peers []peer.AddrInfo) error {
-	if len(peers) < 1 {
-		return ErrNotEnoughBootstrapPeers
-	}
+// Attempt to make `needed` connections from the `availablePeers` list, marking
+// peers as either permanent or temporary when adding them to the Peerstore.
+// Return the number of connections completed. We eagerly over-connect in
+// parallel, so we may end up with more connections than needed.
+// (We spawn one goroutine and connection attempt per availablePeer, but this
+// list comes from the restricted sets of original or temporary bootstrap
+// nodes, which keeps it at a sane size.)
+func peersConnect(ctx context.Context, ph host.Host, availablePeers []peer.AddrInfo, needed int, permanent bool) uint64 {
+	peers := randomizeList(availablePeers)
+
+	// Monitor the number of connections and stop if we reach the target.
+	var connected uint64
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+	go func() {
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-time.After(1 * time.Second):
+				if int(atomic.LoadUint64(&connected)) >= needed {
+					cancel()
+					return
+				}
+			}
+		}
+	}()

-	errs := make(chan error, len(peers))
 	var wg sync.WaitGroup
 	for _, p := range peers {

@@ -164,45 +294,46 @@ func bootstrapConnect(ctx context.Context, ph host.Host, peers []peer.AddrInfo)
 		// fail/abort due to an expiring context.
 		// Also, performed asynchronously for dial speed.

+		if int(atomic.LoadUint64(&connected)) >= needed {
+			cancel()
+			break
+		}
+
 		wg.Add(1)
 		go func(p peer.AddrInfo) {
 			defer wg.Done()
+
+			// Skip addresses belonging to a peer we're already connected to.
+			// (Not a guarantee but a best-effort policy.)
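+			// ("Best effort" because another goroutine may complete a
+			// connection to the same peer right after this check.)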
+			if ph.Network().Connectedness(p.ID) == network.Connected {
+				return
+			}
 			log.Debugf("%s bootstrapping to %s", ph.ID(), p.ID)

-			ph.Peerstore().AddAddrs(p.ID, p.Addrs, peerstore.PermanentAddrTTL)
 			if err := ph.Connect(ctx, p); err != nil {
-				log.Debugf("failed to bootstrap with %v: %s", p.ID, err)
-				errs <- err
+				if ctx.Err() != context.Canceled {
+					log.Debugf("failed to bootstrap with %v: %s", p.ID, err)
+				}
 				return
 			}
+			if permanent {
+				// We're connecting to an original bootstrap peer, so mark it as
+				// a permanent address (Connect will register it as TempAddrTTL).
+				ph.Peerstore().AddAddrs(p.ID, p.Addrs, peerstore.PermanentAddrTTL)
+			}
+
 			log.Infof("bootstrapped with %v", p.ID)
+			atomic.AddUint64(&connected, 1)
 		}(p)
 	}
 	wg.Wait()

-	// our failure condition is when no connection attempt succeeded.
-	// So drain the errs channel, counting the results.
-	close(errs)
-	count := 0
-	var err error
-	for err = range errs {
-		if err != nil {
-			count++
-		}
-	}
-	if count == len(peers) {
-		return fmt.Errorf("failed to bootstrap. %s", err)
-	}
-	return nil
+	return connected
 }

-func randomSubsetOfPeers(in []peer.AddrInfo, max int) []peer.AddrInfo {
-	if max > len(in) {
-		max = len(in)
-	}
-
-	out := make([]peer.AddrInfo, max)
-	for i, val := range rand.Perm(len(in))[:max] {
+func randomizeList[T any](in []T) []T {
+	out := make([]T, len(in))
+	for i, val := range rand.Perm(len(in)) {
 		out[i] = in[val]
 	}
 	return out
diff --git a/core/bootstrap/bootstrap_test.go b/core/bootstrap/bootstrap_test.go
index 98a4a78272c..39490a474c7 100644
--- a/core/bootstrap/bootstrap_test.go
+++ b/core/bootstrap/bootstrap_test.go
@@ -7,9 +7,9 @@ import (
 	"github.com/libp2p/go-libp2p/core/test"
 )

-func TestSubsetWhenMaxIsGreaterThanLengthOfSlice(t *testing.T) {
+func TestRandomizeAddressList(t *testing.T) {
 	var ps []peer.AddrInfo
-	sizeofSlice := 100
+	sizeofSlice := 10
 	for i := 0; i < sizeofSlice; i++ {
 		pid, err := test.RandPeerID()
 		if err != nil {
@@ -18,7 +18,7 @@ func TestSubsetWhenMaxIsGreaterThanLengthOfSlice(t *testing.T) {
 		ps = append(ps, peer.AddrInfo{ID: pid})
 	}

-	out := randomSubsetOfPeers(ps, 2*sizeofSlice)
+	out := randomizeList(ps)
 	if len(out) != len(ps) {
 		t.Fail()
 	}
diff --git a/core/core.go b/core/core.go
index 448386444fd..5c0a7ef2cba 100644
--- a/core/core.go
+++ b/core/core.go
@@ -11,10 +11,13 @@ package core

 import (
 	"context"
+	"encoding/json"
 	"io"
+	"time"

 	"github.com/ipfs/boxo/filestore"
 	pin "github.com/ipfs/boxo/pinning/pinner"
+	"github.com/ipfs/go-datastore"

 	bserv "github.com/ipfs/boxo/blockservice"
 	bstore "github.com/ipfs/boxo/blockstore"
@@ -46,6 +49,7 @@ import (
 	"github.com/ipfs/boxo/namesys"
 	ipnsrp "github.com/ipfs/boxo/namesys/republisher"

+	"github.com/ipfs/kubo/config"
 	"github.com/ipfs/kubo/core/bootstrap"
 	"github.com/ipfs/kubo/core/node"
 	"github.com/ipfs/kubo/core/node/libp2p"
@@ -165,12 +169,40 @@ func (n *IpfsNode) Bootstrap(cfg bootstrap.BootstrapConfig) error {
 			return ps
 		}
 	}
+	if cfg.SaveBackupBootstrapPeers == nil {
+		cfg.SaveBackupBootstrapPeers = func(ctx context.Context, peerList []peer.AddrInfo) {
+			err := n.saveTempBootstrapPeers(ctx, peerList)
+			if err != nil {
+				log.Warnf("saveTempBootstrapPeers failed: %s", err)
+				return
+			}
+		}
+	}
+	if cfg.LoadBackupBootstrapPeers == nil {
+		cfg.LoadBackupBootstrapPeers = func(ctx context.Context) []peer.AddrInfo {
+			peerList, err := n.loadTempBootstrapPeers(ctx)
+			if err != nil {
+				log.Warnf("loadTempBootstrapPeers failed: %s", err)
+				return nil
+			}
+			return peerList
+		}
+	}
+
+	repoConf, err := n.Repo.Config()
+	if err != nil {
+		return err
+	}
+	if repoConf.Internal.BackupBootstrapInterval != nil {
+		cfg.BackupBootstrapInterval = repoConf.Internal.BackupBootstrapInterval.WithDefault(time.Hour)
+	}

-	var err error
 	n.Bootstrapper, err = bootstrap.Bootstrap(n.Identity, n.PeerHost, n.Routing, cfg)
 	return err
 }

+var TempBootstrapPeersKey = datastore.NewKey("/local/temp_bootstrap_peers")
+
 func (n *IpfsNode) loadBootstrapPeers() ([]peer.AddrInfo, error) {
 	cfg, err := n.Repo.Config()
 	if err != nil {
@@ -180,6 +212,33 @@ func (n *IpfsNode) loadBootstrapPeers() ([]peer.AddrInfo, error) {
 	return cfg.BootstrapPeers()
 }

+func (n *IpfsNode) saveTempBootstrapPeers(ctx context.Context, peerList []peer.AddrInfo) error {
+	ds := n.Repo.Datastore()
+	bytes, err := json.Marshal(config.BootstrapPeerStrings(peerList))
+	if err != nil {
+		return err
+	}
+
+	if err := ds.Put(ctx, TempBootstrapPeersKey, bytes); err != nil {
+		return err
+	}
+	return ds.Sync(ctx, TempBootstrapPeersKey)
+}
+
+func (n *IpfsNode) loadTempBootstrapPeers(ctx context.Context) ([]peer.AddrInfo, error) {
+	ds := n.Repo.Datastore()
+	bytes, err := ds.Get(ctx, TempBootstrapPeersKey)
+	if err != nil {
+		return nil, err
+	}
+
+	var addrs []string
+	if err := json.Unmarshal(bytes, &addrs); err != nil {
+		return nil, err
+	}
+	return config.ParseBootstrapPeers(addrs)
+}
+
 type ConstructPeerHostOpts struct {
 	AddrsFactory      p2pbhost.AddrsFactory
 	DisableNatPortMap bool
diff --git a/docs/changelogs/v0.21.md b/docs/changelogs/v0.21.md
index 57eb7002bad..b4af6168b3b 100644
--- a/docs/changelogs/v0.21.md
+++ b/docs/changelogs/v0.21.md
@@ -6,6 +6,7 @@

 - [Overview](#overview)
 - [🔦 Highlights](#-highlights)
+  - [Saving previously seen nodes for later bootstrapping](#saving-previously-seen-nodes-for-later-bootstrapping)
 - [📝 Changelog](#-changelog)
 - [👨‍👩‍👧‍👦 Contributors](#-contributors)

@@ -13,6 +14,21 @@

 ### 🔦 Highlights

+#### Saving previously seen nodes for later bootstrapping
+
+Kubo now stores a subset of connected peers as backup bootstrap nodes ([kubo#8856](https://github.com/ipfs/kubo/pull/8856)).
+These nodes are used in addition to the explicitly defined bootstrappers in the
+[`Bootstrap`](https://github.com/ipfs/kubo/blob/master/docs/config.md#bootstrap) configuration.
+
+This improves the resiliency of the system: nodes no longer need to rely solely
+on the default bootstrappers operated by Protocol Labs to join the public IPFS
+swarm. Previously, this level of robustness was only available in LAN contexts
+with [mDNS peer discovery](https://github.com/ipfs/kubo/blob/master/docs/config.md#discoverymdns)
+enabled.
+
+With this update, the same robustness extends to nodes that have no mDNS peers
+and rely solely on the public DHT.
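+
+The backup list is refreshed once per hour by default. For special cases (such
+as tests) the cadence can be tuned with the hidden `Internal.BackupBootstrapInterval`
+setting. A sketch, assuming the usual duration-string encoding of the config:
+
+```json
+{
+  "Internal": {
+    "BackupBootstrapInterval": "20m"
+  }
+}
+```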
+
 ### 📝 Changelog

 ### 👨‍👩‍👧‍👦 Contributors
diff --git a/test/cli/backup_bootstrap_test.go b/test/cli/backup_bootstrap_test.go
new file mode 100644
index 00000000000..017499f3d63
--- /dev/null
+++ b/test/cli/backup_bootstrap_test.go
@@ -0,0 +1,60 @@
+package cli
+
+import (
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/ipfs/kubo/config"
+	"github.com/ipfs/kubo/test/cli/harness"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestBackupBootstrapPeers(t *testing.T) {
+	nodes := harness.NewT(t).NewNodes(3).Init()
+	nodes.ForEachPar(func(n *harness.Node) {
+		n.UpdateConfig(func(cfg *config.Config) {
+			cfg.Bootstrap = []string{}
+			cfg.Addresses.Swarm = []string{fmt.Sprintf("/ip4/127.0.0.1/tcp/%d", harness.NewRandPort())}
+			cfg.Discovery.MDNS.Enabled = false
+			cfg.Internal.BackupBootstrapInterval = config.NewOptionalDuration(250 * time.Millisecond)
+		})
+	})
+
+	// Start all nodes and ensure they have no peers.
+	nodes.StartDaemons()
+	nodes.ForEachPar(func(n *harness.Node) {
+		assert.Len(t, n.Peers(), 0)
+	})
+
+	// Connect nodes 0 and 1, ensure they know each other.
+	nodes[0].Connect(nodes[1])
+	assert.Len(t, nodes[0].Peers(), 1)
+	assert.Len(t, nodes[1].Peers(), 1)
+	assert.Len(t, nodes[2].Peers(), 0)
+
+	// Wait a bit to ensure that nodes 0 and 1 saved their temporary bootstrap backups.
+	time.Sleep(time.Millisecond * 500)
+	nodes.StopDaemons()
+
+	// Start 1 and 2. 2 does not know anyone yet.
+	nodes[1].StartDaemon()
+	nodes[2].StartDaemon()
+	assert.Len(t, nodes[1].Peers(), 0)
+	assert.Len(t, nodes[2].Peers(), 0)
+
+	// Connect 1 and 2, ensure they know each other.
+	nodes[1].Connect(nodes[2])
+	assert.Len(t, nodes[1].Peers(), 1)
+	assert.Len(t, nodes[2].Peers(), 1)
+
+	// Start 0 and wait a bit. It should connect to 1, and then discover 2 via
+	// the backup bootstrap peers.
+	nodes[0].StartDaemon()
+	time.Sleep(time.Millisecond * 500)
+
+	// Check that they're all connected.
+	assert.Len(t, nodes[0].Peers(), 2)
+	assert.Len(t, nodes[1].Peers(), 2)
+	assert.Len(t, nodes[2].Peers(), 2)
+}
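For completeness: `SaveBackupBootstrapPeers` and `LoadBackupBootstrapPeers` are plain function fields on `BootstrapConfig`, so an embedder could, hypothetically, back them with something other than the repo datastore that `core.(*IpfsNode).Bootstrap` installs by default. A minimal sketch (the `memBackup` type and `configWithMemBackup` helper are illustrative, not part of this PR):

```go
package main

import (
	"context"
	"sync"

	"github.com/ipfs/kubo/core/bootstrap"
	"github.com/libp2p/go-libp2p/core/peer"
)

// memBackup is a hypothetical in-memory stand-in for the datastore-backed
// save/load hooks used by the kubo daemon.
type memBackup struct {
	mu    sync.Mutex
	peers []peer.AddrInfo
}

func configWithMemBackup() bootstrap.BootstrapConfig {
	m := &memBackup{}
	cfg := bootstrap.DefaultBootstrapConfig
	cfg.SaveBackupBootstrapPeers = func(_ context.Context, ps []peer.AddrInfo) {
		m.mu.Lock()
		defer m.mu.Unlock()
		m.peers = append([]peer.AddrInfo(nil), ps...) // keep a private copy
	}
	cfg.LoadBackupBootstrapPeers = func(_ context.Context) []peer.AddrInfo {
		m.mu.Lock()
		defer m.mu.Unlock()
		return append([]peer.AddrInfo(nil), m.peers...)
	}
	return cfg
}

func main() {
	cfg := configWithMemBackup()
	_ = cfg // pass to bootstrap.Bootstrap(id, host, routing, cfg) as usual
}
```

A test could use such hooks to assert exactly which peers were persisted without touching the repo datastore.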