diff --git a/acceptance/cluster/localcluster.go b/acceptance/cluster/localcluster.go index 782c044f18de..f261381785ea 100644 --- a/acceptance/cluster/localcluster.go +++ b/acceptance/cluster/localcluster.go @@ -233,17 +233,10 @@ func (l *LocalCluster) initCluster() { } } create := func() (*Container, error) { - var entrypoint []string - if *cockroachImage == builderImage { - entrypoint = append(entrypoint, "/"+filepath.Base(*cockroachBinary)) - } else if *cockroachEntry != "" { - entrypoint = append(entrypoint, *cockroachEntry) - } return createContainer(l, dockerclient.ContainerConfig{ Image: *cockroachImage, Volumes: vols, - Entrypoint: entrypoint, - Cmd: []string{"init", "--stores=ssd=" + dataStr(0, 0)}, + Entrypoint: []string{"/bin/true"}, }) } c, err := create() @@ -351,11 +344,6 @@ func (l *LocalCluster) createNodeCerts() { } func (l *LocalCluster) startNode(i int) *Container { - gossipNodes := []string{} - for j := 0; j < l.numLocal; j++ { - gossipNodes = append(gossipNodes, fmt.Sprintf("%s:%d", nodeStr(j), cockroachPort)) - } - var stores = "ssd=" + dataStr(i, 0) for j := 1; j < l.numStores; j++ { stores += ",ssd=" + dataStr(i, j) @@ -366,9 +354,13 @@ func (l *LocalCluster) startNode(i int) *Container { "--stores=" + stores, "--certs=/certs", "--addr=" + fmt.Sprintf("%s:%d", nodeStr(i), cockroachPort), - "--gossip=" + strings.Join(gossipNodes, ","), "--scan-max-idle-time=200ms", // set low to speed up tests } + // Append --join flag for all nodes except first. + if i > 0 { + cmd = append(cmd, fmt.Sprintf("--join=%s:%d", nodeStr(0), cockroachPort)) + } + var locallogDir string if len(l.logDir) > 0 { dockerlogDir := "/logs/" + nodeStr(i) diff --git a/cli/cert.go b/cli/cert.go index 3e3f296a0b30..0cdc4a88701d 100644 --- a/cli/cert.go +++ b/cli/cert.go @@ -17,8 +17,9 @@ package cli import ( + "fmt" + "github.com/cockroachdb/cockroach/security" - "github.com/cockroachdb/cockroach/util" "github.com/spf13/cobra" ) @@ -44,7 +45,7 @@ individual files in the directory specified by --certs (required). // to their corresponding files. func runCreateCACert(cmd *cobra.Command, args []string) error { if err := security.RunCreateCACert(context.Certs, keySize); err != nil { - return util.Errorf("failed to generate CA certificate: %s", err) + return fmt.Errorf("failed to generate CA certificate: %s", err) } return nil } @@ -68,7 +69,7 @@ At least one host should be passed in (either IP address of dns name). // to their corresponding files. 
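Stepping back from the acceptance-test hunk above: with the separate init container gone, node 0 starts with no --join flag and bootstraps itself, and every subsequent node is pointed at node 0. A minimal, self-contained sketch of that argument construction (nodeAddr is a hypothetical stand-in for the harness's nodeStr and cockroachPort helpers):

package main

import "fmt"

// nodeAddr is a hypothetical stand-in for the test harness's
// nodeStr(i) and cockroachPort helpers.
func nodeAddr(i int) string { return fmt.Sprintf("roach%d:26257", i) }

// startArgs builds the per-node command line: only the first node
// starts without --join; every other node joins via node 0.
func startArgs(i int) []string {
	args := []string{
		"start",
		"--stores=ssd=/data" + fmt.Sprint(i),
		"--addr=" + nodeAddr(i),
	}
	if i > 0 {
		args = append(args, "--join="+nodeAddr(0))
	}
	return args
}

func main() {
	for i := 0; i < 3; i++ {
		fmt.Println(startArgs(i))
	}
}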
func runCreateNodeCert(cmd *cobra.Command, args []string) error { if err := security.RunCreateNodeCert(context.Certs, keySize, args); err != nil { - return util.Errorf("failed to generate node certificate: %s", err) + return fmt.Errorf("failed to generate node certificate: %s", err) } return nil } @@ -95,7 +96,7 @@ func runCreateClientCert(cmd *cobra.Command, args []string) error { return errMissingParams } if err := security.RunCreateClientCert(context.Certs, keySize, args[0]); err != nil { - return util.Errorf("failed to generate clent certificate: %s", err) + return fmt.Errorf("failed to generate client certificate: %s", err) } return nil } diff --git a/cli/cli.go b/cli/cli.go index a169d07f4a4d..2f6ca53c82f0 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -57,7 +57,6 @@ var cockroachCmd = &cobra.Command{ func init() { cockroachCmd.AddCommand( - initCmd, startCmd, certCmd, exterminateCmd, diff --git a/cli/cli_test.go b/cli/cli_test.go index 443d757dfd8a..06340771fc6a 100644 --- a/cli/cli_test.go +++ b/cli/cli_test.go @@ -548,8 +548,7 @@ func TestFlagUsage(t *testing.T) { cockroach [command] Available Commands: - init init new Cockroach cluster - start start a node by joining the gossip network + start start a node cert create ca, node, and client certs exterminate destroy all data held by the node quit drain and shutdown node diff --git a/cli/flags.go b/cli/flags.go index 598f92db62f1..244ea0ad1d24 100644 --- a/cli/flags.go +++ b/cli/flags.go @@ -69,18 +69,14 @@ var flagUsage = map[string]string{ Directory containing RSA key and x509 certs. This flag is required if --insecure=false. `, - "gossip": ` - A comma-separated list of gossip addresses or resolvers for gossip - bootstrap. Each item in the list has an optional type: + "join": ` + A comma-separated list of addresses to use when a new node is joining + an existing cluster. Each address in the list has an optional type: [type=]
<address>. An unspecified type means ip address or dns. Type is one of: - tcp: (default if type is omitted): plain ip address or hostname. - - unix: unix socket - - lb: RPC load balancer forwarding to an arbitrary node - http-lb: HTTP load balancer: we query http(s)://<address>/_status/details/local - - self: for single node systems, specify --gossip=self (the <address> -
is omitted). `, "key-size": ` Key size in bits for CA/Node/Client certificates. @@ -125,8 +121,8 @@ var flagUsage = map[string]string{ clusters to be more responsive. `, "time-until-store-dead": ` - Adjusts the timeout for stores. If there's been no gossiped updated - from a store after this time, the store is considered unavailable. + Adjusts the timeout for stores. If there's been no communication from + a store in this time interval, the store is considered unavailable. Replicas on an unavailable store will be moved to available ones. `, "stores": ` @@ -168,14 +164,6 @@ func initFlags(ctx *server.Context) { pf.Var(pflagValue{f.Value}, normalizeStdFlagName(f.Name), f.Usage) }) - { - f := initCmd.Flags() - f.StringVar(&ctx.Stores, "stores", ctx.Stores, flagUsage["stores"]) - if err := initCmd.MarkFlagRequired("stores"); err != nil { - panic(err) - } - } - { f := startCmd.Flags() f.BoolVar(&ctx.EphemeralSingleNode, "dev", ctx.EphemeralSingleNode, flagUsage["dev"]) @@ -193,8 +181,8 @@ func initFlags(ctx *server.Context) { f.StringVar(&ctx.Certs, "certs", ctx.Certs, flagUsage["certs"]) f.BoolVar(&ctx.Insecure, "insecure", ctx.Insecure, flagUsage["insecure"]) - // Gossip flags. - f.StringVar(&ctx.GossipBootstrap, "gossip", ctx.GossipBootstrap, flagUsage["gossip"]) + // Cluster joining flags. + f.StringVar(&ctx.JoinUsing, "join", ctx.JoinUsing, flagUsage["join"]) // KV flags. f.BoolVar(&ctx.Linearizable, "linearizable", ctx.Linearizable, flagUsage["linearizable"]) @@ -206,9 +194,6 @@ func initFlags(ctx *server.Context) { f.DurationVar(&ctx.ScanMaxIdleTime, "scan-max-idle-time", ctx.ScanMaxIdleTime, flagUsage["scan-max-idle-time"]) f.DurationVar(&ctx.TimeUntilStoreDead, "time-until-store-dead", ctx.TimeUntilStoreDead, flagUsage["time-until-store-dead"]) - if err := startCmd.MarkFlagRequired("gossip"); err != nil { - panic(err) - } if err := startCmd.MarkFlagRequired("stores"); err != nil { panic(err) } diff --git a/cli/start.go b/cli/start.go index c7f4dac71474..f4d6adc800f4 100644 --- a/cli/start.go +++ b/cli/start.go @@ -34,7 +34,6 @@ import ( "github.com/cockroachdb/cockroach/util" "github.com/cockroachdb/cockroach/util/log" "github.com/cockroachdb/cockroach/util/stop" - "github.com/cockroachdb/cockroach/util/uuid" "github.com/spf13/cobra" ) @@ -77,77 +76,29 @@ func getJSON(hostport, path string, v interface{}) error { return util.GetJSON(httpClient, context.HTTPRequestScheme(), hostport, path, v) } -// initCmd command initializes a new Cockroach cluster. -var initCmd = &cobra.Command{ - Use: "init --stores=...", - Short: "init new Cockroach cluster", - Long: ` -Initialize a new Cockroach cluster using the --stores flag to specify one or -more storage locations. The first of these storage locations is used to -bootstrap the first replica of the first range. If any of the storage locations -are already part of a pre-existing cluster, the bootstrap will fail. -`, - Example: ` cockroach init --stores=ssd=/mnt/ssd1,ssd=/mnt/ssd2`, - SilenceUsage: true, - RunE: runInit, -} - -// runInit initializes the engine based on the first -// store. The bootstrap engine may not be an in-memory type. -func runInit(_ *cobra.Command, _ []string) error { - stopper := stop.NewStopper() - defer stopper.Stop() - - // Default user for servers. 
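For orientation before the cli/start.go hunk continues: the flags.go change above swaps --gossip for --join and drops the MarkFlagRequired call, since a node given no join addresses now seeds a new cluster. A rough stdlib sketch of the new wiring (the real code uses cobra/pflag and ctx.JoinUsing; this is illustrative only):

package main

import (
	"flag"
	"fmt"
	"strings"
)

func main() {
	// Unlike the old --gossip flag, --join is optional: an empty list
	// means "bootstrap a new cluster".
	join := flag.String("join", "", "comma-separated addresses of nodes in an existing cluster")
	flag.Parse()

	var addrs []string
	for _, a := range strings.Split(*join, ",") {
		if a != "" {
			addrs = append(addrs, a)
		}
	}
	fmt.Println("join addresses:", addrs)
}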
- context.User = security.NodeUser - - if err := context.InitStores(stopper); err != nil { - return util.Errorf("failed to initialize stores: %s", err) - } - if len(context.Engines) > 1 { - return util.Errorf("cannot bootstrap to more than one store") - } - - return initCluster(stopper) -} - -func initCluster(stopper *stop.Stopper) error { - // Generate a new UUID for cluster ID and bootstrap the cluster. - clusterID := uuid.NewUUID4().String() - if _, err := server.BootstrapCluster(clusterID, context.Engines, stopper); err != nil { - return util.Errorf("unable to bootstrap cluster: %s", err) - } - - log.Infof("cockroach cluster %s has been initialized", clusterID) - return nil -} - -// startCmd command starts nodes by joining the gossip network. +// startCmd starts a node by initializing the stores and joining +// the cluster. var startCmd = &cobra.Command{ Use: "start", - Short: "start a node by joining the gossip network", + Short: "start a node", Long: ` -Start a Cockroach node by joining the gossip network and exporting key ranges -stored on physical device(s). The gossip network is joined by contacting one or -more well-known hosts specified by the --gossip flag. Every node should be run -with the same list of bootstrap hosts to guarantee a connected network. An -alternate approach is to use a single host for --gossip and round-robin DNS. +Start a CockroachDB node, which will export data from one or more +storage devices, specified via the --stores flag. -Each node exports data from one or more physical devices. These devices are -specified via the --stores flag. This is a comma-separated list of paths to -storage directories or for in-memory stores, the number of bytes. Although the -paths should be specified to correspond uniquely to physical devices, this -requirement isn't strictly enforced. See the --stores flag help description for -additional details.`, - Example: ` cockroach start --certs=<dir> --gossip=host1:port1[,...] --stores=ssd=/mnt/ssd1,...`, +If no cluster exists yet and this is the first node, no additional +flags are required. If the cluster already exists, and this node is +uninitialized, specify the --join flag to point to any healthy node +(or list of nodes) already part of the cluster. +`, + Example: ` cockroach start --certs=<dir> --stores=ssd=/mnt/ssd1,... [--join=host:port,[host:port]]`, SilenceUsage: true, RunE: runStart, } // runStart starts the cockroach node using --stores as the list of -// storage devices ("stores") on this machine and --gossip as the list -// of "well-known" hosts used to join this node to the cockroach -// cluster via the gossip network. +// storage devices ("stores") on this machine and --join as the list +// of other active nodes used to join this node to the cockroach +// cluster, if this is its first time connecting. func runStart(_ *cobra.Command, _ []string) error { info := util.GetBuildInfo() log.Infof("[build] %s @ %s (%s)", info.Tag, info.Time, info.Vers) @@ -160,36 +111,25 @@ func runStart(_ *cobra.Command, _ []string) error { // for the default cluster-wide zone config. config.DefaultZoneConfig.ReplicaAttrs = []roachpb.Attributes{{}} context.Stores = "mem=1073741824" - context.GossipBootstrap = server.SelfGossipAddr } stopper := stop.NewStopper() if err := context.InitStores(stopper); err != nil { - return util.Errorf("failed to initialize stores: %s", err) - } - - if context.EphemeralSingleNode { - // A separate stopper for bootstrapping so that we can properly shutdown - // all of the stores.
- initStopper := stop.NewStopper() - if err := initCluster(initStopper); err != nil { - return err - } - initStopper.Stop() + return fmt.Errorf("failed to initialize stores: %s", err) } if err := context.InitNode(); err != nil { - return util.Errorf("failed to initialize node: %s", err) + return fmt.Errorf("failed to initialize node: %s", err) } log.Info("starting cockroach node") s, err := server.NewServer(context, stopper) if err != nil { - return util.Errorf("failed to start Cockroach server: %s", err) + return fmt.Errorf("failed to start Cockroach server: %s", err) } - if err := s.Start(false); err != nil { - return util.Errorf("cockroach server exited with error: %s", err) + if err := s.Start(); err != nil { + return fmt.Errorf("cockroach server exited with error: %s", err) } signalCh := make(chan os.Signal, 1) diff --git a/client/client_test.go b/client/client_test.go index 9100ebd78474..33618bb533dc 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -683,7 +683,6 @@ func setupClientBenchData(useSSL bool, numVersions, numKeys int, b *testing.B) ( s := &server.TestServer{} s.Ctx = server.NewTestContext() - s.SkipBootstrap = exists if !useSSL { s.Ctx.Insecure = true } diff --git a/gossip/client.go b/gossip/client.go index f4a11783c732..351ad0a28f59 100644 --- a/gossip/client.go +++ b/gossip/client.go @@ -188,7 +188,7 @@ func (c *client) handleGossip(g *Gossip, call *netrpc.Call) error { return util.Errorf("received forward from node %d to %d (%s)", reply.NodeID, reply.AlternateNodeID, reply.AlternateAddr) } - // If we have the sentinel gossip, we're considered connected. + // If we have the sentinel gossip we're considered connected. g.checkHasConnected() // Check whether this outgoing client is duplicating work already diff --git a/gossip/gossip.go b/gossip/gossip.go index c0d7ab17ff24..48173c90484f 100644 --- a/gossip/gossip.go +++ b/gossip/gossip.go @@ -262,12 +262,11 @@ func (g *Gossip) SetStorage(storage Storage) error { var storedBI BootstrapInfo err := storage.ReadBootstrapInfo(&storedBI) if err != nil { - log.Warningf("failed to read bootstrap info: %s", err) + log.Warningf("failed to read gossip bootstrap info: %s", err) } - log.Infof("read %d gossip host(s) for bootstrapping from persistent storage", len(storedBI.Addresses)) // Merge the stored bootstrap info addresses with any we've become - // aware of through the --gossip bootstrap hosts we've connected to. + // aware of through the --join bootstrap hosts we've connected to. 
if len(g.bootstrapInfo.Addresses) > 0 { existing := map[string]struct{}{} makeKey := func(a util.UnresolvedAddr) string { return fmt.Sprintf("%s,%s", a.Network(), a.String()) } @@ -283,8 +282,6 @@ func (g *Gossip) SetStorage(storage Storage) error { if numAddrs := len(g.bootstrapInfo.Addresses); numAddrs > len(storedBI.Addresses) { if err := g.storage.WriteBootstrapInfo(&g.bootstrapInfo); err != nil { log.Error(err) - } else { - log.Infof("wrote %d merged gossip host(s) to persistent storage", numAddrs) } } } else { @@ -297,7 +294,7 @@ func (g *Gossip) SetStorage(storage Storage) error { for _, addr := range g.bootstrapInfo.Addresses { r, err := resolver.NewResolverFromUnresolvedAddr(addr) if err != nil { - log.Warningf("bad bootstrap address %s: %s", addr, err) + log.Warningf("bad node address %s: %s", addr, err) continue } if g.haveResolver(r) { @@ -312,9 +309,10 @@ func (g *Gossip) SetStorage(storage Storage) error { g.resolvers = append(g.resolvers, r) } - // If there are no resolvers after persistent storage has been queried, fatal error. + // If there are no resolvers even after merging known nodes from + // persistent storage, we need to error out. if len(g.resolvers) == 0 { - return fmt.Errorf("no resolvers specified for gossip network: try adding peers via --gossip") + return fmt.Errorf("no nodes were found to connect to cluster; use --join") } // If a new resolver was found, immediately signal bootstrap. @@ -338,6 +336,14 @@ func (g *Gossip) SetResolvers(resolvers []resolver.Resolver) { g.resolversTried = map[int]struct{}{} } +// HasResolvers returns true if this gossip instance has any +// configured resolvers. +func (g *Gossip) HasResolvers() bool { + g.mu.Lock() + defer g.mu.Unlock() + return len(g.resolvers) > 0 +} + // GetNodeIDAddress looks up the address of the node by ID. func (g *Gossip) GetNodeIDAddress(nodeID roachpb.NodeID) (net.Addr, error) { g.mu.Lock() @@ -381,6 +387,7 @@ func (g *Gossip) SimulationCycle() { // haveResolver returns whether the specified resolver is already in // the gossip node's list of resolvers. The caller must hold the // gossip mutex. +// TODO(spencer): sort and binary search. func (g *Gossip) haveResolver(r resolver.Resolver) bool { for _, ex := range g.resolvers { if ex.Type() == r.Type() && ex.Addr() == r.Addr() { @@ -390,6 +397,19 @@ func (g *Gossip) haveResolver(r resolver.Resolver) bool { return false } +// haveBootstrapAddress returns whether there is already a bootstrap +// address matching the specified address. The caller must hold the +// gossip mutex. +// TODO(spencer): sort and binary search. +func (g *Gossip) haveBootstrapAddress(addr util.UnresolvedAddr) bool { + for _, ex := range g.bootstrapInfo.Addresses { + if ex == addr { + return true + } + } + return false +} + // maxPeers returns the maximum number of peers each gossip node // may connect to. This is based on maxHops, which is a preset // maximum for number of hops allowed before the gossip network @@ -442,17 +462,18 @@ func (g *Gossip) updateNodeAddress(_ string, content roachpb.Value) { log.Warningf("bad address from gossip node %s: %s", desc, err) return } - if g.haveResolver(r) { - return + if !g.haveResolver(r) { + g.resolvers = append(g.resolvers, r) } - g.resolvers = append(g.resolvers, r) // Add new address to bootstrap info and persist if possible. 
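The merge in SetStorage keys each known address by network plus address string so stored hosts are appended at most once. A self-contained sketch of that dedupe step (addr is a simplified stand-in for util.UnresolvedAddr):

package main

import "fmt"

type addr struct{ network, value string }

// mergeAddrs mirrors the SetStorage merge: addresses read from
// persistent storage are combined with ones learned via --join,
// keyed by network+address so duplicates are dropped.
func mergeAddrs(known, stored []addr) []addr {
	existing := map[string]struct{}{}
	key := func(a addr) string { return a.network + "," + a.value }
	for _, a := range known {
		existing[key(a)] = struct{}{}
	}
	for _, a := range stored {
		if _, ok := existing[key(a)]; !ok {
			known = append(known, a)
		}
	}
	return known
}

func main() {
	known := []addr{{"tcp", "node1:26257"}}
	stored := []addr{{"tcp", "node1:26257"}, {"tcp", "node2:26257"}}
	fmt.Println(mergeAddrs(known, stored)) // [{tcp node1:26257} {tcp node2:26257}]
}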
- g.bootstrapInfo.Addresses = append(g.bootstrapInfo.Addresses, desc.Address) - if g.storage != nil { - // TODO(spencer): need to clean up ancient gossip nodes, which - // will otherwise stick around in the bootstrap info forever. - if err := g.storage.WriteBootstrapInfo(&g.bootstrapInfo); err != nil { - log.Error(err) + if !g.haveBootstrapAddress(desc.Address) { + g.bootstrapInfo.Addresses = append(g.bootstrapInfo.Addresses, desc.Address) + if g.storage != nil { + // TODO(spencer): need to clean up ancient gossip nodes, which + // will otherwise stick around in the bootstrap info forever. + if err := g.storage.WriteBootstrapInfo(&g.bootstrapInfo); err != nil { + log.Error(err) + } } } } @@ -653,8 +674,9 @@ func (g *Gossip) MaxHops() uint32 { } // Start launches the gossip instance, which commences joining the -// gossip network using the supplied rpc server and the gossip -// bootstrap addresses specified via command-line flag: --gossip. +// gossip network using the supplied rpc server and previously known +// peer addresses in addition to any bootstrap addresses specified via +// --join. // // The supplied address is used to identify the gossip instance in the // gossip network; it will be used by other instances to connect to @@ -878,13 +900,13 @@ func (g *Gossip) signalStalled() { // not being connected to the sentinel. This could happen in a network // partition, or because of misconfiguration. It's impossible to tell, // but we can warn appropriately. If there are no incoming or outgoing -// connections, we warn about the --gossip flag being set. If we've +// connections, we warn about the --join flag being set. If we've // connected, and all resolvers have been tried, we warn about either // the first range not being available or else possible the cluster // never having been initialized. func (g *Gossip) warnAboutStall() { if g.outgoing.len()+g.incoming.len() == 0 { - log.Warningf("not connected to gossip; check that gossip flag is set appropriately") + log.Warningf("not connected to cluster; check --join specifies another active node") } else if len(g.resolversTried) == len(g.resolvers) { log.Warningf("first range unavailable or cluster not initialized") } else { diff --git a/gossip/gossip_test.go b/gossip/gossip_test.go index a9ae4635801b..47f21f6ce9ae 100644 --- a/gossip/gossip_test.go +++ b/gossip/gossip_test.go @@ -19,6 +19,9 @@ package gossip import ( "bytes" "errors" + "fmt" + "net/http" + "net/http/httptest" "testing" "time" @@ -29,7 +32,6 @@ import ( "github.com/cockroachdb/cockroach/util" "github.com/cockroachdb/cockroach/util/hlc" "github.com/cockroachdb/cockroach/util/leaktest" - "github.com/cockroachdb/cockroach/util/log" "github.com/cockroachdb/cockroach/util/stop" ) @@ -54,26 +56,35 @@ func TestGossipInfoStore(t *testing.T) { func TestGossipGetNextBootstrapAddress(t *testing.T) { defer leaktest.AfterTest(t) + + // Set up an http server for testing the http load balancer. + i := 0 + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + i++ + fmt.Fprintf(w, `{"address": {"network": "tcp", "address": "10.10.0.%d:26257"}}`, i) + }) + s := httptest.NewServer(handler) + defer s.Close() + resolverSpecs := []string{ "127.0.0.1:9000", "tcp=127.0.0.1:9001", "unix=/tmp/unix-socket12345", - "lb=127.0.0.1:9002", + fmt.Sprintf("http-lb=%s", s.Listener.Addr()), "foo=127.0.0.1:9003", // error should not resolve. - "lb=", // error should not resolve. + "http-lb=", // error should not resolve. 
"localhost:9004", - "lb=127.0.0.1:9005", } resolvers := []resolver.Resolver{} for _, rs := range resolverSpecs { - resolver, err := resolver.NewResolver(&base.Context{}, rs) + resolver, err := resolver.NewResolver(&base.Context{Insecure: true}, rs) if err == nil { resolvers = append(resolvers, resolver) } } - if len(resolvers) != 6 { - t.Errorf("expected 6 resolvers; got %d", len(resolvers)) + if len(resolvers) != 5 { + t.Errorf("expected 5 resolvers; got %d", len(resolvers)) } g := New(nil, resolvers) @@ -83,16 +94,15 @@ func TestGossipGetNextBootstrapAddress(t *testing.T) { "127.0.0.1:9000", "127.0.0.1:9001", "/tmp/unix-socket12345", - "127.0.0.1:9002", + "10.10.0.1:26257", "localhost:9004", - "127.0.0.1:9005", - "127.0.0.1:9002", - "127.0.0.1:9005", - "127.0.0.1:9002", - "127.0.0.1:9005", + "10.10.0.2:26257", + "10.10.0.3:26257", + "10.10.0.4:26257", + "10.10.0.5:26257", + "10.10.0.6:26257", } for i := 0; i < len(expAddresses); i++ { - log.Infof("getting next address") addr := g.getNextBootstrapAddress() if addr == nil { t.Errorf("%d: unexpected nil addr when expecting %s", i, expAddresses[i]) diff --git a/gossip/resolver/node_lookup.go b/gossip/resolver/node_lookup.go index 9026251019f4..a87bfdfd2b36 100644 --- a/gossip/resolver/node_lookup.go +++ b/gossip/resolver/node_lookup.go @@ -25,7 +25,6 @@ import ( "github.com/cockroachdb/cockroach/base" "github.com/cockroachdb/cockroach/util" - "github.com/cockroachdb/cockroach/util/log" ) // nodeLookupResolver implements Resolver. @@ -33,10 +32,9 @@ import ( // address. This is useful for http load balancers which will not forward RPC. // It is never exhausted. type nodeLookupResolver struct { - context *base.Context - typ string - addr string - exhausted bool + context *base.Context + typ string + addr string // We need our own client so that we may specify timeouts. httpClient *http.Client } @@ -48,20 +46,7 @@ func (nl *nodeLookupResolver) Type() string { return nl.typ } func (nl *nodeLookupResolver) Addr() string { return nl.addr } // GetAddress returns a net.Addr or error. -// Upon errors, we set exhausted=true, then flip it back when called again. func (nl *nodeLookupResolver) GetAddress() (net.Addr, error) { - // TODO(marc): this is a bit of a hack to allow the server to start. - // In single-node setups, this resolver will never return anything since - // the status handlers are not serving yet. Instead, we specify multiple - // gossip addresses (--gossip=localhost,http-lb=lb). We need this one to - // be exhausted from time to time so that we have a chance to hit the fixed address. - // Remove once the status pages are served before we've established a connection to - // the gossip network. - if nl.exhausted { - nl.exhausted = false - return nil, util.Errorf("skipping temporarily-exhausted resolver") - } - if nl.httpClient == nil { tlsConfig, err := nl.context.GetClientTLSConfig() if err != nil { @@ -73,14 +58,12 @@ func (nl *nodeLookupResolver) GetAddress() (net.Addr, error) { } } - nl.exhausted = true // TODO(marc): put common URIs in base and reuse everywhere. 
url := fmt.Sprintf("%s://%s/_status/details/local", nl.context.HTTPRequestScheme(), nl.addr) req, err := http.NewRequest("GET", url, nil) if err != nil { return nil, err } - log.Infof("querying %s for gossip nodes", url) resp, err := nl.httpClient.Do(req) if err != nil { return nil, err } @@ -106,8 +89,6 @@ func (nl *nodeLookupResolver) GetAddress() (net.Addr, error) { if err != nil { return nil, err } - nl.exhausted = false - log.Infof("found gossip node: %+v", addr) return addr, nil } @@ -122,6 +103,6 @@ func resolveAddress(network, address string) (net.Addr, error) { return nil, util.Errorf("unknown address type: %q", network) } -// IsExhausted returns whether the resolver can yield further -// addresses. -func (nl *nodeLookupResolver) IsExhausted() bool { return nl.exhausted } +// IsExhausted always returns true, as there's no way to know how many +// nodes are behind a load balancer. +func (nl *nodeLookupResolver) IsExhausted() bool { return true } diff --git a/gossip/resolver/resolver.go b/gossip/resolver/resolver.go index 6e8c17a5a360..b40c0dbc4869 100644 --- a/gossip/resolver/resolver.go +++ b/gossip/resolver/resolver.go @@ -35,7 +35,6 @@ type Resolver interface { var validTypes = map[string]struct{}{ "tcp": {}, - "lb": {}, "unix": {}, "http-lb": {}, } @@ -44,7 +43,6 @@ var validTypes = map[string]struct{}{ // A specification is of the form: [<network type>=]<address>. Network type can be one of: // - tcp: plain hostname of ip address - // - lb: load balancer host name or ip: points to an unknown number of backends // - unix: unix sockets // - http-lb: http load balancer: queries http(s)://<address>/_status/details/local // for node addresses diff --git a/gossip/resolver/resolver_test.go b/gossip/resolver/resolver_test.go index a4a4463ddc7c..9f4da25b6699 100644 --- a/gossip/resolver/resolver_test.go +++ b/gossip/resolver/resolver_test.go @@ -21,6 +21,7 @@ import ( "github.com/cockroachdb/cockroach/testutils" "github.com/cockroachdb/cockroach/util" + _ "github.com/cockroachdb/cockroach/util/log" // Needed to import --verbosity ) var nodeTestBaseContext = testutils.NewNodeTestBaseContext() @@ -39,7 +40,6 @@ func TestParseResolverSpec(t *testing.T) { {"127.0.0.1", true, "tcp", "127.0.0.1:26257"}, {"tcp=127.0.0.1", true, "tcp", "127.0.0.1:26257"}, {"tcp=127.0.0.1:23456", true, "tcp", "127.0.0.1:23456"}, - {"lb=127.0.0.1", true, "lb", "127.0.0.1:26257"}, {"unix=/tmp/unix-socket12345", true, "unix", "/tmp/unix-socket12345"}, {"http-lb=localhost:26257", true, "http-lb", "localhost:26257"}, {"http-lb=newhost:1234", true, "http-lb", "newhost:1234"}, {"http-lb=:", true, "http-lb", def}, {"", false, "", ""}, {"foo=127.0.0.1", false, "", ""}, - {"lb=", false, "", ""}, {"", false, "tcp", ""}, {":", true, "tcp", def}, {"tcp=", false, "tcp", ""}, @@ -83,9 +82,6 @@ func TestGetAddress(t *testing.T) { {"tcp=127.0.0.1", true, true, "tcp", "127.0.0.1:26257"}, {"tcp=localhost:80", true, true, "tcp", "localhost:80"}, // We should test unresolvable dns too, but this would be fragile. - {"lb=localhost:80", true, false, "tcp", "localhost:80"}, - {"lb=127.0.0.1:80", true, false, "tcp", "127.0.0.1:80"}, - {"lb=127.0.0.1", true, false, "tcp", "127.0.0.1:26257"}, {"unix=/tmp/foo", true, true, "unix", "/tmp/foo"}, } diff --git a/main.go b/main.go index 53376ed89664..bc25c5420815 100644 --- a/main.go +++ b/main.go @@ -35,7 +35,7 @@ func main() { os.Args = append(os.Args, "help") } if err := cli.Run(os.Args[1:]); err != nil { - fmt.Fprintf(os.Stderr, "Failed running command %q: %v\n", os.Args[1:], err) + fmt.Fprintf(os.Stderr, "Failed running %q\n", os.Args[1]) os.Exit(1) } } diff --git a/server/context.go b/server/context.go index 897548171be4..e717f7b32663 100644 --- a/server/context.go +++ b/server/context.go @@ -82,9 +82,9 @@ type Context struct { // Maximum clock offset for the cluster. MaxOffset time.Duration - // GossipBootstrap is a comma-separated list of node addresses that + // JoinUsing is a comma-separated list of node addresses that // act as bootstrap hosts for connecting to the gossip network. - GossipBootstrap string + JoinUsing string // Enables running the node as a single-node in-memory cluster. EphemeralSingleNode bool @@ -193,6 +193,11 @@ func (ctx *Context) InitNode() error { // Initialize attributes. ctx.NodeAttributes = parseAttributes(ctx.Attrs) + // Skip gossip bootstrap if we're running as an ephemeral single node. + if ctx.EphemeralSingleNode { + return nil + } + // Get the gossip bootstrap resolvers.
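The self-join handling added to parseGossipBootstrapResolvers just below is easy to see in isolation: the node's own address is skipped, and it is an error only when it was the sole address supplied. A runnable reduction (parseJoin is hypothetical; the real method also constructs resolvers):

package main

import (
	"errors"
	"fmt"
	"strings"
)

var errCannotJoinSelf = errors.New("cannot specify --join as node's own address")

// parseJoin filters a --join list the way the new code does: empty
// entries and the node's own address are skipped, and a list that
// consists only of the node itself is rejected.
func parseJoin(joinUsing, ownAddr string) ([]string, error) {
	addresses := strings.Split(joinUsing, ",")
	var out []string
	for _, a := range addresses {
		if a == "" {
			continue
		}
		if a == ownAddr {
			if len(addresses) == 1 {
				return nil, errCannotJoinSelf
			}
			continue
		}
		out = append(out, a)
	}
	return out, nil
}

func main() {
	fmt.Println(parseJoin("node1:26257,node2:26257", "node1:26257")) // [node2:26257] <nil>
	fmt.Println(parseJoin("node1:26257", "node1:26257"))             // error: cannot join self
}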
resolvers, err := ctx.parseGossipBootstrapResolvers() if err != nil { @@ -207,6 +212,8 @@ func (ctx *Context) InitNode() error { var errUnsizedInMemStore = errors.New("unable to initialize an in-memory store with capacity 0") +var errCannotJoinSelf = errors.New("cannot specify --join as node's own address") + // initEngine parses the store attributes as a colon-separated list // and instantiates an engine based on the dir parameter. If dir parses // to an integer, it's taken to mean an in-memory engine; otherwise, @@ -225,22 +232,20 @@ func (ctx *Context) initEngine(attrsStr, path string, stopper *stop.Stopper) (en return engine.NewRocksDB(attrs, path, ctx.CacheSize, ctx.MemtableBudget, stopper), nil } -// SelfGossipAddr is a special flag that configures a node to gossip -// only with itself. This avoids having to specify the port twice for -// single-node clusters (i.e. once in --addr, and again in --gossip). -const SelfGossipAddr = "self" - // parseGossipBootstrapResolvers parses a comma-separated list of // gossip bootstrap resolvers. func (ctx *Context) parseGossipBootstrapResolvers() ([]resolver.Resolver, error) { var bootstrapResolvers []resolver.Resolver - addresses := strings.Split(ctx.GossipBootstrap, ",") + addresses := strings.Split(ctx.JoinUsing, ",") for _, address := range addresses { if len(address) == 0 { continue } - if address == SelfGossipAddr { - address = ctx.Addr + if address == ctx.Addr { + if len(addresses) == 1 { + return nil, errCannotJoinSelf + } + continue } resolver, err := resolver.NewResolver(&ctx.Context, address) if err != nil { diff --git a/server/context_test.go b/server/context_test.go index 9cc576506ea3..62f4cf886d43 100644 --- a/server/context_test.go +++ b/server/context_test.go @@ -30,7 +30,6 @@ func TestParseNodeAttributes(t *testing.T) { ctx := NewContext() ctx.Attrs = "attr1=val1::attr2=val2" ctx.Stores = "mem=1" - ctx.GossipBootstrap = SelfGossipAddr stopper := stop.NewStopper() defer stopper.Stop() if err := ctx.InitStores(stopper); err != nil { @@ -45,12 +44,12 @@ func TestParseNodeAttributes(t *testing.T) { } } -// TestParseGossipBootstrapAddrs verifies that GossipBootstrap is -// parsed correctly. -func TestParseGossipBootstrapAddrs(t *testing.T) { +// TestParseJoinUsingAddrs verifies that JoinUsing is parsed +// correctly. +func TestParseJoinUsingAddrs(t *testing.T) { defer leaktest.AfterTest(t) ctx := NewContext() - ctx.GossipBootstrap = "localhost:12345,,localhost:23456" + ctx.JoinUsing = "localhost:12345,,localhost:23456" ctx.Stores = "mem=1" stopper := stop.NewStopper() defer stopper.Stop() diff --git a/server/node.go b/server/node.go index a7ebd899c78a..bd6906327012 100644 --- a/server/node.go +++ b/server/node.go @@ -18,6 +18,7 @@ package server import ( "container/list" + "errors" "fmt" "net" "time" @@ -26,6 +27,7 @@ import ( "github.com/cockroachdb/cockroach/client" "github.com/cockroachdb/cockroach/gossip" + "github.com/cockroachdb/cockroach/gossip/resolver" "github.com/cockroachdb/cockroach/keys" "github.com/cockroachdb/cockroach/kv" "github.com/cockroachdb/cockroach/roachpb" @@ -40,6 +42,7 @@ import ( "github.com/cockroachdb/cockroach/util/metric" "github.com/cockroachdb/cockroach/util/stop" "github.com/cockroachdb/cockroach/util/tracer" + "github.com/cockroachdb/cockroach/util/uuid" "github.com/gogo/protobuf/proto" ) @@ -51,6 +54,10 @@ const ( publishStatusInterval = 10 * time.Second ) +// errNeedsBootstrap indicates the node should be used as the seed of +// a new cluster. 
+var errNeedsBootstrap = errors.New("node has no initialized stores and no instructions for joining an existing cluster") + // A Node manages a map of stores (by store ID) for which it serves // traffic. A node is the top-level data structure. There is one node // instance per process. A node accepts incoming RPCs and services @@ -100,13 +107,15 @@ func GetBootstrapSchema() sql.MetadataSchema { return schema } -// BootstrapCluster bootstraps a multiple stores using the provided engines and -// cluster ID. The first bootstrapped store contains a single range spanning -// all keys. Initial range lookup metadata is populated for the range. -// -// Returns a KV client for unittest purposes. Caller should close the returned -// client. -func BootstrapCluster(clusterID string, engines []engine.Engine, stopper *stop.Stopper) (*client.DB, error) { +// bootstrapCluster bootstraps multiple stores using the provided +// engines and cluster ID. The first bootstrapped store contains a +// single range spanning all keys. Initial range lookup metadata is +// populated for the range. Returns the cluster ID. +func bootstrapCluster(engines []engine.Engine) (string, error) { + clusterID := uuid.NewUUID4().String() + stopper := stop.NewStopper() + defer stopper.Stop() + ctx := storage.StoreContext{} ctx.ScanInterval = 10 * time.Minute ctx.Clock = hlc.NewClock(hlc.UnixNano) @@ -128,12 +137,12 @@ func BootstrapCluster(clusterID string, engines []engine.Engine, stopper *stop.S // Verify the store isn't already part of a cluster. if len(s.Ident.ClusterID) > 0 { - return nil, util.Errorf("storage engine already belongs to a cluster (%s)", s.Ident.ClusterID) + return "", util.Errorf("storage engine already belongs to a cluster (%s)", s.Ident.ClusterID) } // Bootstrap store to persist the store ident. if err := s.Bootstrap(sIdent, stopper); err != nil { - return nil, err + return "", err } // Create first range, writing directly to engine. Note this does // not create the range, just its data. Only do this if this is the @@ -141,11 +150,11 @@ if i == 0 { initialValues := GetBootstrapSchema().GetInitialValues() if err := s.BootstrapRange(initialValues); err != nil { - return nil, err + return "", err } } if err := s.Start(stopper); err != nil { - return nil, err + return "", err } stores.AddStore(s) @@ -153,16 +162,16 @@ // Initialize node and store ids. Only initialize the node once. if i == 0 { if nodeID, err := allocateNodeID(ctx.DB); nodeID != sIdent.NodeID || err != nil { - return nil, util.Errorf("expected to initialize node id allocator to %d, got %d: %s", + return "", util.Errorf("expected to initialize node id allocator to %d, got %d: %s", sIdent.NodeID, nodeID, err) } } if storeID, err := allocateStoreIDs(sIdent.NodeID, 1, ctx.DB); storeID != sIdent.StoreID || err != nil { - return nil, util.Errorf("expected to initialize store id allocator to %d, got %d: %s", + return "", util.Errorf("expected to initialize store id allocator to %d, got %d: %s", sIdent.StoreID, storeID, err) } } - return ctx.DB, nil + return clusterID, nil } // NewNode returns a new instance of Node. @@ -229,8 +238,7 @@ func (n *Node) initNodeID(id roachpb.NodeID) { // start starts the node by registering the storage instance for the // RPC service "Node" and initializing stores for each specified // engine. Launches periodic store gossiping in a goroutine.
-func (n *Node) start(rpcServer *rpc.Server, addr net.Addr, engines []engine.Engine, - attrs roachpb.Attributes) error { +func (n *Node) start(rpcServer *rpc.Server, addr net.Addr, engines []engine.Engine, attrs roachpb.Attributes) error { n.initDescriptor(addr, attrs) const method = "Node.Batch" if err := rpcServer.Register(method, n.executeCmd, &roachpb.BatchRequest{}); err != nil { @@ -242,7 +250,28 @@ func (n *Node) start(rpcServer *rpc.Server, addr net.Addr, engines []engine.Engi // Initialize stores, including bootstrapping new ones. if err := n.initStores(engines, n.stopper); err != nil { - return err + if err == errNeedsBootstrap { + // This node has no initialized stores and no way to connect to + // an existing cluster, so we bootstrap it. + clusterID, err := bootstrapCluster(engines) + if err != nil { + return err + } + log.Infof("**** cluster %s has been created", clusterID) + log.Infof("**** add additional nodes by specifying --join=%s", addr) + // Make sure we add the node as a resolver. + selfResolver, err := resolver.NewResolverFromAddress(addr) + if err != nil { + return err + } + n.ctx.Gossip.SetResolvers([]resolver.Resolver{selfResolver}) + // After bootstrapping, try again to initialize the stores. + if err := n.initStores(engines, n.stopper); err != nil { + return err + } + } else { + return err + } } n.startedAt = n.ctx.Clock.Now().WallTime @@ -294,6 +323,12 @@ func (n *Node) initStores(engines []engine.Engine, stopper *stop.Stopper) error n.stores.AddStore(s) } + // If there are no initialized stores and no gossip resolvers, + // bootstrap this node as the seed of a new cluster. + if n.stores.GetStoreCount() == 0 && !n.ctx.Gossip.HasResolvers() { + return errNeedsBootstrap + } + // Verify all initialized stores agree on cluster and node IDs. if err := n.validateStores(); err != nil { return err diff --git a/server/node_test.go b/server/node_test.go index 2cb834ebcbcc..d2fdb964ef11 100644 --- a/server/node_test.go +++ b/server/node_test.go @@ -124,17 +124,16 @@ func (s keySlice) Less(i, j int) bool { return bytes.Compare(s[i], s[j]) < 0 } func TestBootstrapCluster(t *testing.T) { defer leaktest.AfterTest(t) stopper := stop.NewStopper() + defer stopper.Stop() e := engine.NewInMem(roachpb.Attributes{}, 1<<20, stopper) - localDB, err := BootstrapCluster("cluster-1", []engine.Engine{e}, stopper) - if err != nil { + if _, err := bootstrapCluster([]engine.Engine{e}); err != nil { t.Fatal(err) } - defer stopper.Stop() - // Scan the complete contents of the local database. - rows, pErr := localDB.Scan(keys.LocalMax, roachpb.KeyMax, 0) - if pErr != nil { - t.Fatal(pErr.GoError()) + // Scan the complete contents of the local database directly from the engine. + rows, _, err := engine.MVCCScan(e, keys.LocalMax, roachpb.KeyMax, 0, roachpb.MaxTimestamp, true, nil) + if err != nil { + t.Fatal(err) } var foundKeys keySlice for _, kv := range rows { @@ -169,11 +168,9 @@ func TestBootstrapNewStore(t *testing.T) { engineStopper := stop.NewStopper() defer engineStopper.Stop() e := engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper) - eagerStopper := stop.NewStopper() - if _, err := BootstrapCluster("cluster-1", []engine.Engine{e}, eagerStopper); err != nil { + if _, err := bootstrapCluster([]engine.Engine{e}); err != nil { t.Fatal(err) } - eagerStopper.Stop() // Start a new node with two new stores which will require bootstrapping. 
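The retry shape added to Node.start above is the heart of auto-bootstrap: initStores returns errNeedsBootstrap when there are no initialized stores and no resolvers, the node bootstraps a fresh cluster, and store initialization is re-run. A toy model of that control flow (all names are simplified stand-ins for the real types):

package main

import (
	"errors"
	"fmt"
)

var errNeedsBootstrap = errors.New("needs bootstrap")

// initStores reports errNeedsBootstrap only when the node has no
// initialized stores and no way to reach an existing cluster.
func initStores(storeCount int, hasResolvers bool) error {
	if storeCount == 0 && !hasResolvers {
		return errNeedsBootstrap
	}
	return nil
}

// start mirrors Node.start: on errNeedsBootstrap, bootstrap a new
// cluster and retry store initialization, which must now succeed.
func start(storeCount int, hasResolvers bool) error {
	if err := initStores(storeCount, hasResolvers); err != nil {
		if err != errNeedsBootstrap {
			return err
		}
		fmt.Println("bootstrapping new cluster")
		storeCount = 1 // bootstrapCluster leaves the first store initialized
		return initStores(storeCount, true)
	}
	return nil
}

func main() {
	fmt.Println(start(0, false)) // seed node: bootstraps, then <nil>
	fmt.Println(start(0, true))  // joining node: <nil>, connects via gossip
}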
engines := []engine.Engine{ @@ -210,12 +207,9 @@ func TestNodeJoin(t *testing.T) { engineStopper := stop.NewStopper() defer engineStopper.Stop() e := engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper) - stopper := stop.NewStopper() - _, err := BootstrapCluster("cluster-1", []engine.Engine{e}, stopper) - if err != nil { + if _, err := bootstrapCluster([]engine.Engine{e}); err != nil { t.Fatal(err) } - stopper.Stop() // Start the bootstrap node. engines1 := []engine.Engine{e} @@ -264,12 +258,9 @@ func TestCorruptedClusterID(t *testing.T) { engineStopper := stop.NewStopper() e := engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper) defer engineStopper.Stop() - eagerStopper := stop.NewStopper() - _, err := BootstrapCluster("cluster-1", []engine.Engine{e}, eagerStopper) - if err != nil { + if _, err := bootstrapCluster([]engine.Engine{e}); err != nil { t.Fatal(err) } - eagerStopper.Stop() // Set the cluster ID to an empty string. sIdent := roachpb.StoreIdent{ @@ -277,7 +268,7 @@ func TestCorruptedClusterID(t *testing.T) { NodeID: 1, StoreID: 1, } - if err = engine.MVCCPutProto(e, nil, keys.StoreIdentKey(), roachpb.ZeroTimestamp, nil, &sIdent); err != nil { + if err := engine.MVCCPutProto(e, nil, keys.StoreIdentKey(), roachpb.ZeroTimestamp, nil, &sIdent); err != nil { t.Fatal(err) } diff --git a/server/server.go b/server/server.go index dbb628acf17a..f46b5a31f137 100644 --- a/server/server.go +++ b/server/server.go @@ -31,7 +31,6 @@ import ( snappy "github.com/cockroachdb/c-snappy" "github.com/cockroachdb/cockroach/client" "github.com/cockroachdb/cockroach/gossip" - "github.com/cockroachdb/cockroach/gossip/resolver" "github.com/cockroachdb/cockroach/keys" "github.com/cockroachdb/cockroach/kv" crpc "github.com/cockroachdb/cockroach/rpc" @@ -206,10 +205,9 @@ func NewServer(ctx *Context, stopper *stop.Stopper) (*Server, error) { return s, nil } -// Start runs the RPC and HTTP servers, starts the gossip instance (if -// selfBootstrap is true, uses the rpc server's address as the gossip -// bootstrap), and starts the node using the supplied engines slice. -func (s *Server) Start(selfBootstrap bool) error { +// Start starts the server on the specified port, starts gossip and +// initializes the node using the engines from the server's context. +func (s *Server) Start() error { tlsConfig, err := s.ctx.GetServerTLSConfig() if err != nil { return err @@ -222,18 +220,8 @@ func (s *Server) Start(selfBootstrap bool) error { } s.listener = ln - addr := ln.Addr() - addrStr := addr.String() - // Handle self-bootstrapping case for a single node. 
- if selfBootstrap { - selfResolver, err := resolver.NewResolver(&s.ctx.Context, addrStr) - if err != nil { - return err - } - s.gossip.SetResolvers([]resolver.Resolver{selfResolver}) - } s.gossip.Start(s.rpc, addr, s.stopper) if err := s.node.start(s.rpc, addr, s.ctx.Engines, s.ctx.NodeAttributes); err != nil { diff --git a/server/server_test.go b/server/server_test.go index ae89a0866337..b4002a39a0ed 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -79,7 +79,7 @@ func TestInitEngine(t *testing.T) { } for _, spec := range testCases { ctx := NewContext() - ctx.Stores, ctx.GossipBootstrap = spec.key, SelfGossipAddr + ctx.Stores = spec.key if err := ctx.InitStores(stopper); err == nil { engines := ctx.Engines if spec.wantError { @@ -111,7 +111,6 @@ func TestInitEngines(t *testing.T) { ctx := NewContext() ctx.Stores = fmt.Sprintf("mem=1000,mem:ddr3=1000,ssd=%s,hdd:7200rpm=%s", tmp[0], tmp[1]) - ctx.GossipBootstrap = SelfGossipAddr expEngines := []struct { attrs roachpb.Attributes isMem bool diff --git a/server/status_test.go b/server/status_test.go index 185d34b2472a..e3c11684c330 100644 --- a/server/status_test.go +++ b/server/status_test.go @@ -261,7 +261,7 @@ func startServer(t *testing.T) TestServer { ts.Ctx = NewTestContext() ts.StoresPerNode = 3 if err := ts.Start(); err != nil { - t.Fatal(err) + t.Fatalf("failed to start test server: %s", err) } // Make sure the range is spun up with an arbitrary read command. We do not diff --git a/server/testserver.go b/server/testserver.go index 22b3382271d5..3dc94dabb223 100644 --- a/server/testserver.go +++ b/server/testserver.go @@ -98,8 +98,7 @@ func NewTestContext() *Context { // type TestServer struct { // Ctx is the context used by this server. - Ctx *Context - SkipBootstrap bool + Ctx *Context // server is the embedded Cockroach server struct. *Server StoresPerNode int @@ -195,24 +194,15 @@ func (ts *TestServer) StartWithStopper(stopper *stop.Stopper) error { return err } - // Ensure we have the correct number of engines. Add in in-memory ones where - // needed. There must be at least one store/engine. + // Ensure we have the correct number of engines. Add in-memory ones where + // needed. There must be at least one store/engine. if ts.StoresPerNode < 1 { ts.StoresPerNode = 1 } for i := len(ts.Ctx.Engines); i < ts.StoresPerNode; i++ { ts.Ctx.Engines = append(ts.Ctx.Engines, engine.NewInMem(roachpb.Attributes{}, 100<<20, ts.Server.stopper)) } - - if !ts.SkipBootstrap { - stopper := stop.NewStopper() - _, err := BootstrapCluster("cluster-1", ts.Ctx.Engines, stopper) - if err != nil { - return util.Errorf("could not bootstrap cluster: %s", err) - } - stopper.Stop() - } - if err := ts.Server.Start(true); err != nil { + if err := ts.Server.Start(); err != nil { return err } diff --git a/storage/engine/rocksdb.go b/storage/engine/rocksdb.go index 1818b975e240..28653ad3c11e 100644 --- a/storage/engine/rocksdb.go +++ b/storage/engine/rocksdb.go @@ -117,7 +117,7 @@ func (r *RocksDB) Open() error { return util.Errorf("could not open rocksdb instance: %s", err) } - // Start a gorountine that will finish when the underlying handle + // Start a goroutine that will finish when the underlying handle // is deallocated. This is used to check a leak in tests. 
go func() { <-r.deallocated diff --git a/storage/stores.go b/storage/stores.go index 2a5c928d9892..c34ef3c388a6 100644 --- a/storage/stores.go +++ b/storage/stores.go @@ -20,6 +20,7 @@ import ( "fmt" "sync" + "github.com/gogo/protobuf/proto" "golang.org/x/net/context" "github.com/cockroachdb/cockroach/client" @@ -44,6 +45,7 @@ type Stores struct { mu sync.RWMutex // Protects storeMap and addrs storeMap map[roachpb.StoreID]*Store // Map from StoreID to Store biLatestTS roachpb.Timestamp // Timestamp of gossip bootstrap info + latestBI *gossip.BootstrapInfo // Latest cached bootstrap info } var _ client.Sender = &Stores{} // Stores implements the client.Sender interface @@ -96,9 +98,8 @@ func (ls *Stores) AddStore(s *Store) { // If we've already read the gossip bootstrap info, ensure that // all stores have the most recent values. if !ls.biLatestTS.Equal(roachpb.ZeroTimestamp) { - // ReadBootstrapInfo calls updateAllBootstrapInfos. - if err := ls.readBootstrapInfoLocked(&gossip.BootstrapInfo{}); err != nil { - log.Errorf("failed to update bootstrap info on stores: %s", err) + if err := ls.updateBootstrapInfo(ls.latestBI); err != nil { + log.Errorf("failed to update bootstrap info on newly added store: %s", err) } } } @@ -256,40 +257,22 @@ func (ls *Stores) RangeLookup(key roachpb.RKey, _ *roachpb.RangeDescriptor, cons func (ls *Stores) ReadBootstrapInfo(bi *gossip.BootstrapInfo) error { ls.mu.RLock() defer ls.mu.RUnlock() - return ls.readBootstrapInfoLocked(bi) -} - -func (ls *Stores) readBootstrapInfoLocked(bi *gossip.BootstrapInfo) error { latestTS := roachpb.ZeroTimestamp - timestamps := map[roachpb.StoreID]roachpb.Timestamp{} - // Find the most recent bootstrap info, collecting timestamps for - // each store along the way. - for id, s := range ls.storeMap { + // Find the most recent bootstrap info. + for _, s := range ls.storeMap { var storeBI gossip.BootstrapInfo ok, err := engine.MVCCGetProto(s.engine, keys.StoreGossipKey(), roachpb.ZeroTimestamp, true, nil, &storeBI) if err != nil { return err } - timestamps[id] = storeBI.Timestamp if ok && latestTS.Less(storeBI.Timestamp) { latestTS = storeBI.Timestamp *bi = storeBI } } - - // Update all stores with an earlier timestamp. - for id, s := range ls.storeMap { - if timestamps[id].Less(latestTS) { - if err := engine.MVCCPutProto(s.engine, nil, keys.StoreGossipKey(), roachpb.ZeroTimestamp, nil, bi); err != nil { - return err - } - log.Infof("updated gossip bootstrap info to %s", s) - } - } - - ls.biLatestTS = latestTS - return nil + log.Infof("read %d node addresses from persistent storage", len(bi.Addresses)) + return ls.updateBootstrapInfo(bi) } // WriteBootstrapInfo implements the gossip.Storage interface. Write @@ -299,13 +282,26 @@ func (ls *Stores) readBootstrapInfoLocked(bi *gossip.BootstrapInfo) error { func (ls *Stores) WriteBootstrapInfo(bi *gossip.BootstrapInfo) error { ls.mu.RLock() defer ls.mu.RUnlock() - ls.biLatestTS = ls.clock.Now() - bi.Timestamp = ls.biLatestTS + bi.Timestamp = ls.clock.Now() + if err := ls.updateBootstrapInfo(bi); err != nil { + return err + } + log.Infof("wrote %d node addresses to persistent storage", len(bi.Addresses)) + return nil +} + +func (ls *Stores) updateBootstrapInfo(bi *gossip.BootstrapInfo) error { + if bi.Timestamp.Less(ls.biLatestTS) { + return nil + } + // Update the latest timestamp and set cached version. + ls.biLatestTS = bi.Timestamp + ls.latestBI = proto.Clone(bi).(*gossip.BootstrapInfo) + // Update all stores. 
for _, s := range ls.storeMap { if err := engine.MVCCPutProto(s.engine, nil, keys.StoreGossipKey(), roachpb.ZeroTimestamp, nil, bi); err != nil { return err } - log.Infof("wrote gossip bootstrap info to %s", s) } return nil }
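Finally, the rewritten stores.go path is last-writer-wins on the gossiped timestamp, with a cached copy kept so a store added later can be backfilled without re-reading every engine. A compact model of that guard (timestamp and bootstrapInfo are simplified stand-ins for the roachpb/gossip types, and a plain struct copy stands in for proto.Clone):

package main

import "fmt"

type timestamp struct{ wall int64 }

func (t timestamp) Less(o timestamp) bool { return t.wall < o.wall }

type bootstrapInfo struct {
	ts    timestamp
	addrs []string
}

// stores models the new update path: stale writes are dropped, newer
// ones bump the cached latest copy used to seed newly added stores.
type stores struct {
	latestTS timestamp
	latest   *bootstrapInfo
}

func (s *stores) update(bi *bootstrapInfo) {
	if bi.ts.Less(s.latestTS) {
		return // stale update; keep the newer cached info
	}
	s.latestTS = bi.ts
	cp := *bi // stand-in for proto.Clone
	s.latest = &cp
}

func main() {
	s := &stores{}
	s.update(&bootstrapInfo{ts: timestamp{wall: 2}, addrs: []string{"a:26257", "b:26257"}})
	s.update(&bootstrapInfo{ts: timestamp{wall: 1}, addrs: []string{"a:26257"}}) // dropped
	fmt.Println(s.latestTS, s.latest.addrs) // {2} [a:26257 b:26257]
}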