diff --git a/acceptance/cluster/localcluster.go b/acceptance/cluster/localcluster.go
index 782c044f18de..f261381785ea 100644
--- a/acceptance/cluster/localcluster.go
+++ b/acceptance/cluster/localcluster.go
@@ -233,17 +233,10 @@ func (l *LocalCluster) initCluster() {
}
}
create := func() (*Container, error) {
- var entrypoint []string
- if *cockroachImage == builderImage {
- entrypoint = append(entrypoint, "/"+filepath.Base(*cockroachBinary))
- } else if *cockroachEntry != "" {
- entrypoint = append(entrypoint, *cockroachEntry)
- }
return createContainer(l, dockerclient.ContainerConfig{
Image: *cockroachImage,
Volumes: vols,
- Entrypoint: entrypoint,
- Cmd: []string{"init", "--stores=ssd=" + dataStr(0, 0)},
+ Entrypoint: []string{"/bin/true"},
})
}
c, err := create()
@@ -351,11 +344,6 @@ func (l *LocalCluster) createNodeCerts() {
}
func (l *LocalCluster) startNode(i int) *Container {
- gossipNodes := []string{}
- for j := 0; j < l.numLocal; j++ {
- gossipNodes = append(gossipNodes, fmt.Sprintf("%s:%d", nodeStr(j), cockroachPort))
- }
-
var stores = "ssd=" + dataStr(i, 0)
for j := 1; j < l.numStores; j++ {
stores += ",ssd=" + dataStr(i, j)
@@ -366,9 +354,13 @@ func (l *LocalCluster) startNode(i int) *Container {
"--stores=" + stores,
"--certs=/certs",
"--addr=" + fmt.Sprintf("%s:%d", nodeStr(i), cockroachPort),
- "--gossip=" + strings.Join(gossipNodes, ","),
"--scan-max-idle-time=200ms", // set low to speed up tests
}
+ // Append --join flag for all nodes except first.
+ if i > 0 {
+ cmd = append(cmd, fmt.Sprintf("--join=%s:%d", nodeStr(0), cockroachPort))
+ }
+
var locallogDir string
if len(l.logDir) > 0 {
dockerlogDir := "/logs/" + nodeStr(i)
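The harness change above is the new bootstrap model in miniature: node 0 starts with no join target and seeds the cluster itself, and every later node points at an existing one. A standalone sketch of that rule (the `nodeAddr` helper is a hypothetical stand-in for the harness's `nodeStr`/`cockroachPort`):

```go
package main

import "fmt"

// startArgs builds the start invocation for node i under the --join model.
func startArgs(i int, stores string) []string {
	nodeAddr := func(n int) string { return fmt.Sprintf("roach%d:26257", n) }
	args := []string{"start", "--stores=" + stores, "--addr=" + nodeAddr(i)}
	if i > 0 {
		// Only the first node omits --join; it bootstraps the cluster.
		args = append(args, "--join="+nodeAddr(0))
	}
	return args
}

func main() {
	for i := 0; i < 3; i++ {
		fmt.Println(startArgs(i, "ssd=/data"))
	}
}
```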
diff --git a/cli/cert.go b/cli/cert.go
index 3e3f296a0b30..0cdc4a88701d 100644
--- a/cli/cert.go
+++ b/cli/cert.go
@@ -17,8 +17,9 @@
package cli
import (
+ "fmt"
+
"github.com/cockroachdb/cockroach/security"
- "github.com/cockroachdb/cockroach/util"
"github.com/spf13/cobra"
)
@@ -44,7 +45,7 @@ individual files in the directory specified by --certs (required).
// to their corresponding files.
func runCreateCACert(cmd *cobra.Command, args []string) error {
if err := security.RunCreateCACert(context.Certs, keySize); err != nil {
- return util.Errorf("failed to generate CA certificate: %s", err)
+ return fmt.Errorf("failed to generate CA certificate: %s", err)
}
return nil
}
@@ -68,7 +69,7 @@ At least one host should be passed in (either IP address or dns name).
// to their corresponding files.
func runCreateNodeCert(cmd *cobra.Command, args []string) error {
if err := security.RunCreateNodeCert(context.Certs, keySize, args); err != nil {
- return util.Errorf("failed to generate node certificate: %s", err)
+ return fmt.Errorf("failed to generate node certificate: %s", err)
}
return nil
}
@@ -95,7 +96,7 @@ func runCreateClientCert(cmd *cobra.Command, args []string) error {
return errMissingParams
}
if err := security.RunCreateClientCert(context.Certs, keySize, args[0]); err != nil {
- return util.Errorf("failed to generate clent certificate: %s", err)
+ return fmt.Errorf("failed to generate clent certificate: %s", err)
}
return nil
}
diff --git a/cli/cli.go b/cli/cli.go
index a169d07f4a4d..2f6ca53c82f0 100644
--- a/cli/cli.go
+++ b/cli/cli.go
@@ -57,7 +57,6 @@ var cockroachCmd = &cobra.Command{
func init() {
cockroachCmd.AddCommand(
- initCmd,
startCmd,
certCmd,
exterminateCmd,
diff --git a/cli/cli_test.go b/cli/cli_test.go
index 443d757dfd8a..06340771fc6a 100644
--- a/cli/cli_test.go
+++ b/cli/cli_test.go
@@ -548,8 +548,7 @@ func TestFlagUsage(t *testing.T) {
cockroach [command]
Available Commands:
- init init new Cockroach cluster
- start start a node by joining the gossip network
+ start start a node
cert create ca, node, and client certs
exterminate destroy all data held by the node
quit drain and shutdown node
diff --git a/cli/flags.go b/cli/flags.go
index 598f92db62f1..244ea0ad1d24 100644
--- a/cli/flags.go
+++ b/cli/flags.go
@@ -69,18 +69,14 @@ var flagUsage = map[string]string{
Directory containing RSA key and x509 certs. This flag is required if
--insecure=false.
`,
- "gossip": `
- A comma-separated list of gossip addresses or resolvers for gossip
- bootstrap. Each item in the list has an optional type:
+ "join": `
+ A comma-separated list of addresses to use when a new node is joining
+ an existing cluster. Each address in the list has an optional type:
[type=]<address>. An unspecified type means ip address or dns.
Type is one of:
- tcp: (default if type is omitted): plain ip address or hostname.
- - unix: unix socket
- - lb: RPC load balancer forwarding to an arbitrary node
- http-lb: HTTP load balancer: we query
http(s)://<address>/_status/details/local
- - self: for single node systems, specify --gossip=self (the
- <address> is omitted).
`,
"key-size": `
Key size in bits for CA/Node/Client certificates.
@@ -125,8 +121,8 @@ var flagUsage = map[string]string{
clusters to be more responsive.
`,
"time-until-store-dead": `
- Adjusts the timeout for stores. If there's been no gossiped updated
- from a store after this time, the store is considered unavailable.
+ Adjusts the timeout for stores. If there's been no communication from
+ a store in this time interval, the store is considered unavailable.
Replicas on an unavailable store will be moved to available ones.
`,
"stores": `
@@ -168,14 +164,6 @@ func initFlags(ctx *server.Context) {
pf.Var(pflagValue{f.Value}, normalizeStdFlagName(f.Name), f.Usage)
})
- {
- f := initCmd.Flags()
- f.StringVar(&ctx.Stores, "stores", ctx.Stores, flagUsage["stores"])
- if err := initCmd.MarkFlagRequired("stores"); err != nil {
- panic(err)
- }
- }
-
{
f := startCmd.Flags()
f.BoolVar(&ctx.EphemeralSingleNode, "dev", ctx.EphemeralSingleNode, flagUsage["dev"])
@@ -193,8 +181,8 @@ func initFlags(ctx *server.Context) {
f.StringVar(&ctx.Certs, "certs", ctx.Certs, flagUsage["certs"])
f.BoolVar(&ctx.Insecure, "insecure", ctx.Insecure, flagUsage["insecure"])
- // Gossip flags.
- f.StringVar(&ctx.GossipBootstrap, "gossip", ctx.GossipBootstrap, flagUsage["gossip"])
+ // Cluster joining flags.
+ f.StringVar(&ctx.JoinUsing, "join", ctx.JoinUsing, flagUsage["join"])
// KV flags.
f.BoolVar(&ctx.Linearizable, "linearizable", ctx.Linearizable, flagUsage["linearizable"])
@@ -206,9 +194,6 @@ func initFlags(ctx *server.Context) {
f.DurationVar(&ctx.ScanMaxIdleTime, "scan-max-idle-time", ctx.ScanMaxIdleTime, flagUsage["scan-max-idle-time"])
f.DurationVar(&ctx.TimeUntilStoreDead, "time-until-store-dead", ctx.TimeUntilStoreDead, flagUsage["time-until-store-dead"])
- if err := startCmd.MarkFlagRequired("gossip"); err != nil {
- panic(err)
- }
if err := startCmd.MarkFlagRequired("stores"); err != nil {
panic(err)
}
diff --git a/cli/start.go b/cli/start.go
index c7f4dac71474..f4d6adc800f4 100644
--- a/cli/start.go
+++ b/cli/start.go
@@ -34,7 +34,6 @@ import (
"github.com/cockroachdb/cockroach/util"
"github.com/cockroachdb/cockroach/util/log"
"github.com/cockroachdb/cockroach/util/stop"
- "github.com/cockroachdb/cockroach/util/uuid"
"github.com/spf13/cobra"
)
@@ -77,77 +76,29 @@ func getJSON(hostport, path string, v interface{}) error {
return util.GetJSON(httpClient, context.HTTPRequestScheme(), hostport, path, v)
}
-// initCmd command initializes a new Cockroach cluster.
-var initCmd = &cobra.Command{
- Use: "init --stores=...",
- Short: "init new Cockroach cluster",
- Long: `
-Initialize a new Cockroach cluster using the --stores flag to specify one or
-more storage locations. The first of these storage locations is used to
-bootstrap the first replica of the first range. If any of the storage locations
-are already part of a pre-existing cluster, the bootstrap will fail.
-`,
- Example: ` cockroach init --stores=ssd=/mnt/ssd1,ssd=/mnt/ssd2`,
- SilenceUsage: true,
- RunE: runInit,
-}
-
-// runInit initializes the engine based on the first
-// store. The bootstrap engine may not be an in-memory type.
-func runInit(_ *cobra.Command, _ []string) error {
- stopper := stop.NewStopper()
- defer stopper.Stop()
-
- // Default user for servers.
- context.User = security.NodeUser
-
- if err := context.InitStores(stopper); err != nil {
- return util.Errorf("failed to initialize stores: %s", err)
- }
- if len(context.Engines) > 1 {
- return util.Errorf("cannot bootstrap to more than one store")
- }
-
- return initCluster(stopper)
-}
-
-func initCluster(stopper *stop.Stopper) error {
- // Generate a new UUID for cluster ID and bootstrap the cluster.
- clusterID := uuid.NewUUID4().String()
- if _, err := server.BootstrapCluster(clusterID, context.Engines, stopper); err != nil {
- return util.Errorf("unable to bootstrap cluster: %s", err)
- }
-
- log.Infof("cockroach cluster %s has been initialized", clusterID)
- return nil
-}
-
-// startCmd command starts nodes by joining the gossip network.
+// startCmd starts a node by initializing the stores and joining
+// the cluster.
var startCmd = &cobra.Command{
Use: "start",
- Short: "start a node by joining the gossip network",
+ Short: "start a node",
Long: `
-Start a Cockroach node by joining the gossip network and exporting key ranges
-stored on physical device(s). The gossip network is joined by contacting one or
-more well-known hosts specified by the --gossip flag. Every node should be run
-with the same list of bootstrap hosts to guarantee a connected network. An
-alternate approach is to use a single host for --gossip and round-robin DNS.
+Start a CockroachDB node, which will export data from one or more
+storage devices, specified via the --stores flag.
-Each node exports data from one or more physical devices. These devices are
-specified via the --stores flag. This is a comma-separated list of paths to
-storage directories or for in-memory stores, the number of bytes. Although the
-paths should be specified to correspond uniquely to physical devices, this
-requirement isn't strictly enforced. See the --stores flag help description for
-additional details.`,
- Example: ` cockroach start --certs=<dir> --gossip=host1:port1[,...] --stores=ssd=/mnt/ssd1,...`,
+If no cluster exists yet and this is the first node, no additional
+flags are required. If the cluster already exists and this node is
+uninitialized, specify the --join flag to point to any healthy node
+(or list of nodes) already part of the cluster.
+`,
+ Example: ` cockroach start --certs=<dir> --stores=ssd=/mnt/ssd1,... [--join=host:port[,host:port]]`,
SilenceUsage: true,
RunE: runStart,
}
// runStart starts the cockroach node using --stores as the list of
-// storage devices ("stores") on this machine and --gossip as the list
-// of "well-known" hosts used to join this node to the cockroach
-// cluster via the gossip network.
+// storage devices ("stores") on this machine and --join as the list
+// of other active nodes used to join this node to the cockroach
+// cluster, if this is its first time connecting.
func runStart(_ *cobra.Command, _ []string) error {
info := util.GetBuildInfo()
log.Infof("[build] %s @ %s (%s)", info.Tag, info.Time, info.Vers)
@@ -160,36 +111,25 @@ func runStart(_ *cobra.Command, _ []string) error {
// for the default cluster-wide zone config.
config.DefaultZoneConfig.ReplicaAttrs = []roachpb.Attributes{{}}
context.Stores = "mem=1073741824"
- context.GossipBootstrap = server.SelfGossipAddr
}
stopper := stop.NewStopper()
if err := context.InitStores(stopper); err != nil {
- return util.Errorf("failed to initialize stores: %s", err)
- }
-
- if context.EphemeralSingleNode {
- // A separate stopper for bootstrapping so that we can properly shutdown
- // all of the stores.
- initStopper := stop.NewStopper()
- if err := initCluster(initStopper); err != nil {
- return err
- }
- initStopper.Stop()
+ return fmt.Errorf("failed to initialize stores: %s", err)
}
if err := context.InitNode(); err != nil {
- return util.Errorf("failed to initialize node: %s", err)
+ return fmt.Errorf("failed to initialize node: %s", err)
}
log.Info("starting cockroach node")
s, err := server.NewServer(context, stopper)
if err != nil {
- return util.Errorf("failed to start Cockroach server: %s", err)
+ return fmt.Errorf("failed to start Cockroach server: %s", err)
}
- if err := s.Start(false); err != nil {
- return util.Errorf("cockroach server exited with error: %s", err)
+ if err := s.Start(); err != nil {
+ return fmt.Errorf("cockroach server exited with error: %s", err)
}
signalCh := make(chan os.Signal, 1)
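Taken together, the deletions above collapse `init` + `start` into a single path. A condensed sketch of the new sequence, using the names from this diff (error handling trimmed; a sketch, not the verbatim implementation):

```go
package main

import (
	"log"

	"github.com/cockroachdb/cockroach/server"
	"github.com/cockroachdb/cockroach/util/stop"
)

// startNode mirrors the shape of runStart after this change: there is
// no separate init step, and Start() takes no selfBootstrap flag.
func startNode(ctx *server.Context) error {
	stopper := stop.NewStopper()
	if err := ctx.InitStores(stopper); err != nil { // open --stores engines
		return err
	}
	if err := ctx.InitNode(); err != nil { // parse --join into resolvers
		return err
	}
	s, err := server.NewServer(ctx, stopper)
	if err != nil {
		return err
	}
	return s.Start() // the node itself decides: bootstrap or join
}

func main() {
	if err := startNode(server.NewContext()); err != nil {
		log.Fatal(err)
	}
}
```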
diff --git a/client/client_test.go b/client/client_test.go
index 9100ebd78474..33618bb533dc 100644
--- a/client/client_test.go
+++ b/client/client_test.go
@@ -683,7 +683,6 @@ func setupClientBenchData(useSSL bool, numVersions, numKeys int, b *testing.B) (
s := &server.TestServer{}
s.Ctx = server.NewTestContext()
- s.SkipBootstrap = exists
if !useSSL {
s.Ctx.Insecure = true
}
diff --git a/gossip/client.go b/gossip/client.go
index f4a11783c732..351ad0a28f59 100644
--- a/gossip/client.go
+++ b/gossip/client.go
@@ -188,7 +188,7 @@ func (c *client) handleGossip(g *Gossip, call *netrpc.Call) error {
return util.Errorf("received forward from node %d to %d (%s)", reply.NodeID, reply.AlternateNodeID, reply.AlternateAddr)
}
- // If we have the sentinel gossip, we're considered connected.
+ // If we have the sentinel gossip we're considered connected.
g.checkHasConnected()
// Check whether this outgoing client is duplicating work already
diff --git a/gossip/gossip.go b/gossip/gossip.go
index c0d7ab17ff24..48173c90484f 100644
--- a/gossip/gossip.go
+++ b/gossip/gossip.go
@@ -262,12 +262,11 @@ func (g *Gossip) SetStorage(storage Storage) error {
var storedBI BootstrapInfo
err := storage.ReadBootstrapInfo(&storedBI)
if err != nil {
- log.Warningf("failed to read bootstrap info: %s", err)
+ log.Warningf("failed to read gossip bootstrap info: %s", err)
}
- log.Infof("read %d gossip host(s) for bootstrapping from persistent storage", len(storedBI.Addresses))
// Merge the stored bootstrap info addresses with any we've become
- // aware of through the --gossip bootstrap hosts we've connected to.
+ // aware of through the --join bootstrap hosts we've connected to.
if len(g.bootstrapInfo.Addresses) > 0 {
existing := map[string]struct{}{}
makeKey := func(a util.UnresolvedAddr) string { return fmt.Sprintf("%s,%s", a.Network(), a.String()) }
@@ -283,8 +282,6 @@ func (g *Gossip) SetStorage(storage Storage) error {
if numAddrs := len(g.bootstrapInfo.Addresses); numAddrs > len(storedBI.Addresses) {
if err := g.storage.WriteBootstrapInfo(&g.bootstrapInfo); err != nil {
log.Error(err)
- } else {
- log.Infof("wrote %d merged gossip host(s) to persistent storage", numAddrs)
}
}
} else {
@@ -297,7 +294,7 @@ func (g *Gossip) SetStorage(storage Storage) error {
for _, addr := range g.bootstrapInfo.Addresses {
r, err := resolver.NewResolverFromUnresolvedAddr(addr)
if err != nil {
- log.Warningf("bad bootstrap address %s: %s", addr, err)
+ log.Warningf("bad node address %s: %s", addr, err)
continue
}
if g.haveResolver(r) {
@@ -312,9 +309,10 @@ func (g *Gossip) SetStorage(storage Storage) error {
g.resolvers = append(g.resolvers, r)
}
- // If there are no resolvers after persistent storage has been queried, fatal error.
+ // If there are no resolvers even after merging known nodes from
+ // persistent storage, we need to error out.
if len(g.resolvers) == 0 {
- return fmt.Errorf("no resolvers specified for gossip network: try adding peers via --gossip")
+ return fmt.Errorf("no nodes were found to connect to cluster; use --join")
}
// If a new resolver was found, immediately signal bootstrap.
@@ -338,6 +336,14 @@ func (g *Gossip) SetResolvers(resolvers []resolver.Resolver) {
g.resolversTried = map[int]struct{}{}
}
+// HasResolvers returns true if this gossip instance has any
+// configured resolvers.
+func (g *Gossip) HasResolvers() bool {
+ g.mu.Lock()
+ defer g.mu.Unlock()
+ return len(g.resolvers) > 0
+}
+
// GetNodeIDAddress looks up the address of the node by ID.
func (g *Gossip) GetNodeIDAddress(nodeID roachpb.NodeID) (net.Addr, error) {
g.mu.Lock()
@@ -381,6 +387,7 @@ func (g *Gossip) SimulationCycle() {
// haveResolver returns whether the specified resolver is already in
// the gossip node's list of resolvers. The caller must hold the
// gossip mutex.
+// TODO(spencer): sort and binary search.
func (g *Gossip) haveResolver(r resolver.Resolver) bool {
for _, ex := range g.resolvers {
if ex.Type() == r.Type() && ex.Addr() == r.Addr() {
@@ -390,6 +397,19 @@ func (g *Gossip) haveResolver(r resolver.Resolver) bool {
return false
}
+// haveBootstrapAddress returns whether there is already a bootstrap
+// address matching the specified address. The caller must hold the
+// gossip mutex.
+// TODO(spencer): sort and binary search.
+func (g *Gossip) haveBootstrapAddress(addr util.UnresolvedAddr) bool {
+ for _, ex := range g.bootstrapInfo.Addresses {
+ if ex == addr {
+ return true
+ }
+ }
+ return false
+}
+
// maxPeers returns the maximum number of peers each gossip node
// may connect to. This is based on maxHops, which is a preset
// maximum for number of hops allowed before the gossip network
@@ -442,17 +462,18 @@ func (g *Gossip) updateNodeAddress(_ string, content roachpb.Value) {
log.Warningf("bad address from gossip node %s: %s", desc, err)
return
}
- if g.haveResolver(r) {
- return
+ if !g.haveResolver(r) {
+ g.resolvers = append(g.resolvers, r)
}
- g.resolvers = append(g.resolvers, r)
// Add new address to bootstrap info and persist if possible.
- g.bootstrapInfo.Addresses = append(g.bootstrapInfo.Addresses, desc.Address)
- if g.storage != nil {
- // TODO(spencer): need to clean up ancient gossip nodes, which
- // will otherwise stick around in the bootstrap info forever.
- if err := g.storage.WriteBootstrapInfo(&g.bootstrapInfo); err != nil {
- log.Error(err)
+ if !g.haveBootstrapAddress(desc.Address) {
+ g.bootstrapInfo.Addresses = append(g.bootstrapInfo.Addresses, desc.Address)
+ if g.storage != nil {
+ // TODO(spencer): need to clean up ancient gossip nodes, which
+ // will otherwise stick around in the bootstrap info forever.
+ if err := g.storage.WriteBootstrapInfo(&g.bootstrapInfo); err != nil {
+ log.Error(err)
+ }
}
}
}
@@ -653,8 +674,9 @@ func (g *Gossip) MaxHops() uint32 {
}
// Start launches the gossip instance, which commences joining the
-// gossip network using the supplied rpc server and the gossip
-// bootstrap addresses specified via command-line flag: --gossip.
+// gossip network using the supplied rpc server and previously known
+// peer addresses in addition to any bootstrap addresses specified via
+// --join.
//
// The supplied address is used to identify the gossip instance in the
// gossip network; it will be used by other instances to connect to
@@ -878,13 +900,13 @@ func (g *Gossip) signalStalled() {
// not being connected to the sentinel. This could happen in a network
// partition, or because of misconfiguration. It's impossible to tell,
// but we can warn appropriately. If there are no incoming or outgoing
-// connections, we warn about the --gossip flag being set. If we've
+// connections, we warn about the --join flag being set. If we've
// connected, and all resolvers have been tried, we warn about either
// the first range not being available or else possibly the cluster
// never having been initialized.
func (g *Gossip) warnAboutStall() {
if g.outgoing.len()+g.incoming.len() == 0 {
- log.Warningf("not connected to gossip; check that gossip flag is set appropriately")
+ log.Warningf("not connected to cluster; check --join specifies another active node")
} else if len(g.resolversTried) == len(g.resolvers) {
log.Warningf("first range unavailable or cluster not initialized")
} else {
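The `updateNodeAddress` rework above changes gossip from "append and rewrite every time" to "persist only what is new": a resolver is added only when `haveResolver` misses, and bootstrap info is written to disk only when `haveBootstrapAddress` misses. A simplified standalone sketch of that guard (plain strings instead of resolvers and addresses; the real code also holds the gossip mutex):

```go
// maybeRecord returns the possibly-extended list and whether the
// caller should persist bootstrap info to disk.
func maybeRecord(known []string, addr string) ([]string, bool) {
	for _, ex := range known {
		if ex == addr {
			return known, false // already known: skip the disk write
		}
	}
	return append(known, addr), true // genuinely new: persist
}
```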
diff --git a/gossip/gossip_test.go b/gossip/gossip_test.go
index a9ae4635801b..47f21f6ce9ae 100644
--- a/gossip/gossip_test.go
+++ b/gossip/gossip_test.go
@@ -19,6 +19,9 @@ package gossip
import (
"bytes"
"errors"
+ "fmt"
+ "net/http"
+ "net/http/httptest"
"testing"
"time"
@@ -29,7 +32,6 @@ import (
"github.com/cockroachdb/cockroach/util"
"github.com/cockroachdb/cockroach/util/hlc"
"github.com/cockroachdb/cockroach/util/leaktest"
- "github.com/cockroachdb/cockroach/util/log"
"github.com/cockroachdb/cockroach/util/stop"
)
@@ -54,26 +56,35 @@ func TestGossipInfoStore(t *testing.T) {
func TestGossipGetNextBootstrapAddress(t *testing.T) {
defer leaktest.AfterTest(t)
+
+ // Set up an http server for testing the http load balancer.
+ i := 0
+ handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ i++
+ fmt.Fprintf(w, `{"address": {"network": "tcp", "address": "10.10.0.%d:26257"}}`, i)
+ })
+ s := httptest.NewServer(handler)
+ defer s.Close()
+
resolverSpecs := []string{
"127.0.0.1:9000",
"tcp=127.0.0.1:9001",
"unix=/tmp/unix-socket12345",
- "lb=127.0.0.1:9002",
+ fmt.Sprintf("http-lb=%s", s.Listener.Addr()),
"foo=127.0.0.1:9003", // error should not resolve.
- "lb=", // error should not resolve.
+ "http-lb=", // error should not resolve.
"localhost:9004",
- "lb=127.0.0.1:9005",
}
resolvers := []resolver.Resolver{}
for _, rs := range resolverSpecs {
- resolver, err := resolver.NewResolver(&base.Context{}, rs)
+ resolver, err := resolver.NewResolver(&base.Context{Insecure: true}, rs)
if err == nil {
resolvers = append(resolvers, resolver)
}
}
- if len(resolvers) != 6 {
- t.Errorf("expected 6 resolvers; got %d", len(resolvers))
+ if len(resolvers) != 5 {
+ t.Errorf("expected 5 resolvers; got %d", len(resolvers))
}
g := New(nil, resolvers)
@@ -83,16 +94,15 @@ func TestGossipGetNextBootstrapAddress(t *testing.T) {
"127.0.0.1:9000",
"127.0.0.1:9001",
"/tmp/unix-socket12345",
- "127.0.0.1:9002",
+ "10.10.0.1:26257",
"localhost:9004",
- "127.0.0.1:9005",
- "127.0.0.1:9002",
- "127.0.0.1:9005",
- "127.0.0.1:9002",
- "127.0.0.1:9005",
+ "10.10.0.2:26257",
+ "10.10.0.3:26257",
+ "10.10.0.4:26257",
+ "10.10.0.5:26257",
+ "10.10.0.6:26257",
}
for i := 0; i < len(expAddresses); i++ {
- log.Infof("getting next address")
addr := g.getNextBootstrapAddress()
if addr == nil {
t.Errorf("%d: unexpected nil addr when expecting %s", i, expAddresses[i])
diff --git a/gossip/resolver/node_lookup.go b/gossip/resolver/node_lookup.go
index 9026251019f4..a87bfdfd2b36 100644
--- a/gossip/resolver/node_lookup.go
+++ b/gossip/resolver/node_lookup.go
@@ -25,7 +25,6 @@ import (
"github.com/cockroachdb/cockroach/base"
"github.com/cockroachdb/cockroach/util"
- "github.com/cockroachdb/cockroach/util/log"
)
// nodeLookupResolver implements Resolver.
@@ -33,10 +32,9 @@ import (
// address. This is useful for http load balancers which will not forward RPC.
// It is never exhausted.
type nodeLookupResolver struct {
- context *base.Context
- typ string
- addr string
- exhausted bool
+ context *base.Context
+ typ string
+ addr string
// We need our own client so that we may specify timeouts.
httpClient *http.Client
}
@@ -48,20 +46,7 @@ func (nl *nodeLookupResolver) Type() string { return nl.typ }
func (nl *nodeLookupResolver) Addr() string { return nl.addr }
// GetAddress returns a net.Addr or error.
-// Upon errors, we set exhausted=true, then flip it back when called again.
func (nl *nodeLookupResolver) GetAddress() (net.Addr, error) {
- // TODO(marc): this is a bit of a hack to allow the server to start.
- // In single-node setups, this resolver will never return anything since
- // the status handlers are not serving yet. Instead, we specify multiple
- // gossip addresses (--gossip=localhost,http-lb=lb). We need this one to
- // be exhausted from time to time so that we have a chance to hit the fixed address.
- // Remove once the status pages are served before we've established a connection to
- // the gossip network.
- if nl.exhausted {
- nl.exhausted = false
- return nil, util.Errorf("skipping temporarily-exhausted resolver")
- }
-
if nl.httpClient == nil {
tlsConfig, err := nl.context.GetClientTLSConfig()
if err != nil {
@@ -73,14 +58,12 @@ func (nl *nodeLookupResolver) GetAddress() (net.Addr, error) {
}
}
- nl.exhausted = true
// TODO(marc): put common URIs in base and reuse everywhere.
url := fmt.Sprintf("%s://%s/_status/details/local", nl.context.HTTPRequestScheme(), nl.addr)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, err
}
- log.Infof("querying %s for gossip nodes", url)
resp, err := nl.httpClient.Do(req)
if err != nil {
return nil, err
@@ -106,8 +89,6 @@ func (nl *nodeLookupResolver) GetAddress() (net.Addr, error) {
if err != nil {
return nil, err
}
- nl.exhausted = false
- log.Infof("found gossip node: %+v", addr)
return addr, nil
}
@@ -122,6 +103,6 @@ func resolveAddress(network, address string) (net.Addr, error) {
return nil, util.Errorf("unknown address type: %q", network)
}
-// IsExhausted returns whether the resolver can yield further
-// addresses.
-func (nl *nodeLookupResolver) IsExhausted() bool { return nl.exhausted }
+// IsExhausted always returns true, as there's no way to know how many
+// nodes are behind a load balancer.
+func (nl *nodeLookupResolver) IsExhausted() bool { return true }
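With `IsExhausted` now always true, the bootstrap loop re-queries an `http-lb` resolver on every round instead of remembering a single backend, which is exactly what the reworked gossip test above exercises (its httptest server hands out `10.10.0.N` in sequence). A minimal illustration with a hypothetical `fakeLB` standing in for `nodeLookupResolver` (signature simplified to strings):

```go
import "fmt"

// fakeLB returns a different backend on each GetAddress call, the way
// a load balancer would; being permanently "exhausted" means the
// caller asks again on every bootstrap attempt.
type fakeLB struct{ n int }

func (f *fakeLB) GetAddress() (string, error) {
	f.n++
	return fmt.Sprintf("10.10.0.%d:26257", f.n), nil
}

func (f *fakeLB) IsExhausted() bool { return true }
```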
diff --git a/gossip/resolver/resolver.go b/gossip/resolver/resolver.go
index 6e8c17a5a360..b40c0dbc4869 100644
--- a/gossip/resolver/resolver.go
+++ b/gossip/resolver/resolver.go
@@ -35,7 +35,6 @@ type Resolver interface {
var validTypes = map[string]struct{}{
"tcp": {},
- "lb": {},
"unix": {},
"http-lb": {},
}
@@ -44,7 +43,6 @@ var validTypes = map[string]struct{}{
// A specification is of the form: [<network type>=]<address>
// Network type can be one of:
// - tcp: plain hostname or ip address
-// - lb: load balancer host name or ip: points to an unknown number of backends
// - unix: unix sockets
// - http-lb: http load balancer: queries http(s)://<address>/_status/details/local
// for node addresses
diff --git a/gossip/resolver/resolver_test.go b/gossip/resolver/resolver_test.go
index a4a4463ddc7c..9f4da25b6699 100644
--- a/gossip/resolver/resolver_test.go
+++ b/gossip/resolver/resolver_test.go
@@ -21,6 +21,7 @@ import (
"github.com/cockroachdb/cockroach/testutils"
"github.com/cockroachdb/cockroach/util"
+ _ "github.com/cockroachdb/cockroach/util/log" // Needed to import --verbosity
)
var nodeTestBaseContext = testutils.NewNodeTestBaseContext()
@@ -39,7 +40,6 @@ func TestParseResolverSpec(t *testing.T) {
{"127.0.0.1", true, "tcp", "127.0.0.1:26257"},
{"tcp=127.0.0.1", true, "tcp", "127.0.0.1:26257"},
{"tcp=127.0.0.1:23456", true, "tcp", "127.0.0.1:23456"},
- {"lb=127.0.0.1", true, "lb", "127.0.0.1:26257"},
{"unix=/tmp/unix-socket12345", true, "unix", "/tmp/unix-socket12345"},
{"http-lb=localhost:26257", true, "http-lb", "localhost:26257"},
{"http-lb=newhost:1234", true, "http-lb", "newhost:1234"},
@@ -47,7 +47,6 @@ func TestParseResolverSpec(t *testing.T) {
{"http-lb=:", true, "http-lb", def},
{"", false, "", ""},
{"foo=127.0.0.1", false, "", ""},
- {"lb=", false, "", ""},
{"", false, "tcp", ""},
{":", true, "tcp", def},
{"tcp=", false, "tcp", ""},
@@ -83,9 +82,6 @@ func TestGetAddress(t *testing.T) {
{"tcp=127.0.0.1", true, true, "tcp", "127.0.0.1:26257"},
{"tcp=localhost:80", true, true, "tcp", "localhost:80"},
// We should test unresolvable dns too, but this would be fragile.
- {"lb=localhost:80", true, false, "tcp", "localhost:80"},
- {"lb=127.0.0.1:80", true, false, "tcp", "127.0.0.1:80"},
- {"lb=127.0.0.1", true, false, "tcp", "127.0.0.1:26257"},
{"unix=/tmp/foo", true, true, "unix", "/tmp/foo"},
}
diff --git a/main.go b/main.go
index 53376ed89664..bc25c5420815 100644
--- a/main.go
+++ b/main.go
@@ -35,7 +35,7 @@ func main() {
os.Args = append(os.Args, "help")
}
if err := cli.Run(os.Args[1:]); err != nil {
- fmt.Fprintf(os.Stderr, "Failed running command %q: %v\n", os.Args[1:], err)
+ fmt.Fprintf(os.Stderr, "Failed running %q\n", os.Args[1])
os.Exit(1)
}
}
diff --git a/server/context.go b/server/context.go
index 897548171be4..e717f7b32663 100644
--- a/server/context.go
+++ b/server/context.go
@@ -82,9 +82,9 @@ type Context struct {
// Maximum clock offset for the cluster.
MaxOffset time.Duration
- // GossipBootstrap is a comma-separated list of node addresses that
+ // JoinUsing is a comma-separated list of node addresses that
// act as bootstrap hosts for connecting to the gossip network.
- GossipBootstrap string
+ JoinUsing string
// Enables running the node as a single-node in-memory cluster.
EphemeralSingleNode bool
@@ -193,6 +193,11 @@ func (ctx *Context) InitNode() error {
// Initialize attributes.
ctx.NodeAttributes = parseAttributes(ctx.Attrs)
+ // Skip gossip bootstrap if we're running as an ephemeral single node.
+ if ctx.EphemeralSingleNode {
+ return nil
+ }
+
// Get the gossip bootstrap resolvers.
resolvers, err := ctx.parseGossipBootstrapResolvers()
if err != nil {
@@ -207,6 +212,8 @@ func (ctx *Context) InitNode() error {
var errUnsizedInMemStore = errors.New("unable to initialize an in-memory store with capacity 0")
+var errCannotJoinSelf = errors.New("cannot specify --join as node's own address")
+
// initEngine parses the store attributes as a colon-separated list
// and instantiates an engine based on the dir parameter. If dir parses
// to an integer, it's taken to mean an in-memory engine; otherwise,
@@ -225,22 +232,20 @@ func (ctx *Context) initEngine(attrsStr, path string, stopper *stop.Stopper) (en
return engine.NewRocksDB(attrs, path, ctx.CacheSize, ctx.MemtableBudget, stopper), nil
}
-// SelfGossipAddr is a special flag that configures a node to gossip
-// only with itself. This avoids having to specify the port twice for
-// single-node clusters (i.e. once in --addr, and again in --gossip).
-const SelfGossipAddr = "self"
-
// parseGossipBootstrapResolvers parses a comma-separated list of
// gossip bootstrap resolvers.
func (ctx *Context) parseGossipBootstrapResolvers() ([]resolver.Resolver, error) {
var bootstrapResolvers []resolver.Resolver
- addresses := strings.Split(ctx.GossipBootstrap, ",")
+ addresses := strings.Split(ctx.JoinUsing, ",")
for _, address := range addresses {
if len(address) == 0 {
continue
}
- if address == SelfGossipAddr {
- address = ctx.Addr
+ if address == ctx.Addr {
+ if len(addresses) == 1 {
+ return nil, errCannotJoinSelf
+ }
+ continue
}
resolver, err := resolver.NewResolver(&ctx.Context, address)
if err != nil {
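The `parseGossipBootstrapResolvers` change above replaces the `self` pseudo-address with an explicit rule: a node silently skips its own address in --join, but a --join list consisting only of itself is an error. A standalone sketch of that filtering, assuming addresses compare as plain strings (as the diff does with `ctx.Addr`):

```go
import "errors"

func filterJoinList(addrs []string, self string) ([]string, error) {
	var out []string
	for _, a := range addrs {
		switch a {
		case "":
			continue // tolerate empty entries from "a,,b"
		case self:
			if len(addrs) == 1 {
				return nil, errors.New("cannot specify --join as node's own address")
			}
			continue // drop our own address, keep the rest
		default:
			out = append(out, a)
		}
	}
	return out, nil
}
```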
diff --git a/server/context_test.go b/server/context_test.go
index 9cc576506ea3..62f4cf886d43 100644
--- a/server/context_test.go
+++ b/server/context_test.go
@@ -30,7 +30,6 @@ func TestParseNodeAttributes(t *testing.T) {
ctx := NewContext()
ctx.Attrs = "attr1=val1::attr2=val2"
ctx.Stores = "mem=1"
- ctx.GossipBootstrap = SelfGossipAddr
stopper := stop.NewStopper()
defer stopper.Stop()
if err := ctx.InitStores(stopper); err != nil {
@@ -45,12 +44,12 @@ func TestParseNodeAttributes(t *testing.T) {
}
}
-// TestParseGossipBootstrapAddrs verifies that GossipBootstrap is
-// parsed correctly.
-func TestParseGossipBootstrapAddrs(t *testing.T) {
+// TestParseJoinUsingAddrs verifies that JoinUsing is parsed
+// correctly.
+func TestParseJoinUsingAddrs(t *testing.T) {
defer leaktest.AfterTest(t)
ctx := NewContext()
- ctx.GossipBootstrap = "localhost:12345,,localhost:23456"
+ ctx.JoinUsing = "localhost:12345,,localhost:23456"
ctx.Stores = "mem=1"
stopper := stop.NewStopper()
defer stopper.Stop()
diff --git a/server/node.go b/server/node.go
index a7ebd899c78a..bd6906327012 100644
--- a/server/node.go
+++ b/server/node.go
@@ -18,6 +18,7 @@ package server
import (
"container/list"
+ "errors"
"fmt"
"net"
"time"
@@ -26,6 +27,7 @@ import (
"github.com/cockroachdb/cockroach/client"
"github.com/cockroachdb/cockroach/gossip"
+ "github.com/cockroachdb/cockroach/gossip/resolver"
"github.com/cockroachdb/cockroach/keys"
"github.com/cockroachdb/cockroach/kv"
"github.com/cockroachdb/cockroach/roachpb"
@@ -40,6 +42,7 @@ import (
"github.com/cockroachdb/cockroach/util/metric"
"github.com/cockroachdb/cockroach/util/stop"
"github.com/cockroachdb/cockroach/util/tracer"
+ "github.com/cockroachdb/cockroach/util/uuid"
"github.com/gogo/protobuf/proto"
)
@@ -51,6 +54,10 @@ const (
publishStatusInterval = 10 * time.Second
)
+// errNeedsBootstrap indicates the node should be used as the seed of
+// a new cluster.
+var errNeedsBootstrap = errors.New("node has no initialized stores and no instructions for joining an existing cluster")
+
// A Node manages a map of stores (by store ID) for which it serves
// traffic. A node is the top-level data structure. There is one node
// instance per process. A node accepts incoming RPCs and services
@@ -100,13 +107,15 @@ func GetBootstrapSchema() sql.MetadataSchema {
return schema
}
-// BootstrapCluster bootstraps a multiple stores using the provided engines and
-// cluster ID. The first bootstrapped store contains a single range spanning
-// all keys. Initial range lookup metadata is populated for the range.
-//
-// Returns a KV client for unittest purposes. Caller should close the returned
-// client.
-func BootstrapCluster(clusterID string, engines []engine.Engine, stopper *stop.Stopper) (*client.DB, error) {
+// bootstrapCluster bootstraps multiple stores using the provided
+// engines and cluster ID. The first bootstrapped store contains a
+// single range spanning all keys. Initial range lookup metadata is
+// populated for the range. Returns the cluster ID.
+func bootstrapCluster(engines []engine.Engine) (string, error) {
+ clusterID := uuid.NewUUID4().String()
+ stopper := stop.NewStopper()
+ defer stopper.Stop()
+
ctx := storage.StoreContext{}
ctx.ScanInterval = 10 * time.Minute
ctx.Clock = hlc.NewClock(hlc.UnixNano)
@@ -128,12 +137,12 @@ func BootstrapCluster(clusterID string, engines []engine.Engine, stopper *stop.S
// Verify the store isn't already part of a cluster.
if len(s.Ident.ClusterID) > 0 {
- return nil, util.Errorf("storage engine already belongs to a cluster (%s)", s.Ident.ClusterID)
+ return "", util.Errorf("storage engine already belongs to a cluster (%s)", s.Ident.ClusterID)
}
// Bootstrap store to persist the store ident.
if err := s.Bootstrap(sIdent, stopper); err != nil {
- return nil, err
+ return "", err
}
// Create first range, writing directly to engine. Note this does
// not create the range, just its data. Only do this if this is the
@@ -141,11 +150,11 @@ func BootstrapCluster(clusterID string, engines []engine.Engine, stopper *stop.S
if i == 0 {
initialValues := GetBootstrapSchema().GetInitialValues()
if err := s.BootstrapRange(initialValues); err != nil {
- return nil, err
+ return "", err
}
}
if err := s.Start(stopper); err != nil {
- return nil, err
+ return "", err
}
stores.AddStore(s)
@@ -153,16 +162,16 @@ func BootstrapCluster(clusterID string, engines []engine.Engine, stopper *stop.S
// Initialize node and store ids. Only initialize the node once.
if i == 0 {
if nodeID, err := allocateNodeID(ctx.DB); nodeID != sIdent.NodeID || err != nil {
- return nil, util.Errorf("expected to initialize node id allocator to %d, got %d: %s",
+ return "", util.Errorf("expected to initialize node id allocator to %d, got %d: %s",
sIdent.NodeID, nodeID, err)
}
}
if storeID, err := allocateStoreIDs(sIdent.NodeID, 1, ctx.DB); storeID != sIdent.StoreID || err != nil {
- return nil, util.Errorf("expected to initialize store id allocator to %d, got %d: %s",
+ return "", util.Errorf("expected to initialize store id allocator to %d, got %d: %s",
sIdent.StoreID, storeID, err)
}
}
- return ctx.DB, nil
+ return clusterID, nil
}
// NewNode returns a new instance of Node.
@@ -229,8 +238,7 @@ func (n *Node) initNodeID(id roachpb.NodeID) {
// start starts the node by registering the storage instance for the
// RPC service "Node" and initializing stores for each specified
// engine. Launches periodic store gossiping in a goroutine.
-func (n *Node) start(rpcServer *rpc.Server, addr net.Addr, engines []engine.Engine,
- attrs roachpb.Attributes) error {
+func (n *Node) start(rpcServer *rpc.Server, addr net.Addr, engines []engine.Engine, attrs roachpb.Attributes) error {
n.initDescriptor(addr, attrs)
const method = "Node.Batch"
if err := rpcServer.Register(method, n.executeCmd, &roachpb.BatchRequest{}); err != nil {
@@ -242,7 +250,28 @@ func (n *Node) start(rpcServer *rpc.Server, addr net.Addr, engines []engine.Engi
// Initialize stores, including bootstrapping new ones.
if err := n.initStores(engines, n.stopper); err != nil {
- return err
+ if err == errNeedsBootstrap {
+ // This node has no initialized stores and no way to connect to
+ // an existing cluster, so we bootstrap it.
+ clusterID, err := bootstrapCluster(engines)
+ if err != nil {
+ return err
+ }
+ log.Infof("**** cluster %s has been created", clusterID)
+ log.Infof("**** add additional nodes by specifying --join=%s", addr)
+ // Make sure we add the node as a resolver.
+ selfResolver, err := resolver.NewResolverFromAddress(addr)
+ if err != nil {
+ return err
+ }
+ n.ctx.Gossip.SetResolvers([]resolver.Resolver{selfResolver})
+ // After bootstrapping, try again to initialize the stores.
+ if err := n.initStores(engines, n.stopper); err != nil {
+ return err
+ }
+ } else {
+ return err
+ }
}
n.startedAt = n.ctx.Clock.Now().WallTime
@@ -294,6 +323,12 @@ func (n *Node) initStores(engines []engine.Engine, stopper *stop.Stopper) error
n.stores.AddStore(s)
}
+ // If there are no initialized stores and no gossip resolvers,
+ // bootstrap this node as the seed of a new cluster.
+ if n.stores.GetStoreCount() == 0 && !n.ctx.Gossip.HasResolvers() {
+ return errNeedsBootstrap
+ }
+
// Verify all initialized stores agree on cluster and node IDs.
if err := n.validateStores(); err != nil {
return err
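This hunk is the heart of the PR: `initStores` now reports `errNeedsBootstrap` when a node has neither an initialized store (no cluster ID on disk) nor any --join resolvers, and `start` reacts by bootstrapping a fresh cluster, installing the node's own address as a resolver, and retrying store initialization. The decision itself reduces to one predicate (a sketch, not the verbatim code):

```go
// needsBootstrap: seed a brand-new cluster only when there is nothing
// on disk to join with and nobody to join via --join.
func needsBootstrap(initializedStores int, hasResolvers bool) bool {
	return initializedStores == 0 && !hasResolvers
}
```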
diff --git a/server/node_test.go b/server/node_test.go
index 2cb834ebcbcc..d2fdb964ef11 100644
--- a/server/node_test.go
+++ b/server/node_test.go
@@ -124,17 +124,16 @@ func (s keySlice) Less(i, j int) bool { return bytes.Compare(s[i], s[j]) < 0 }
func TestBootstrapCluster(t *testing.T) {
defer leaktest.AfterTest(t)
stopper := stop.NewStopper()
+ defer stopper.Stop()
e := engine.NewInMem(roachpb.Attributes{}, 1<<20, stopper)
- localDB, err := BootstrapCluster("cluster-1", []engine.Engine{e}, stopper)
- if err != nil {
+ if _, err := bootstrapCluster([]engine.Engine{e}); err != nil {
t.Fatal(err)
}
- defer stopper.Stop()
- // Scan the complete contents of the local database.
- rows, pErr := localDB.Scan(keys.LocalMax, roachpb.KeyMax, 0)
- if pErr != nil {
- t.Fatal(pErr.GoError())
+ // Scan the complete contents of the local database directly from the engine.
+ rows, _, err := engine.MVCCScan(e, keys.LocalMax, roachpb.KeyMax, 0, roachpb.MaxTimestamp, true, nil)
+ if err != nil {
+ t.Fatal(err)
}
var foundKeys keySlice
for _, kv := range rows {
@@ -169,11 +168,9 @@ func TestBootstrapNewStore(t *testing.T) {
engineStopper := stop.NewStopper()
defer engineStopper.Stop()
e := engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper)
- eagerStopper := stop.NewStopper()
- if _, err := BootstrapCluster("cluster-1", []engine.Engine{e}, eagerStopper); err != nil {
+ if _, err := bootstrapCluster([]engine.Engine{e}); err != nil {
t.Fatal(err)
}
- eagerStopper.Stop()
// Start a new node with two new stores which will require bootstrapping.
engines := []engine.Engine{
@@ -210,12 +207,9 @@ func TestNodeJoin(t *testing.T) {
engineStopper := stop.NewStopper()
defer engineStopper.Stop()
e := engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper)
- stopper := stop.NewStopper()
- _, err := BootstrapCluster("cluster-1", []engine.Engine{e}, stopper)
- if err != nil {
+ if _, err := bootstrapCluster([]engine.Engine{e}); err != nil {
t.Fatal(err)
}
- stopper.Stop()
// Start the bootstrap node.
engines1 := []engine.Engine{e}
@@ -264,12 +258,9 @@ func TestCorruptedClusterID(t *testing.T) {
engineStopper := stop.NewStopper()
e := engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper)
defer engineStopper.Stop()
- eagerStopper := stop.NewStopper()
- _, err := BootstrapCluster("cluster-1", []engine.Engine{e}, eagerStopper)
- if err != nil {
+ if _, err := bootstrapCluster([]engine.Engine{e}); err != nil {
t.Fatal(err)
}
- eagerStopper.Stop()
// Set the cluster ID to an empty string.
sIdent := roachpb.StoreIdent{
@@ -277,7 +268,7 @@ func TestCorruptedClusterID(t *testing.T) {
NodeID: 1,
StoreID: 1,
}
- if err = engine.MVCCPutProto(e, nil, keys.StoreIdentKey(), roachpb.ZeroTimestamp, nil, &sIdent); err != nil {
+ if err := engine.MVCCPutProto(e, nil, keys.StoreIdentKey(), roachpb.ZeroTimestamp, nil, &sIdent); err != nil {
t.Fatal(err)
}
diff --git a/server/server.go b/server/server.go
index dbb628acf17a..f46b5a31f137 100644
--- a/server/server.go
+++ b/server/server.go
@@ -31,7 +31,6 @@ import (
snappy "github.com/cockroachdb/c-snappy"
"github.com/cockroachdb/cockroach/client"
"github.com/cockroachdb/cockroach/gossip"
- "github.com/cockroachdb/cockroach/gossip/resolver"
"github.com/cockroachdb/cockroach/keys"
"github.com/cockroachdb/cockroach/kv"
crpc "github.com/cockroachdb/cockroach/rpc"
@@ -206,10 +205,9 @@ func NewServer(ctx *Context, stopper *stop.Stopper) (*Server, error) {
return s, nil
}
-// Start runs the RPC and HTTP servers, starts the gossip instance (if
-// selfBootstrap is true, uses the rpc server's address as the gossip
-// bootstrap), and starts the node using the supplied engines slice.
-func (s *Server) Start(selfBootstrap bool) error {
+// Start starts the server on the specified port, starts gossip and
+// initializes the node using the engines from the server's context.
+func (s *Server) Start() error {
tlsConfig, err := s.ctx.GetServerTLSConfig()
if err != nil {
return err
@@ -222,18 +220,8 @@ func (s *Server) Start(selfBootstrap bool) error {
}
s.listener = ln
-
addr := ln.Addr()
- addrStr := addr.String()
- // Handle self-bootstrapping case for a single node.
- if selfBootstrap {
- selfResolver, err := resolver.NewResolver(&s.ctx.Context, addrStr)
- if err != nil {
- return err
- }
- s.gossip.SetResolvers([]resolver.Resolver{selfResolver})
- }
s.gossip.Start(s.rpc, addr, s.stopper)
if err := s.node.start(s.rpc, addr, s.ctx.Engines, s.ctx.NodeAttributes); err != nil {
diff --git a/server/server_test.go b/server/server_test.go
index ae89a0866337..b4002a39a0ed 100644
--- a/server/server_test.go
+++ b/server/server_test.go
@@ -79,7 +79,7 @@ func TestInitEngine(t *testing.T) {
}
for _, spec := range testCases {
ctx := NewContext()
- ctx.Stores, ctx.GossipBootstrap = spec.key, SelfGossipAddr
+ ctx.Stores = spec.key
if err := ctx.InitStores(stopper); err == nil {
engines := ctx.Engines
if spec.wantError {
@@ -111,7 +111,6 @@ func TestInitEngines(t *testing.T) {
ctx := NewContext()
ctx.Stores = fmt.Sprintf("mem=1000,mem:ddr3=1000,ssd=%s,hdd:7200rpm=%s", tmp[0], tmp[1])
- ctx.GossipBootstrap = SelfGossipAddr
expEngines := []struct {
attrs roachpb.Attributes
isMem bool
diff --git a/server/status_test.go b/server/status_test.go
index 185d34b2472a..e3c11684c330 100644
--- a/server/status_test.go
+++ b/server/status_test.go
@@ -261,7 +261,7 @@ func startServer(t *testing.T) TestServer {
ts.Ctx = NewTestContext()
ts.StoresPerNode = 3
if err := ts.Start(); err != nil {
- t.Fatal(err)
+ t.Fatalf("failed to start test server: %s", err)
}
// Make sure the range is spun up with an arbitrary read command. We do not
diff --git a/server/testserver.go b/server/testserver.go
index 22b3382271d5..3dc94dabb223 100644
--- a/server/testserver.go
+++ b/server/testserver.go
@@ -98,8 +98,7 @@ func NewTestContext() *Context {
//
type TestServer struct {
// Ctx is the context used by this server.
- Ctx *Context
- SkipBootstrap bool
+ Ctx *Context
// server is the embedded Cockroach server struct.
*Server
StoresPerNode int
@@ -195,24 +194,15 @@ func (ts *TestServer) StartWithStopper(stopper *stop.Stopper) error {
return err
}
- // Ensure we have the correct number of engines. Add in in-memory ones where
- // needed. There must be at least one store/engine.
+ // Ensure we have the correct number of engines. Add in-memory ones where
+ // needed. There must be at least one store/engine.
if ts.StoresPerNode < 1 {
ts.StoresPerNode = 1
}
for i := len(ts.Ctx.Engines); i < ts.StoresPerNode; i++ {
ts.Ctx.Engines = append(ts.Ctx.Engines, engine.NewInMem(roachpb.Attributes{}, 100<<20, ts.Server.stopper))
}
-
- if !ts.SkipBootstrap {
- stopper := stop.NewStopper()
- _, err := BootstrapCluster("cluster-1", ts.Ctx.Engines, stopper)
- if err != nil {
- return util.Errorf("could not bootstrap cluster: %s", err)
- }
- stopper.Stop()
- }
- if err := ts.Server.Start(true); err != nil {
+ if err := ts.Server.Start(); err != nil {
return err
}
diff --git a/storage/engine/rocksdb.go b/storage/engine/rocksdb.go
index 1818b975e240..28653ad3c11e 100644
--- a/storage/engine/rocksdb.go
+++ b/storage/engine/rocksdb.go
@@ -117,7 +117,7 @@ func (r *RocksDB) Open() error {
return util.Errorf("could not open rocksdb instance: %s", err)
}
- // Start a gorountine that will finish when the underlying handle
+ // Start a goroutine that will finish when the underlying handle
// is deallocated. This is used to check a leak in tests.
go func() {
<-r.deallocated
diff --git a/storage/stores.go b/storage/stores.go
index 2a5c928d9892..c34ef3c388a6 100644
--- a/storage/stores.go
+++ b/storage/stores.go
@@ -20,6 +20,7 @@ import (
"fmt"
"sync"
+ "github.com/gogo/protobuf/proto"
"golang.org/x/net/context"
"github.com/cockroachdb/cockroach/client"
@@ -44,6 +45,7 @@ type Stores struct {
mu sync.RWMutex // Protects storeMap and addrs
storeMap map[roachpb.StoreID]*Store // Map from StoreID to Store
biLatestTS roachpb.Timestamp // Timestamp of gossip bootstrap info
+ latestBI *gossip.BootstrapInfo // Latest cached bootstrap info
}
var _ client.Sender = &Stores{} // Stores implements the client.Sender interface
@@ -96,9 +98,8 @@ func (ls *Stores) AddStore(s *Store) {
// If we've already read the gossip bootstrap info, ensure that
// all stores have the most recent values.
if !ls.biLatestTS.Equal(roachpb.ZeroTimestamp) {
- // ReadBootstrapInfo calls updateAllBootstrapInfos.
- if err := ls.readBootstrapInfoLocked(&gossip.BootstrapInfo{}); err != nil {
- log.Errorf("failed to update bootstrap info on stores: %s", err)
+ if err := ls.updateBootstrapInfo(ls.latestBI); err != nil {
+ log.Errorf("failed to update bootstrap info on newly added store: %s", err)
}
}
}
@@ -256,40 +257,22 @@ func (ls *Stores) RangeLookup(key roachpb.RKey, _ *roachpb.RangeDescriptor, cons
func (ls *Stores) ReadBootstrapInfo(bi *gossip.BootstrapInfo) error {
ls.mu.RLock()
defer ls.mu.RUnlock()
- return ls.readBootstrapInfoLocked(bi)
-}
-
-func (ls *Stores) readBootstrapInfoLocked(bi *gossip.BootstrapInfo) error {
latestTS := roachpb.ZeroTimestamp
- timestamps := map[roachpb.StoreID]roachpb.Timestamp{}
- // Find the most recent bootstrap info, collecting timestamps for
- // each store along the way.
- for id, s := range ls.storeMap {
+ // Find the most recent bootstrap info.
+ for _, s := range ls.storeMap {
var storeBI gossip.BootstrapInfo
ok, err := engine.MVCCGetProto(s.engine, keys.StoreGossipKey(), roachpb.ZeroTimestamp, true, nil, &storeBI)
if err != nil {
return err
}
- timestamps[id] = storeBI.Timestamp
if ok && latestTS.Less(storeBI.Timestamp) {
latestTS = storeBI.Timestamp
*bi = storeBI
}
}
-
- // Update all stores with an earlier timestamp.
- for id, s := range ls.storeMap {
- if timestamps[id].Less(latestTS) {
- if err := engine.MVCCPutProto(s.engine, nil, keys.StoreGossipKey(), roachpb.ZeroTimestamp, nil, bi); err != nil {
- return err
- }
- log.Infof("updated gossip bootstrap info to %s", s)
- }
- }
-
- ls.biLatestTS = latestTS
- return nil
+ log.Infof("read %d node addresses from persistent storage", len(bi.Addresses))
+ return ls.updateBootstrapInfo(bi)
}
// WriteBootstrapInfo implements the gossip.Storage interface. Write
@@ -299,13 +282,26 @@ func (ls *Stores) readBootstrapInfoLocked(bi *gossip.BootstrapInfo) error {
func (ls *Stores) WriteBootstrapInfo(bi *gossip.BootstrapInfo) error {
ls.mu.RLock()
defer ls.mu.RUnlock()
- ls.biLatestTS = ls.clock.Now()
- bi.Timestamp = ls.biLatestTS
+ bi.Timestamp = ls.clock.Now()
+ if err := ls.updateBootstrapInfo(bi); err != nil {
+ return err
+ }
+ log.Infof("wrote %d node addresses to persistent storage", len(bi.Addresses))
+ return nil
+}
+
+func (ls *Stores) updateBootstrapInfo(bi *gossip.BootstrapInfo) error {
+ if bi.Timestamp.Less(ls.biLatestTS) {
+ return nil
+ }
+ // Update the latest timestamp and set cached version.
+ ls.biLatestTS = bi.Timestamp
+ ls.latestBI = proto.Clone(bi).(*gossip.BootstrapInfo)
+ // Update all stores.
for _, s := range ls.storeMap {
if err := engine.MVCCPutProto(s.engine, nil, keys.StoreGossipKey(), roachpb.ZeroTimestamp, nil, bi); err != nil {
return err
}
- log.Infof("wrote gossip bootstrap info to %s", s)
}
return nil
}
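The new `updateBootstrapInfo` gives bootstrap-info writes a simple ordering rule: every write is stamped by the node clock, stale writes (older than the cached `biLatestTS`) are dropped, and `latestBI` is kept as a cloned cache so newly added stores can be brought current without re-reading from disk. The guard at its top reduces to a single comparison (sketch, using the `roachpb.Timestamp.Less` comparison seen throughout this diff):

```go
import "github.com/cockroachdb/cockroach/roachpb"

// shouldApply mirrors the first check in updateBootstrapInfo: only
// info at or beyond the cached latest timestamp is written through.
func shouldApply(incoming, latest roachpb.Timestamp) bool {
	return !incoming.Less(latest)
}
```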