Skip to content

Commit

Permalink
rpc,security: use the tenant client cert for pod-pod communication
Browse files Browse the repository at this point in the history
Release note (security update): Multitenant SQL servers now reuse
the tenant client certificate (`client-tenant.NN.crt`) for SQL-to-SQL
communication. Existing deployments must regenerate the certificates
with dual purpose (client and server authentication).
  • Loading branch information
knz committed Oct 6, 2021
1 parent 04d6d93 commit d860f6c
Show file tree
Hide file tree
Showing 31 changed files with 530 additions and 464 deletions.
11 changes: 7 additions & 4 deletions pkg/cli/mt_cert.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ If the CA certificate exists and --overwrite is true, the new CA certificate is
// A createClientCert command generates a client certificate and stores it
// in the cert directory under <username>.crt and key under <username>.key.
var mtCreateTenantClientCertCmd = &cobra.Command{
Use: "create-tenant-client --certs-dir=<path to cockroach certs dir> --ca-key=<path-to-ca-key> <tenant-id>",
Use: "create-tenant-client --certs-dir=<path to cockroach certs dir> --ca-key=<path-to-ca-key> <tenant-id> <host 1> <host 2> ... <host N>",
Short: "create tenant client certificate and key",
Long: `
Generate a tenant client certificate "<certs-dir>/client-tenant.<tenant-id>.crt" and key
Expand All @@ -60,19 +60,22 @@ Requires a CA cert in "<certs-dir>/ca-client-tenant.crt" and matching key in "--
If "ca-client-tenant.crt" contains more than one certificate, the first is used.
Creation fails if the CA expiration time is before the desired certificate expiration.
`,
Args: cobra.ExactArgs(1),
Args: cobra.MinimumNArgs(2),
RunE: clierrorplus.MaybeDecorateError(
func(cmd *cobra.Command, args []string) error {
tenantID, err := strconv.ParseUint(args[0], 10, 64)
tenantIDs := args[0]
hostAddrs := args[1:]
tenantID, err := strconv.ParseUint(tenantIDs, 10, 64)
if err != nil {
return errors.Wrapf(err, "%s is invalid uint64", args[0])
return errors.Wrapf(err, "%s is invalid uint64", tenantIDs)
}
cp, err := security.CreateTenantClientPair(
certCtx.certsDir,
certCtx.caKey,
certCtx.keySize,
certCtx.certificateLifetime,
tenantID,
hostAddrs,
)
if err != nil {
return errors.Wrap(
Expand Down
42 changes: 28 additions & 14 deletions pkg/rpc/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,21 +121,35 @@ func (a kvAuth) authenticate(ctx context.Context) (roachpb.TenantID, error) {
return roachpb.TenantID{}, err
}

subj := tlsInfo.State.PeerCertificates[0].Subject
if security.Contains(subj.OrganizationalUnit, security.TenantsOU) {
// Tenant authentication.
return tenantFromCommonName(subj.CommonName)
}

// KV auth.
if a.tenant.tenantID == roachpb.SystemTenantID {
// This node is a KV node.
//
//
// Is this a connection from a SQL tenant server?
subj := tlsInfo.State.PeerCertificates[0].Subject
if security.Contains(subj.OrganizationalUnit, security.TenantsOU) {
// Incoming connection originating from a tenant SQL server,
// into a KV node.
return tenantFromCommonName(subj.CommonName)
}

// TODO(benesch): the vast majority of RPCs should be limited to just
// NodeUser. This is not a security concern, as RootUser has access to
// read and write all data, merely good hygiene. For example, there is
// no reason to permit the root user to send raw Raft RPCs.
if !security.Contains(certUsers, security.NodeUser) &&
!security.Contains(certUsers, security.RootUser) {
return roachpb.TenantID{}, authErrorf("user %s is not allowed to perform this RPC", certUsers)
// Connection is from another KV node.
//
// TODO(benesch): the vast majority of RPCs should be limited to just
// NodeUser. This is not a security concern, as RootUser has access to
// read and write all data, merely good hygiene. For example, there is
// no reason to permit the root user to send raw Raft RPCs.
if !security.Contains(certUsers, security.NodeUser) &&
!security.Contains(certUsers, security.RootUser) {
return roachpb.TenantID{}, authErrorf("user %s is not allowed to perform this RPC", certUsers)
}
} else {
// This node is a SQL tenant server.
// Is this a connection from another SQL tenant server?
subj := tlsInfo.State.PeerCertificates[0].Subject
if !security.Contains(subj.OrganizationalUnit, security.TenantsOU) {
return roachpb.TenantID{}, authErrorf("user %s is not allowed to perform this RPC", certUsers)
}
}
return roachpb.TenantID{}, nil
}
2 changes: 2 additions & 0 deletions pkg/rpc/auth_tenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (
// server, that is, it ensures that the request only accesses resources
// available to the tenant.
type tenantAuthorizer struct {
// tenantID is the tenant ID for the current node.
// Equals SystemTenantID when running a KV node.
tenantID roachpb.TenantID
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/rpc/auth_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ func TestTenantFromCert(t *testing.T) {
p := peer.Peer{AuthInfo: tlsInfo}
ctx := peer.NewContext(context.Background(), &p)

tenID, err := kvAuth{}.authenticate(ctx)
tenID, err := kvAuth{tenant: tenantAuthorizer{tenantID: roachpb.SystemTenantID}}.authenticate(ctx)

if tc.expErr == "" {
require.Equal(t, tc.expTenID, tenID)
Expand Down
3 changes: 3 additions & 0 deletions pkg/rpc/tls.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ type SecurityContext struct {
func MakeSecurityContext(
cfg *base.Config, tlsSettings security.TLSSettings, tenID roachpb.TenantID,
) SecurityContext {
if tenID.ToUint64() == 0 {
panic(errors.AssertionFailedf("programming error: tenant ID not defined"))
}
return SecurityContext{
CertsLocator: security.MakeCertsLocator(cfg.SSLCertsDir),
TLSSettings: tlsSettings,
Expand Down
49 changes: 39 additions & 10 deletions pkg/security/certificate_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -774,9 +774,17 @@ func (cm *CertificateManager) getEmbeddedServerTLSConfig(
return nil, err
}

nodeCert, err := cm.getNodeCertLocked()
if err != nil {
return nil, err
var nodeCert *CertInfo
if cm.tenantClientCert == nil {
nodeCert, err = cm.getNodeCertLocked()
if err != nil {
return nil, err
}
} else {
nodeCert, err = cm.getTenantClientCertLocked()
if err != nil {
return nil, err
}
}

clientCA, err := cm.getClientCACertLocked()
Expand Down Expand Up @@ -862,7 +870,10 @@ func (cm *CertificateManager) getCACertLocked() (*CertInfo, error) {
func (cm *CertificateManager) getClientCACertLocked() (*CertInfo, error) {
if cm.clientCACert == nil {
// No client CA: use general CA.
return cm.getCACertLocked()
if cm.tenantClientCert == nil {
return cm.getCACertLocked()
}
return cm.getTenantClientCACertLocked()
}

if err := checkCertIsValid(cm.clientCACert); err != nil {
Expand All @@ -877,7 +888,10 @@ func (cm *CertificateManager) getClientCACertLocked() (*CertInfo, error) {
func (cm *CertificateManager) getUICACertLocked() (*CertInfo, error) {
if cm.uiCACert == nil {
// No UI CA: use general CA.
return cm.getCACertLocked()
if cm.tenantClientCert == nil {
return cm.getCACertLocked()
}
return cm.getTenantClientCACertLocked()
}

if err := checkCertIsValid(cm.uiCACert); err != nil {
Expand All @@ -901,7 +915,10 @@ func (cm *CertificateManager) getNodeCertLocked() (*CertInfo, error) {
func (cm *CertificateManager) getUICertLocked() (*CertInfo, error) {
if cm.uiCert == nil {
// No UI certificate: use node certificate.
return cm.getNodeCertLocked()
if cm.tenantClientCert == nil {
return cm.getNodeCertLocked()
}
return cm.getTenantClientCertLocked()
}
if err := checkCertIsValid(cm.uiCert); err != nil {
return nil, makeError(err, "problem with UI certificate")
Expand All @@ -928,7 +945,10 @@ func (cm *CertificateManager) getClientCertLocked(user SQLUsername) (*CertInfo,
func (cm *CertificateManager) getNodeClientCertLocked() (*CertInfo, error) {
if cm.nodeClientCert == nil {
// No specific client cert for 'node': use multi-purpose node cert.
return cm.getNodeCertLocked()
if cm.tenantClientCert == nil {
return cm.getNodeCertLocked()
}
return cm.getTenantClientCertLocked()
}

if err := checkCertIsValid(cm.nodeClientCert); err != nil {
Expand Down Expand Up @@ -1002,9 +1022,18 @@ func (cm *CertificateManager) GetClientTLSConfig(user SQLUsername) (*tls.Config,
defer cm.mu.Unlock()

// We always need the CA cert.
ca, err := cm.getCACertLocked()
if err != nil {
return nil, err
var ca *CertInfo
var err error
if cm.tenantClientCert == nil {
ca, err = cm.getCACertLocked()
if err != nil {
return nil, err
}
} else {
ca, err = cm.getTenantClientCACertLocked()
if err != nil {
return nil, err
}
}

if !user.IsNodeUser() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/security/certificate_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func TestManagerWithPrincipalMap(t *testing.T) {

defer func() { _ = security.SetCertPrincipalMap(nil) }()
defer func() {
_ = os.Setenv("COCKROACH_CERT_NODE_USER", security.NodeUser)
_ = os.Unsetenv("COCKROACH_CERT_NODE_USER")
envutil.ClearEnvCache()
}()
require.NoError(t, os.Setenv("COCKROACH_CERT_NODE_USER", "node.crdb.io"))
Expand Down
8 changes: 6 additions & 2 deletions pkg/security/certs.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,11 @@ type TenantClientPair struct {
//
// To write the returned TenantClientPair to disk, use WriteTenantClientPair.
func CreateTenantClientPair(
certsDir, caKeyPath string, keySize int, lifetime time.Duration, tenantIdentifier uint64,
certsDir, caKeyPath string,
keySize int,
lifetime time.Duration,
tenantIdentifier uint64,
hosts []string,
) (*TenantClientPair, error) {
if len(caKeyPath) == 0 {
return nil, errors.New("the path to the CA key is required")
Expand Down Expand Up @@ -501,7 +505,7 @@ func CreateTenantClientPair(
}

clientCert, err := GenerateTenantClientCert(
caCert, caPrivateKey, clientKey.Public(), lifetime, tenantIdentifier,
caCert, caPrivateKey, clientKey.Public(), lifetime, tenantIdentifier, hosts,
)
if err != nil {
return nil, errors.Errorf("error creating tenant certificate and key: %s", err)
Expand Down
2 changes: 1 addition & 1 deletion pkg/security/certs_tenant_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func makeTenantCerts(t *testing.T, tenant uint64) (certsDir string, cleanup func

// That dedicated service can make client certs for a tenant as follows:
tenantCerts, err := security.CreateTenantClientPair(
certsDir, tenantCAKey, testKeySize, 48*time.Hour, tenant,
certsDir, tenantCAKey, testKeySize, 48*time.Hour, tenant, []string{"127.0.0.1"},
)
require.NoError(t, err)
// We write the certs to disk, though in production this would not necessarily
Expand Down
3 changes: 2 additions & 1 deletion pkg/security/certs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ func TestGenerateTenantCerts(t *testing.T) {
testKeySize,
time.Hour,
999,
[]string{"127.0.0.1"},
)
require.NoError(t, err)
require.NoError(t, security.WriteTenantClientPair(certsDir, cp, false))
Expand Down Expand Up @@ -253,7 +254,7 @@ func generateBaseCerts(certsDir string) error {
}

tcp, err := security.CreateTenantClientPair(certsDir, caKey,
testKeySize, time.Hour*48, 10)
testKeySize, time.Hour*48, 10, []string{"127.0.0.1"})
if err != nil {
return err
}
Expand Down
Loading

0 comments on commit d860f6c

Please sign in to comment.