Skip to content

Commit

Permalink
auth: fix cluster init deadlock (#49638)
Browse files Browse the repository at this point in the history
* auth: fix cluster init deadlock

If the bootstrapped resources are created but a failure occurs later
in the cluster init process subsequent runs of the cluster initialization
will fail due to the existing bootstrap resources.

* add test for new behavior

* add test for cluster init failures recovery

* Update lib/services/local/resource_test.go

Co-authored-by: Zac Bergquist <zac.bergquist@goteleport.com>

* reorder assertion values

---------

Co-authored-by: Zac Bergquist <zac.bergquist@goteleport.com>
  • Loading branch information
dboslee and zmb3 committed Dec 9, 2024
1 parent 35d0b06 commit ed4fbe4
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 3 deletions.
34 changes: 34 additions & 0 deletions lib/auth/init_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,40 @@ func keysIn[K comparable, V any](m map[K]V) []K {
return result
}

type failingTrustInternal struct {
services.TrustInternal
}

func (t *failingTrustInternal) CreateCertAuthority(ctx context.Context, ca types.CertAuthority) error {
return trace.Errorf("error")
}

// TestInitCertFailureRecovery ensures the auth server is able to recover from
// a failure in the cert creation process.
func TestInitCertFailureRecovery(t *testing.T) {
ctx := context.Background()
cap, err := types.NewAuthPreference(types.AuthPreferenceSpecV2{
Type: constants.SAML,
})
require.NoError(t, err)

conf := setupConfig(t)

// BootstrapResources have lead to an unrecoverable state in the past.
// See https://github.com/gravitational/teleport/pull/49638.
conf.BootstrapResources = []types.Resource{cap}
_, err = Init(ctx, conf, func(s *Server) error {
s.TrustInternal = &failingTrustInternal{
TrustInternal: s.TrustInternal,
}
return nil
})
require.Error(t, err)

_, err = Init(ctx, conf)
require.NoError(t, err)
}

// TestPresets tests behavior of presets
func TestPresets(t *testing.T) {
ctx := context.Background()
Expand Down
9 changes: 6 additions & 3 deletions lib/services/local/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ import (
)

// CreateResources attempts to dynamically create the supplied resources.
// This function returns `trace.AlreadyExistsError` if one or more resources
// would be overwritten, and `trace.NotImplementedError` if any resources
// If any resources already exist they are skipped and not overwritten.
// This function returns a `trace.NotImplementedError` if any resources
// are of an unsupported type (see `itemsFromResources(...)`).
//
// NOTE: This function is non-atomic and performs no internal synchronization;
Expand All @@ -43,6 +43,7 @@ func CreateResources(ctx context.Context, b backend.Backend, resources ...types.
if err != nil {
return trace.Wrap(err)
}
<<<<<<< HEAD
// ensure all items do not exist before continuing.
for _, item := range items {
_, err = b.Get(ctx, item.Key)
Expand All @@ -54,9 +55,11 @@ func CreateResources(ctx context.Context, b backend.Backend, resources ...types.
}
}
// create all items.
=======
>>>>>>> 02cadb4c14 (auth: fix cluster init deadlock (#49638))
for _, item := range items {
_, err := b.Create(ctx, item)
if err != nil {
if !trace.IsAlreadyExists(err) && err != nil {
return trace.Wrap(err)
}
}
Expand Down
29 changes: 29 additions & 0 deletions lib/services/local/resource_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/stretchr/testify/require"
"golang.org/x/crypto/bcrypt"

"github.com/gravitational/teleport/api/constants"
apidefaults "github.com/gravitational/teleport/api/defaults"
"github.com/gravitational/teleport/api/types"
"github.com/gravitational/teleport/lib/services"
Expand All @@ -55,6 +56,34 @@ func TestCreateResourcesProvisionToken(t *testing.T) {
require.Empty(t, cmp.Diff(token, fetchedToken, cmpopts.IgnoreFields(types.Metadata{}, "Revision")))
}

func TestCreateResource(t *testing.T) {
t.Parallel()
ctx := context.Background()
tt := setupServicesContext(ctx, t)
cap, err := types.NewAuthPreference(types.AuthPreferenceSpecV2{
Type: constants.Local,
})
require.NoError(t, err)

// Check that the initial call to CreateResources creates the given resources.
s, err := NewClusterConfigurationService(tt.bk)
require.NoError(t, err)
err = CreateResources(ctx, tt.bk, cap)
require.NoError(t, err)
got, err := s.GetAuthPreference(ctx)
require.NoError(t, err)
require.Equal(t, cap.GetType(), got.GetType())

// Check that already exists errors are ignored and the resource is not
// updated.
cap.SetType(constants.SAML)
err = CreateResources(ctx, tt.bk, cap)
require.NoError(t, err)
got, err = s.GetAuthPreference(ctx)
require.NoError(t, err)
require.NotEqual(t, cap.GetType(), got.GetType())
}

func TestUserResource(t *testing.T) {
t.Parallel()
ctx := context.Background()
Expand Down

0 comments on commit ed4fbe4

Please sign in to comment.