diff --git a/internal/integration/api/watchdog.go b/internal/integration/api/watchdog.go
new file mode 100644
index 0000000000..4aa5cf5a1e
--- /dev/null
+++ b/internal/integration/api/watchdog.go
@@ -0,0 +1,140 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+//go:build integration_api
+
+package api
+
+import (
+	"bytes"
+	"context"
+	"io"
+	"path/filepath"
+	"time"
+
+	"github.com/cosi-project/runtime/pkg/resource"
+	"github.com/cosi-project/runtime/pkg/state"
+
+	"github.com/siderolabs/talos/internal/integration/base"
+	"github.com/siderolabs/talos/pkg/machinery/client"
+	"github.com/siderolabs/talos/pkg/machinery/config/types/runtime"
+	runtimeres "github.com/siderolabs/talos/pkg/machinery/resources/runtime"
+)
+
+// WatchdogSuite verifies watchdog timer configuration through the Talos API.
+type WatchdogSuite struct {
+	base.APISuite
+
+	// ctx is the per-test context; SetupTest creates it and TearDownTest cancels it via ctxCancel.
+	ctx       context.Context //nolint:containedctx
+	ctxCancel context.CancelFunc
+}
+
+// SuiteName returns the name of the suite.
+func (suite *WatchdogSuite) SuiteName() string {
+	return "api.WatchdogSuite"
+}
+
+// SetupTest creates the test context with a one minute timeout and skips the test
+// unless the cluster was created by the qemu provisioner.
+func (suite *WatchdogSuite) SetupTest() {
+	suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), time.Minute)
+
+	if suite.Cluster != nil && suite.Cluster.Provisioner() == "qemu" {
+		return
+	}
+
+	suite.T().Skip("skipping watchdog test since provisioner is not qemu")
+}
+
+// TearDownTest cancels the test context, if SetupTest created one.
+func (suite *WatchdogSuite) TearDownTest() {
+	if cancel := suite.ctxCancel; cancel != nil {
+		cancel()
+	}
+}
+
+// readWatchdogSysfs reads the /sys/class/watchdog/<watchdog>/<property> file from the node
+// and returns its contents with leading and trailing whitespace trimmed.
+//
+// Any failure to open, read or close the file fails the test.
+func (suite *WatchdogSuite) readWatchdogSysfs(nodeCtx context.Context, watchdog, property string) string { //nolint:unparam
+	sysfsPath := filepath.Join("/sys/class/watchdog", watchdog, property)
+
+	r, err := suite.Client.Read(nodeCtx, sysfsPath)
+	suite.Require().NoError(err)
+
+	contents, err := io.ReadAll(r)
+	suite.Require().NoError(err)
+
+	suite.Require().NoError(r.Close())
+
+	return string(bytes.TrimSpace(contents))
+}
+
+// TestWatchdogSysfs sets up the watchdog and validates its parameters from the /sys/class/watchdog.
+func (suite *WatchdogSuite) TestWatchdogSysfs() {
+	// pick up a random node to test the watchdog on, and use it throughout the test
+	node := suite.RandomDiscoveredNodeInternalIP()
+
+	suite.T().Logf("testing watchdog on node %s", node)
+
+	// build a Talos API context which is tied to the node
+	nodeCtx := client.WithNode(suite.ctx, node)
+
+	// pick a watchdog
+	const watchdog = "watchdog0"
+
+	// the status resource is watched after every config change to know when the change took effect
+	statusMD := runtimeres.NewWatchdogTimerStatus(runtimeres.WatchdogTimerConfigID).Metadata()
+
+	cfgDocument := runtime.NewWatchdogTimerV1Alpha1()
+	cfgDocument.WatchdogDevice = "/dev/" + watchdog
+	cfgDocument.WatchdogTimeout = 120 * time.Second
+
+	// deactivate the watchdog
+	suite.RemoveMachineConfigDocuments(nodeCtx, cfgDocument.MetaKind)
+
+	_, err := suite.Client.COSI.WatchFor(nodeCtx, statusMD, state.WithEventTypes(state.Destroyed))
+	suite.Require().NoError(err)
+
+	suite.Require().Equal("inactive", suite.readWatchdogSysfs(nodeCtx, watchdog, "state"))
+
+	// enable watchdog with 120s timeout
+	suite.PatchMachineConfig(nodeCtx, cfgDocument)
+
+	_, err = suite.Client.COSI.WatchFor(nodeCtx, statusMD, state.WithEventTypes(state.Created, state.Updated))
+	suite.Require().NoError(err)
+
+	suite.Require().Equal("active", suite.readWatchdogSysfs(nodeCtx, watchdog, "state"))
+	suite.Require().Equal("120", suite.readWatchdogSysfs(nodeCtx, watchdog, "timeout"))
+
+	// update watchdog timeout to 60s
+	cfgDocument.WatchdogTimeout = 60 * time.Second
+	suite.PatchMachineConfig(nodeCtx, cfgDocument)
+
+	_, err = suite.Client.COSI.WatchFor(nodeCtx, statusMD,
+		state.WithEventTypes(state.Created, state.Updated),
+		state.WithCondition(func(r resource.Resource) (bool, error) {
+			return r.(*runtimeres.WatchdogTimerStatus).TypedSpec().Timeout == cfgDocument.WatchdogTimeout, nil
+		}),
+	)
+	suite.Require().NoError(err)
+
+	suite.Require().Equal("active", suite.readWatchdogSysfs(nodeCtx, watchdog, "state"))
+	suite.Require().Equal("60", suite.readWatchdogSysfs(nodeCtx, watchdog, "timeout"))
+
+	// deactivate the watchdog
+	suite.RemoveMachineConfigDocuments(nodeCtx, cfgDocument.MetaKind)
+
+	_, err = suite.Client.COSI.WatchFor(nodeCtx, statusMD, state.WithEventTypes(state.Destroyed))
+	suite.Require().NoError(err)
+
+	suite.Require().Equal("inactive", suite.readWatchdogSysfs(nodeCtx, watchdog, "state"))
+}
+
+// init registers WatchdogSuite in the list of all suites.
+func init() {
+	allSuites = append(allSuites, new(WatchdogSuite))
+}
diff --git a/internal/integration/base/api.go b/internal/integration/base/api.go
index 89413d5ed6..284834416b 100644
--- a/internal/integration/base/api.go
+++ b/internal/integration/base/api.go
@@ -15,15 +15,18 @@ import (
 	"io"
 	"math/rand/v2"
 	"path/filepath"
+	"slices"
 	"strings"
 	"time"
 
 	"github.com/cosi-project/runtime/pkg/safe"
 	"github.com/cosi-project/runtime/pkg/state"
+	"github.com/siderolabs/gen/xslices"
 	"github.com/siderolabs/go-retry/retry"
 	"github.com/stretchr/testify/suite"
 	"google.golang.org/grpc/backoff"
 	"google.golang.org/grpc/codes"
+	"gopkg.in/yaml.v3"
 
 	"github.com/siderolabs/talos/cmd/talosctl/pkg/talos/helpers"
 	"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
@@ -33,6 +36,9 @@ import (
 	"github.com/siderolabs/talos/pkg/machinery/client"
 	clientconfig "github.com/siderolabs/talos/pkg/machinery/client/config"
 	"github.com/siderolabs/talos/pkg/machinery/config"
+	configconfig "github.com/siderolabs/talos/pkg/machinery/config/config"
+	"github.com/siderolabs/talos/pkg/machinery/config/configpatcher"
+	"github.com/siderolabs/talos/pkg/machinery/config/container"
 	"github.com/siderolabs/talos/pkg/machinery/config/machine"
 	"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1"
 	"github.com/siderolabs/talos/pkg/machinery/constants"
@@ -575,6 +581,69 @@ func (apiSuite *APISuite) AssertExpectedModules(ctx context.Context, node string
 	}
 }
 
+// UpdateMachineConfig fetches machine configuration, patches it and applies the changes.
+//
+// The patch callback receives the configuration read from the node and returns the
+// configuration to apply. It is applied in the AUTO mode and the mode details reported
+// in the response are logged. Any failure fails the test.
+func (apiSuite *APISuite) UpdateMachineConfig(nodeCtx context.Context, patch func(config.Provider) (config.Provider, error)) {
+	cfg, err := apiSuite.ReadConfigFromNode(nodeCtx)
+	apiSuite.Require().NoError(err)
+
+	patchedCfg, err := patch(cfg)
+	apiSuite.Require().NoError(err)
+
+	cfgBytes, err := patchedCfg.Bytes()
+	apiSuite.Require().NoError(err)
+
+	resp, err := apiSuite.Client.ApplyConfiguration(nodeCtx, &machineapi.ApplyConfigurationRequest{
+		Data: cfgBytes,
+		Mode: machineapi.ApplyConfigurationRequest_AUTO,
+	})
+	apiSuite.Require().NoError(err)
+
+	// fail with a clear assertion rather than an index-out-of-range panic on an empty response
+	apiSuite.Require().NotEmpty(resp.Messages)
+
+	apiSuite.T().Logf("patched machine config: %s", resp.Messages[0].ModeDetails)
+}
+
+// PatchMachineConfig patches machine configuration on the node.
+//
+// Each patch is marshaled to YAML and loaded as a config patch; the patches are then
+// applied to the current machine configuration via UpdateMachineConfig.
+func (apiSuite *APISuite) PatchMachineConfig(nodeCtx context.Context, patches ...any) {
+	configPatches := make([]configpatcher.Patch, 0, len(patches))
+
+	for _, patch := range patches {
+		marshaled, err := yaml.Marshal(patch)
+		apiSuite.Require().NoError(err)
+
+		configPatch, err := configpatcher.LoadPatch(marshaled)
+		apiSuite.Require().NoError(err)
+
+		configPatches = append(configPatches, configPatch)
+	}
+
+	apiSuite.UpdateMachineConfig(nodeCtx, func(cfg config.Provider) (config.Provider, error) {
+		out, err := configpatcher.Apply(configpatcher.WithConfig(cfg), configPatches)
+		if err != nil {
+			return nil, err
+		}
+
+		return out.Config()
+	})
+}
+
+// RemoveMachineConfigDocuments removes machine configuration documents of the specified kinds from the node.
+func (apiSuite *APISuite) RemoveMachineConfigDocuments(nodeCtx context.Context, docTypes ...string) {
+	apiSuite.UpdateMachineConfig(nodeCtx, func(cfg config.Provider) (config.Provider, error) {
+		return container.New(xslices.Filter(cfg.Documents(), func(doc configconfig.Document) bool {
+			return !slices.Contains(docTypes, doc.Kind())
+		})...)
+	})
+}
+
 // PatchV1Alpha1Config patches v1alpha1 config in the config provider.
 func (apiSuite *APISuite) PatchV1Alpha1Config(provider config.Provider, patch func(*v1alpha1.Config)) []byte {
 	ctr, err := provider.PatchV1Alpha1(func(c *v1alpha1.Config) error {