Skip to content

Commit

Permalink
fix: wait for devices to be discovered before probing filesystems
Browse files Browse the repository at this point in the history
With Talos 1.7+, more storage drivers are built as separate kernel modules,
so the devices might not yet be discovered by the time the platform config
is loaded. Explicitly wait for udevd to settle down before trying to
probe a CD.

Fixes #8625

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
  • Loading branch information
smira committed Apr 23, 2024
1 parent 0821b9c commit c5b59df
Show file tree
Hide file tree
Showing 10 changed files with 95 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (

"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/resources/network"
"github.com/siderolabs/talos/pkg/machinery/resources/runtime"
)

// Wait for the network to be ready to interact with platform metadata services.
Expand All @@ -39,11 +40,21 @@ func WaitInterfaces(ctx context.Context, r state.State) error {
return fmt.Errorf("error listing host interfaces: %w", err)
}

if hostInterfaces.Len() != 0 {
numPhysical := 0

for iter := hostInterfaces.Iterator(); iter.Next(); {
iface := iter.Value()

if iface.TypedSpec().Physical() {
numPhysical++
}
}

if numPhysical > 0 {
return nil
}

log.Printf("waiting for network interface appearse...")
log.Printf("waiting for physical network interfaces to appear...")

interval := backoff.NextBackOff()

Expand All @@ -57,6 +68,13 @@ func WaitInterfaces(ctx context.Context, r state.State) error {
return nil
}

// WaitForDevicesReady waits for devices to be ready.
//
// It logs a progress message and then blocks on the runtime devices status
// condition built from r, returning whatever error that wait produces
// (including context cancellation surfaced by the condition). Platform code
// calls this before probing block devices by filesystem label.
func WaitForDevicesReady(ctx context.Context, r state.State) error {
	log.Printf("waiting for devices to be ready...")

	devicesReady := runtime.NewDevicesStatusCondition(r)

	return devicesReady.Wait(ctx)
}

// RetryFetch retries fetching from metadata service.
func RetryFetch(ctx context.Context, f func(ctx context.Context) (string, error)) (string, error) {
var (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (m *Metal) Configuration(ctx context.Context, r state.State) ([]byte, error

switch *option {
case constants.MetalConfigISOLabel:
return readConfigFromISO()
return readConfigFromISO(ctx, r)
default:
if err := netutils.Wait(ctx, r); err != nil {
return nil, err
Expand Down Expand Up @@ -119,7 +119,11 @@ func (m *Metal) Mode() runtime.Mode {
return runtime.ModeMetal
}

func readConfigFromISO() ([]byte, error) {
func readConfigFromISO(ctx context.Context, r state.State) ([]byte, error) {
if err := netutils.WaitForDevicesReady(ctx, r); err != nil {
return nil, fmt.Errorf("failed to wait for devices: %w", err)
}

dev, err := probe.GetDevWithFileSystemLabel(constants.MetalConfigISOLabel)
if err != nil {
return nil, fmt.Errorf("failed to find %s iso: %w", constants.MetalConfigISOLabel, err)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,12 @@ func (n *Nocloud) configFromNetwork(ctx context.Context, metaBaseURL string, r s
return metaConfig, networkConfig, machineConfig, err
}

func (n *Nocloud) configFromCD() (metaConfig []byte, networkConfig []byte, machineConfig []byte, err error) {
//nolint:gocyclo
func (n *Nocloud) configFromCD(ctx context.Context, r state.State) (metaConfig []byte, networkConfig []byte, machineConfig []byte, err error) {
if err := netutils.WaitForDevicesReady(ctx, r); err != nil {
return nil, nil, nil, fmt.Errorf("failed to wait for devices: %w", err)
}

var dev *probe.ProbedBlockDevice

dev, err = probe.GetDevWithFileSystemLabel(strings.ToLower(configISOLabel))
Expand Down Expand Up @@ -244,7 +249,7 @@ func (n *Nocloud) acquireConfig(ctx context.Context, r state.State) (metadataCon
if networkSource && metaBaseURL != "" {
metadataConfigDl, metadataNetworkConfigDl, machineConfigDl, err = n.configFromNetwork(ctx, metaBaseURL, r)
} else {
metadataConfigDl, metadataNetworkConfigDl, machineConfigDl, err = n.configFromCD()
metadataConfigDl, metadataNetworkConfigDl, machineConfigDl, err = n.configFromCD(ctx, r)
}

metadata = &MetadataConfig{}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,20 @@ func TestParseMetadata(t *testing.T) {

eth0 := network.NewLinkStatus(network.NamespaceName, "eth0")
eth0.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0x68, 0x05, 0xca, 0xb8, 0xf1, 0xf7}
eth0.TypedSpec().Type = nethelpers.LinkEther
eth0.TypedSpec().Kind = ""
require.NoError(t, st.Create(context.TODO(), eth0))

eth1 := network.NewLinkStatus(network.NamespaceName, "eth1")
eth1.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0x68, 0x05, 0xca, 0xb8, 0xf1, 0xf8}
eth1.TypedSpec().Type = nethelpers.LinkEther
eth1.TypedSpec().Kind = ""
require.NoError(t, st.Create(context.TODO(), eth1))

eth2 := network.NewLinkStatus(network.NamespaceName, "eth2")
eth2.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0x68, 0x05, 0xca, 0xb8, 0xf1, 0xf9}
eth2.TypedSpec().Type = nethelpers.LinkEther
eth2.TypedSpec().Kind = ""
require.NoError(t, st.Create(context.TODO(), eth2))

var m nocloud.NetworkConfig
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,20 @@
package opennebula

import (
"context"
"fmt"
"log"
"os"
"path/filepath"
"strings"

"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/go-blockdevice/blockdevice/filesystem"
"github.com/siderolabs/go-blockdevice/blockdevice/probe"
"golang.org/x/sys/unix"

"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/errors"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils"
)

const (
Expand All @@ -25,7 +28,11 @@ const (
mnt = "/mnt"
)

func (o *OpenNebula) contextFromCD() (oneContext []byte, err error) {
func (o *OpenNebula) contextFromCD(ctx context.Context, r state.State) (oneContext []byte, err error) {
if err := netutils.WaitForDevicesReady(ctx, r); err != nil {
return nil, fmt.Errorf("failed to wait for devices: %w", err)
}

var dev *probe.ProbedBlockDevice

dev, err = probe.GetDevWithFileSystemLabel(strings.ToLower(configISOLabel))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ func (o *OpenNebula) ParseMetadata(st state.State, oneContextPlain []byte) (*run

// Configuration implements the runtime.Platform interface.
func (o *OpenNebula) Configuration(ctx context.Context, r state.State) (machineConfig []byte, err error) {
oneContextPlain, err := o.contextFromCD()
oneContextPlain, err := o.contextFromCD(ctx, r)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -234,7 +234,7 @@ func (o *OpenNebula) KernelArgs(string) procfs.Parameters {

// NetworkConfiguration implements the runtime.Platform interface.
func (o *OpenNebula) NetworkConfiguration(ctx context.Context, st state.State, ch chan<- *runtime.PlatformNetworkConfig) error {
oneContext, err := o.contextFromCD()
oneContext, err := o.contextFromCD(ctx, st)
if stderrors.Is(err, errors.ErrNoConfigSource) {
err = nil
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
// go test -v ./internal/app/machined/pkg/runtime/v1alpha1/platform/opennebula

package opennebula_test

import (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@ import (
"os"
"path/filepath"

"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/go-blockdevice/blockdevice/filesystem"
"github.com/siderolabs/go-blockdevice/blockdevice/probe"
"golang.org/x/sys/unix"

"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/errors"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils"
"github.com/siderolabs/talos/pkg/download"
)

Expand Down Expand Up @@ -108,7 +110,12 @@ func (o *Openstack) configFromNetwork(ctx context.Context) (metaConfig []byte, n
return metaConfig, networkConfig, machineConfig, err
}

func (o *Openstack) configFromCD() (metaConfig []byte, networkConfig []byte, machineConfig []byte, err error) {
//nolint:gocyclo
func (o *Openstack) configFromCD(ctx context.Context, r state.State) (metaConfig []byte, networkConfig []byte, machineConfig []byte, err error) {
if err := netutils.WaitForDevicesReady(ctx, r); err != nil {
return nil, nil, nil, fmt.Errorf("failed to wait for devices: %w", err)
}

var dev *probe.ProbedBlockDevice

dev, err = probe.GetDevWithFileSystemLabel(configISOLabel)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ func (o *Openstack) ParseMetadata(

// Configuration implements the runtime.Platform interface.
func (o *Openstack) Configuration(ctx context.Context, r state.State) (machineConfig []byte, err error) {
_, _, machineConfig, err = o.configFromCD()
_, _, machineConfig, err = o.configFromCD(ctx, r)
if err != nil {
if err = netutils.Wait(ctx, r); err != nil {
return nil, err
Expand Down Expand Up @@ -389,7 +389,7 @@ func (o *Openstack) KernelArgs(string) procfs.Parameters {
func (o *Openstack) NetworkConfiguration(ctx context.Context, st state.State, ch chan<- *runtime.PlatformNetworkConfig) error {
networkSource := false

metadataConfigDl, metadataNetworkConfigDl, _, err := o.configFromCD()
metadataConfigDl, metadataNetworkConfigDl, _, err := o.configFromCD(ctx, st)
if err != nil {
metadataConfigDl, metadataNetworkConfigDl, _, err = o.configFromNetwork(ctx)
if stderrors.Is(err, errors.ErrNoConfigSource) {
Expand Down
36 changes: 35 additions & 1 deletion pkg/machinery/resources/runtime/condition.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ type ExtensionServiceConfigStatusCondition struct {
serviceName string
}

// NewExtensionServiceConfigStatusCondition builds a coondition which waits for extension service config to be available.
// NewExtensionServiceConfigStatusCondition builds a condition which waits for extension service config to be available.
func NewExtensionServiceConfigStatusCondition(state state.State, serviceName string) *ExtensionServiceConfigStatusCondition {
return &ExtensionServiceConfigStatusCondition{
state: state,
Expand All @@ -85,3 +85,37 @@ func (condition *ExtensionServiceConfigStatusCondition) Wait(ctx context.Context

return err
}

// DevicesStatusCondition implements condition which waits for devices to be ready.
//
// Instances are created with NewDevicesStatusCondition.
type DevicesStatusCondition struct {
	// state is the resource state that Wait watches for the DevicesStatus resource.
	state state.State
}

// NewDevicesStatusCondition builds a condition which waits for devices to be ready.
//
// The supplied state is stored on the condition and is the one watched by Wait.
func NewDevicesStatusCondition(state state.State) *DevicesStatusCondition {
	cond := new(DevicesStatusCondition)
	cond.state = state

	return cond
}

// String returns a short human-readable description of what this condition waits for.
func (condition *DevicesStatusCondition) String() string {
	const description = "devices to be ready"

	return description
}

// Wait implements condition interface.
//
// It watches the DevicesStatus resource (DevicesID in NamespaceName) for
// created/updated events and returns once the resource's spec reports Ready,
// or with the error returned by the underlying watch.
func (condition *DevicesStatusCondition) Wait(ctx context.Context) error {
	devicesStatusMD := resource.NewMetadata(NamespaceName, DevicesStatusType, DevicesID, resource.VersionUndefined)

	// isReady is the watch predicate: tombstones never satisfy the condition,
	// otherwise the decision is delegated to the Ready flag of the spec.
	isReady := func(r resource.Resource) (bool, error) {
		if resource.IsTombstone(r) {
			return false, nil
		}

		status := r.(*DevicesStatus)

		return status.TypedSpec().Ready, nil
	}

	_, err := condition.state.WatchFor(
		ctx,
		devicesStatusMD,
		state.WithEventTypes(state.Created, state.Updated),
		state.WithCondition(isReady),
	)

	return err
}

0 comments on commit c5b59df

Please sign in to comment.