Check zvols existence in zfsmanager #2712

Merged: 1 commit, Jul 20, 2022
19 changes: 15 additions & 4 deletions pkg/dom0-ztools/rootfs/bin/mdev-zvol.sh
@@ -7,15 +7,26 @@ if [ "$ACTION" = "add" ]; then
echo "failed to run /lib/udev/zvol_id /dev/$MDEV code: $retVal output: $datasetName"
exit $retVal
fi
if [ -f "/run/mdev/zvol/$MDEV" ]; then
oldDatasetName=$(cat "/run/mdev/zvol/$MDEV")
if [ "$oldDatasetName" != "$datasetName" ]; then
# clean up the old link if it does not match
rm "/dev/zvol/$oldDatasetName"
fi
fi
#temp directory to map $MDEV->datasetName
mkdir -p "/run/mdev/zvol"
echo "$datasetName" >"/run/mdev/zvol/$MDEV"
mkdir -p "$(dirname "/dev/zvol/$datasetName")"
ln -s "/dev/$MDEV" "/dev/zvol/$datasetName"
ln -sf "/dev/$MDEV" "/dev/zvol/$datasetName"
fi

if [ "$ACTION" = "remove" ]; then
datasetName=$(cat "/run/mdev/zvol/$MDEV")
rm "/run/mdev/zvol/$MDEV"
rm "/dev/zvol/$datasetName"
if [ -f "/run/mdev/zvol/$MDEV" ]; then
datasetName=$(cat "/run/mdev/zvol/$MDEV")
rm "/run/mdev/zvol/$MDEV"
rm "/dev/zvol/$datasetName"
else
echo "failed to remove /dev/$MDEV: no temp file"
fi
fi
103 changes: 66 additions & 37 deletions pkg/pillar/cmd/zfsmanager/zfsmanager.go
@@ -11,6 +11,7 @@ import (

"github.com/fsnotify/fsnotify"
"github.com/lf-edge/eve/pkg/pillar/base"
"github.com/lf-edge/eve/pkg/pillar/flextimer"
"github.com/lf-edge/eve/pkg/pillar/pidfile"
"github.com/lf-edge/eve/pkg/pillar/pubsub"
"github.com/lf-edge/eve/pkg/pillar/types"
@@ -27,6 +28,7 @@ const (
stillRunningInterval = 25 * time.Second

disksProcessingInterval = 60 * time.Second
zvolsProcessingInterval = 5 * time.Second
)

var (
@@ -38,7 +40,6 @@ var (

type zVolDeviceEvent struct {
delete bool
device string
dataset string
}

@@ -48,6 +49,7 @@ type zfsContext struct {
storageStatusPub pubsub.Publication
subDisksConfig pubsub.Subscription
disksProcessingTrigger chan interface{}
zVolDeviceEvents *base.LockedStringMap // stores device->zVolDeviceEvent mapping to check and publish
}

// Run - an zfs run
@@ -72,19 +74,18 @@ func Run(ps *pubsub.PubSub, loggerArg *logrus.Logger, logArg *base.LogObject) in
stillRunning := time.NewTicker(stillRunningInterval)
ps.StillRunning(agentName, warningTime, errorTime)

ctxPtr := zfsContext{ps: ps, disksProcessingTrigger: make(chan interface{}, 1)}
ctxPtr := zfsContext{ps: ps, disksProcessingTrigger: make(chan interface{}, 1), zVolDeviceEvents: base.NewLockedStringMap()}

if err := utils.WaitForVault(ps, log, agentName, warningTime, errorTime); err != nil {
log.Fatal(err)
}
log.Functionf("processed Vault Status")

// Publish cloud metrics
// Publish ZVolStatus for zvol devices
zVolStatusPub, err := ps.NewPublication(
pubsub.PublicationOptions{
AgentName: agentName,
TopicType: types.ZVolStatus{},
Persistent: true,
AgentName: agentName,
TopicType: types.ZVolStatus{},
})
if err != nil {
log.Fatal(err)
@@ -121,22 +122,25 @@ func Run(ps *pubsub.PubSub, loggerArg *logrus.Logger, logArg *base.LogObject) in
}
ctxPtr.storageStatusPub = storageStatusPub

deviceNotifyChannel := make(chan *zVolDeviceEvent)

if err := os.MkdirAll(types.ZVolDevicePrefix, os.ModeDir); err != nil {
log.Fatal(err)
}

go processDisksTask(&ctxPtr)

go deviceWatcher(deviceNotifyChannel)
go deviceWatcher(&ctxPtr)

go storageStatusPublisher(&ctxPtr)

max := float64(zvolsProcessingInterval)
min := max * 0.3
devicesProcessTicker := flextimer.NewRangeTicker(time.Duration(min),
time.Duration(max))

for {
select {
case event := <-deviceNotifyChannel:
processEvent(&ctxPtr, event)
case <-devicesProcessTicker.C:
processZVolDeviceEvents(&ctxPtr)
case change := <-subDisksConfig.MsgChan():
subDisksConfig.ProcessChange(change)
case <-stillRunning.C:
@@ -145,24 +149,55 @@ func Run(ps *pubsub.PubSub, loggerArg *logrus.Logger, logArg *base.LogObject) in
}
}

func processEvent(ctxPtr *zfsContext, event *zVolDeviceEvent) {
if event == nil {
return
// processZVolDeviceEvents iterates over saved zVolDeviceEvents, checks for device existence and publishes ZVolStatus
func processZVolDeviceEvents(ctxPtr *zfsContext) {
var processedKeys []string
checker := func(key string, val interface{}) bool {
event, ok := val.(zVolDeviceEvent)
if !ok {
log.Fatalf("unexpected type for key: %s", key)
}
zvolStatus := types.ZVolStatus{
Device: key,
Dataset: event.dataset,
}
if event.delete {
if el, _ := ctxPtr.zVolStatusPub.Get(zvolStatus.Key()); el == nil {
processedKeys = append(processedKeys, key)
return true
}
if err := ctxPtr.zVolStatusPub.Unpublish(zvolStatus.Key()); err != nil {
log.Errorf("cannot unpublish device: %s", err)
return true
}
processedKeys = append(processedKeys, key)
return true
}
l, err := filepath.EvalSymlinks(key)
if err != nil {
log.Warnf("failed to EvalSymlinks: %s", err)
return true
}
_, err = os.Stat(l)
if err != nil {
log.Warnf("failed to Stat device: %s", err)
return true
}

if err := ctxPtr.zVolStatusPub.Publish(zvolStatus.Key(), zvolStatus); err != nil {
log.Errorf("cannot publish device: %s", err)
return true
}
processedKeys = append(processedKeys, key)
return true
}
log.Functionf("processEvent: %+v", event)
if event.delete {
ctxPtr.zVolStatusPub.Unpublish(event.device)
return
ctxPtr.zVolDeviceEvents.Range(checker)
for _, key := range processedKeys {
ctxPtr.zVolDeviceEvents.Delete(key)
}
ctxPtr.zVolStatusPub.Publish(event.device,
types.ZVolStatus{
Device: event.device,
Dataset: event.dataset,
},
)
}

func deviceWatcher(notifyChannel chan *zVolDeviceEvent) {
func deviceWatcher(ctxPtr *zfsContext) {
w, err := fsnotify.NewWatcher()
if err != nil {
log.Fatalf("NewWatcher: %s", err)
@@ -185,11 +220,9 @@ func deviceWatcher(notifyChannel chan *zVolDeviceEvent) {
log.Errorf("cannot determine dataset for device: %s", walkPath)
return nil
}
notifyChannel <- &zVolDeviceEvent{
delete: false,
device: walkPath,
ctxPtr.zVolDeviceEvents.Store(walkPath, zVolDeviceEvent{
dataset: dataset,
}
})
}
}
return nil
@@ -218,17 +251,13 @@ func deviceWatcher(notifyChannel chan *zVolDeviceEvent) {
log.Errorf("cannot determine dataset for device: %s", fileName)
continue
}
notifyChannel <- &zVolDeviceEvent{
delete: false,
device: fileName,
ctxPtr.zVolDeviceEvents.Store(fileName, zVolDeviceEvent{
dataset: dataset,
}
})
} else if event.Op&fsnotify.Remove != 0 {
_ = w.Remove(fileName)
notifyChannel <- &zVolDeviceEvent{
ctxPtr.zVolDeviceEvents.Store(fileName, zVolDeviceEvent{
delete: true,
device: fileName,
}
})
}
}
}
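
The zfsmanager change replaces the unbuffered notification channel with a keyed event map that a flextimer-driven ticker drains, so a zvol is only published once its /dev node actually resolves. Below is a minimal standalone sketch of that store-then-verify pattern; it assumes a mutex-guarded map in place of pillar's base.LockedStringMap and a plain time.Ticker in place of flextimer's range ticker, and names such as eventMap, store and drain are illustrative, not from the PR.

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"sync"
	"time"
)

// event mirrors zVolDeviceEvent from the diff: the dataset behind a
// /dev/zvol/... path, plus a delete flag.
type event struct {
	dataset string
	delete  bool
}

// eventMap stands in for base.LockedStringMap: device path -> pending event.
type eventMap struct {
	mu sync.Mutex
	m  map[string]event
}

func (e *eventMap) store(key string, ev event) {
	e.mu.Lock()
	defer e.mu.Unlock()
	e.m[key] = ev
}

// drain mimics processZVolDeviceEvents: an "add" event is only published
// (and dropped from the map) once its symlink resolves to a device node
// that actually exists; otherwise it stays queued for the next tick.
func (e *eventMap) drain(publish func(device, dataset string, removed bool) error) {
	e.mu.Lock()
	defer e.mu.Unlock()
	for device, ev := range e.m {
		if !ev.delete {
			target, err := filepath.EvalSymlinks(device)
			if err != nil {
				continue // link not created by mdev yet; retry on the next tick
			}
			if _, err := os.Stat(target); err != nil {
				continue // device node not present yet; retry on the next tick
			}
		}
		if err := publish(device, ev.dataset, ev.delete); err != nil {
			continue // keep the event so the next tick retries
		}
		delete(e.m, device)
	}
}

func main() {
	events := &eventMap{m: map[string]event{}}

	// A watcher (fsnotify in the real code) would call store(); repeated
	// add/remove/add for the same device just overwrites the pending entry.
	events.store("/dev/zvol/persist/example", event{dataset: "persist/example"})

	ticker := time.NewTicker(5 * time.Second) // flextimer adds jitter in pillar
	defer ticker.Stop()
	for range ticker.C {
		events.drain(func(device, dataset string, removed bool) error {
			fmt.Printf("publish device=%s dataset=%s removed=%v\n", device, dataset, removed)
			return nil
		})
	}
}
```

The design point is that publishing is decoupled from the fsnotify event: a create event for a symlink whose backing zvol has not finished appearing is simply retried on the next tick instead of being published, or lost, immediately.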
4 changes: 2 additions & 2 deletions pkg/pillar/types/zfs.go
@@ -49,9 +49,9 @@ type ZVolStatus struct {
Device string
}

// Key is volume UUID which will be unique
// Key is Dataset with '/' replaced by '_'
func (status ZVolStatus) Key() string {
return status.Device
return strings.ReplaceAll(status.Dataset, "/", "_")
}

Contributor (inline review comment): How did the old code work at all? Did it fail to write the checkpoint files inside pubsub but continued anyhow?

// StorageRaidType indicates storage raid type
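
The reviewer's question about checkpoint files points at why this key change matters: pubsub keys are apparently used as checkpoint file names, and the old key was the device path, which contains '/'. A small hedged illustration of the new Key() follows; the ZVolStatus fields match the diff, while the dataset and device values are invented for the example.

```go
package main

import (
	"fmt"
	"strings"
)

// ZVolStatus mirrors the struct in pkg/pillar/types/zfs.go.
type ZVolStatus struct {
	Dataset string
	Device  string
}

// Key is Dataset with '/' replaced by '_', as in the new code above.
func (status ZVolStatus) Key() string {
	return strings.ReplaceAll(status.Dataset, "/", "_")
}

func main() {
	// Dataset and device names here are invented for illustration.
	s := ZVolStatus{Dataset: "persist/vols/volume1", Device: "/dev/zd0"}
	fmt.Println(s.Key()) // persist_vols_volume1: flat and filename-safe

	// The old Key() returned status.Device, which in zfsmanager was the
	// /dev/zvol/<dataset> path and therefore contained '/' characters.
}
```

Keying on the dataset also means that an add and a later remove of the same zvol resolve to the same pubsub entry, regardless of which /dev node the volume lands on.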
8 changes: 8 additions & 0 deletions pkg/storage-init/storage-init.sh
@@ -96,6 +96,11 @@ zfs_module_unload() {
rmmod $(lsmod | grep zfs | awk '{print $1;}') || :
}

# set sequential mdev handler to avoid add-remove-add mis-order of zvols
set_sequential_mdev() {
echo >/dev/mdev.seq
}

PERSISTDIR=/persist
CONFIGDIR=/config
SMART_DETAILS_FILE=$PERSISTDIR/SMART_details.json
@@ -223,6 +228,7 @@ if P3=$(findfs PARTLABEL=P3) && [ -n "$P3" ]; then
echo "$(date -Ins -u) Using $P3 (formatted with $P3_FS_TYPE), for $PERSISTDIR"

if [ "$P3_FS_TYPE" = zfs ]; then
set_sequential_mdev
if ! chroot /hostfs zpool import -f persist; then
echo "$(date -Ins -u) Cannot import persist pool on P3 partition $P3 of type $P3_FS_TYPE, recreating it as $P3_FS_TYPE_DEFAULT"
INIT_FS=1
@@ -259,6 +265,7 @@ if P3=$(findfs PARTLABEL=P3) && [ -n "$P3" ]; then
chroot /hostfs zfs set mountpoint="$PERSISTDIR" persist && \
chroot /hostfs zfs set primarycache=metadata persist && \
chroot /hostfs zfs create -p -o mountpoint="$PERSISTDIR/containerd/io.containerd.snapshotter.v1.zfs" persist/snapshots
set_sequential_mdev
fi
;;
esac || echo "$(date -Ins -u) mount of $P3 as $P3_FS_TYPE failed"
Expand All @@ -271,6 +278,7 @@ if P3=$(findfs PARTLABEL=P3) && [ -n "$P3" ]; then
else
#in case of no P3 we may have EVE persist on another disks
zfs_module_load
set_sequential_mdev
if chroot /hostfs zpool import -f persist; then
echo "zfs" > /run/eve.persist_type
else