diff --git a/cmd/climc/shell/compute/servers.go b/cmd/climc/shell/compute/servers.go index 39081390a56..ae3e01be958 100644 --- a/cmd/climc/shell/compute/servers.go +++ b/cmd/climc/shell/compute/servers.go @@ -951,4 +951,46 @@ func init() { } return nil }) + + // ServerStartRescueOptions is used to start a rescue os. + type ServerStartRescueOptions struct { + ID string `help:"ID of server" json:"-"` + QemuVersion string `help:"prefer qemu version" json:"qemu_version"` + } + R(&ServerStartRescueOptions{}, "server-start-rescue ", "Start rescu e a guest server", func(s *mcclient.ClientSession, opts *ServerStartRescueOptions) error { + params, err := baseoptions.StructToParams(opts) + if err != nil { + return err + } + + result, err := modules.Servers.PerformAction(s, opts.ID, "start-rescue", params) + if err != nil { + return err + } + + printObject(result) + + return nil + }) + + // ServerStopRescueOptions is used to stop a rescue os. + type ServerStopRescueOptions struct { + ID string `help:"ID of server" json:"-"` + QemuVersion string `help:"prefer qemu version" json:"qemu_version"` + } + R(&ServerStopRescueOptions{}, "server-stop-rescue", "Stop rescue a guest server", func(s *mcclient.ClientSession, opts *ServerStopRescueOptions) error { + params, err := baseoptions.StructToParams(opts) + if err != nil { + return err + } + + result, err := modules.Servers.PerformAction(s, opts.ID, "stop-rescue", params) + if err != nil { + return err + } + + printObject(result) + + return nil + }) } diff --git a/pkg/apis/compute/guest_const.go b/pkg/apis/compute/guest_const.go index 21a49e1afad..90d0ee650a0 100644 --- a/pkg/apis/compute/guest_const.go +++ b/pkg/apis/compute/guest_const.go @@ -46,6 +46,12 @@ const ( VM_DETACH_DISK = "detach_disk" VM_UNSYNC = "unsync" + VM_START_RESCUE = "start_rescue" + VM_RESCUING = "rescuing" + VM_STOP_RESCUE = "stop_rescue" + VM_START_RESCUE_FAILED = "start_rescue_failed" + VM_STOP_RESCUE_FAILED = "stop_rescue_failed" + VM_BACKUP_STARTING = 
"backup_starting" VM_BACKUP_STOPING = "backup_stopping" VM_BACKUP_CREATING = "backup_creating" diff --git a/pkg/apis/compute/guest_rescue.go b/pkg/apis/compute/guest_rescue.go new file mode 100644 index 00000000000..3fe3d87752a --- /dev/null +++ b/pkg/apis/compute/guest_rescue.go @@ -0,0 +1,28 @@ +// Copyright 2019 Yunion +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compute + +// Rescue constants are used for rescue mode +const ( + GUEST_RESCUE_RELATIVE_PATH = "rescue" // serverxxx/rescue + + GUEST_RESCUE_INITRAMFS = "initramfs" + GUEST_RESCUE_KERNEL = "kernel" + GUEST_RESCUE_INITRAMFS_ARM64 = "initramfs_aarch64" + GUEST_RESCUE_KERNEL_ARM64 = "kernel_aarch64" + + GUEST_RESCUE_SYS_DISK_NAME = "sys_img" + GUEST_RESCUE_SYS_DISK_SIZE = 500 // MB +) diff --git a/pkg/apis/compute/guests.go b/pkg/apis/compute/guests.go index 0ef7f28c11d..f0c1758884e 100644 --- a/pkg/apis/compute/guests.go +++ b/pkg/apis/compute/guests.go @@ -863,6 +863,8 @@ type GuestJsonDesc struct { EncryptKeyId string `json:"encrypt_key_id,omitempty"` IsDaemon bool `json:"is_daemon"` + + RescueMode bool `json:"rescue_mode"` } type ServerSetBootIndexInput struct { diff --git a/pkg/cloudcommon/db/opslog_const.go b/pkg/cloudcommon/db/opslog_const.go index e2dd67608bd..e996083e26d 100644 --- a/pkg/cloudcommon/db/opslog_const.go +++ b/pkg/cloudcommon/db/opslog_const.go @@ -322,4 +322,9 @@ const ( ACT_BIND = "bind" ACT_UNBIND = "unbind" + + ACT_START_RESCUE = "start_rescue" + 
ACT_STOP_RESCUE = "stop_rescue" + ACT_START_RESCUE_FAILED = "start_rescue_failed" + ACT_STOP_RESCUE_FAILED = "stop_rescue_failed" ) diff --git a/pkg/compute/guestdrivers/base.go b/pkg/compute/guestdrivers/base.go index 437231d773b..0544ddc3a32 100644 --- a/pkg/compute/guestdrivers/base.go +++ b/pkg/compute/guestdrivers/base.go @@ -544,3 +544,11 @@ func (drv *SBaseGuestDriver) RequestSetNicTrafficLimit(ctx context.Context, task func (drv *SBaseGuestDriver) SyncOsInfo(ctx context.Context, userCred mcclient.TokenCredential, g *models.SGuest, extVM cloudprovider.IOSInfo) error { return nil } + +func (self *SBaseGuestDriver) RequestStartRescue(ctx context.Context, task taskman.ITask, body jsonutils.JSONObject, host *models.SHost, guest *models.SGuest) error { + return httperrors.ErrNotImplemented +} + +func (self *SBaseGuestDriver) RequestStopRescue(ctx context.Context, task taskman.ITask, body jsonutils.JSONObject, host *models.SHost, guest *models.SGuest) error { + return httperrors.ErrNotImplemented +} diff --git a/pkg/compute/guestdrivers/kvm.go b/pkg/compute/guestdrivers/kvm.go index c219eaf976b..9ca3c73a7a9 100644 --- a/pkg/compute/guestdrivers/kvm.go +++ b/pkg/compute/guestdrivers/kvm.go @@ -1173,3 +1173,27 @@ func (self *SKVMGuestDriver) RequestSetNicTrafficLimit(ctx context.Context, task } return nil } + +func (self *SKVMGuestDriver) RequestStartRescue(ctx context.Context, task taskman.ITask, body jsonutils.JSONObject, host *models.SHost, guest *models.SGuest) error { + header := self.getTaskRequestHeader(task) + client := httputils.GetDefaultClient() + url := fmt.Sprintf("%s/servers/%s/start-rescue", host.ManagerUri, guest.Id) + _, _, err := httputils.JSONRequest(client, ctx, "POST", url, header, body, false) + if err != nil { + return err + } + + return nil +} + +func (self *SKVMGuestDriver) RequestStopRescue(ctx context.Context, task taskman.ITask, body jsonutils.JSONObject, host *models.SHost, guest *models.SGuest) error { + header := 
self.getTaskRequestHeader(task) + client := httputils.GetDefaultClient() + url := fmt.Sprintf("%s/servers/%s/stop-rescue", host.ManagerUri, guest.Id) + _, _, err := httputils.JSONRequest(client, ctx, "POST", url, header, body, false) + if err != nil { + return err + } + + return nil +} diff --git a/pkg/compute/models/guest_rescue.go b/pkg/compute/models/guest_rescue.go new file mode 100644 index 00000000000..743dd3cd330 --- /dev/null +++ b/pkg/compute/models/guest_rescue.go @@ -0,0 +1,148 @@ +// Copyright 2019 Yunion +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package models + +import ( + "context" + + "yunion.io/x/jsonutils" + "yunion.io/x/pkg/errors" + "yunion.io/x/pkg/utils" + + api "yunion.io/x/onecloud/pkg/apis/compute" + "yunion.io/x/onecloud/pkg/cloudcommon/db" + "yunion.io/x/onecloud/pkg/cloudcommon/db/taskman" + "yunion.io/x/onecloud/pkg/httperrors" + "yunion.io/x/onecloud/pkg/mcclient" +) + +func (self *SGuest) PerformStartRescue(ctx context.Context, userCred mcclient.TokenCredential, query jsonutils.JSONObject, + data jsonutils.JSONObject) (jsonutils.JSONObject, error) { + if !utils.IsInStringArray(self.Status, []string{api.VM_READY, api.VM_RUNNING}) { + return nil, httperrors.NewInvalidStatusError("guest status must be ready or running") + } + + // Check vmem size, need to be greater than 2G + if self.VmemSize < 2048 { + return nil, httperrors.NewInvalidStatusError("vmem size must be greater than 2G") + } + + // Reset index + disks, err := self.GetGuestDisks() + if err != nil || len(disks) < 1 { + return nil, httperrors.NewInvalidStatusError("guest.GetGuestDisks: %s", err.Error()) + } + for i := 0; i < len(disks); i++ { + if disks[i].BootIndex >= 0 { + // Move to next index, and easy to rollback + err = disks[i].SetBootIndex(disks[i].BootIndex + 1) + if err != nil { + return nil, httperrors.NewInvalidStatusError("guest.SetBootIndex: %s", err.Error()) + } + } + } + + // Get baremetal agent + host, err := self.GetHost() + if err != nil { + return nil, httperrors.NewInvalidStatusError("guest.GetHost: %s", err.Error()) + } + bmAgent := BaremetalagentManager.GetAgent(api.AgentTypeBaremetal, host.ZoneId) + if bmAgent == nil { + return nil, httperrors.NewInvalidStatusError("BaremetalagentManager.GetAgent: %s", "Baremetal agent not found") + } + + // Set available baremetal agent managerURi to data + dataDict := data.(*jsonutils.JSONDict) + dataDict.Add(jsonutils.NewString(bmAgent.ManagerUri), "manager_uri") + + // Start rescue vm task + err = self.StartRescueTask(ctx, userCred, dataDict, "") + if err != nil { + 
return nil, httperrors.NewInvalidStatusError("guest.StartGuestRescueTask: %s", err.Error()) + } + + // Now it only support kvm guest os rescue + return nil, nil +} + +func (self *SGuest) PerformStopRescue(ctx context.Context, userCred mcclient.TokenCredential, query jsonutils.JSONObject, + data jsonutils.JSONObject) (jsonutils.JSONObject, error) { + if !self.RescueMode { + return nil, httperrors.NewInvalidStatusError("guest is not in rescue mode") + } + + // Recover index + disks, err := self.GetGuestDisks() + if err != nil || len(disks) < 1 { + return nil, httperrors.NewInvalidStatusError("guest.GetGuestDisks: %s", err.Error()) + } + for i := 0; i < len(disks); i++ { + if disks[i].BootIndex >= 0 { + // Rollback index + err = disks[i].SetBootIndex(disks[i].BootIndex - 1) + if err != nil { + return nil, httperrors.NewInvalidStatusError("guest.SetBootIndex: %s", err.Error()) + } + } + } + + // Start rescue vm task + err = self.StopRescueTask(ctx, userCred, data.(*jsonutils.JSONDict), "") + if err != nil { + return nil, httperrors.NewInvalidStatusError("guest.StopGuestRescueTask: %s", err.Error()) + } + + // Now it only support kvm guest os rescue + return nil, nil +} + +func (self *SGuest) UpdateRescueMode(mode bool) error { + _, err := db.Update(self, func() error { + self.RescueMode = mode + return nil + }) + if err != nil { + return errors.Wrap(err, "Update RescueMode") + } + return nil +} + +func (self *SGuest) StartRescueTask(ctx context.Context, userCred mcclient.TokenCredential, data *jsonutils.JSONDict, parentTaskId string) error { + // Now only support KVM + taskName := "StartRescueTask" + task, err := taskman.TaskManager.NewTask(ctx, taskName, self, userCred, data, parentTaskId, "", nil) + if err != nil { + return err + } + err = task.ScheduleRun(nil) + if err != nil { + return err + } + return nil +} + +func (self *SGuest) StopRescueTask(ctx context.Context, userCred mcclient.TokenCredential, data *jsonutils.JSONDict, parentTaskId string) error { + 
taskName := "StopRescueTask" + task, err := taskman.TaskManager.NewTask(ctx, taskName, self, userCred, data, parentTaskId, "", nil) + if err != nil { + return err + } + err = task.ScheduleRun(nil) + if err != nil { + return err + } + return nil +} diff --git a/pkg/compute/models/guestdrivers.go b/pkg/compute/models/guestdrivers.go index 463a1e51415..614d683c722 100644 --- a/pkg/compute/models/guestdrivers.go +++ b/pkg/compute/models/guestdrivers.go @@ -244,6 +244,9 @@ type IGuestDriver interface { RequestSetNicTrafficLimit(ctx context.Context, task taskman.ITask, host *SHost, guest *SGuest, input *api.ServerNicTrafficLimit) error SyncOsInfo(ctx context.Context, userCred mcclient.TokenCredential, g *SGuest, extVM cloudprovider.IOSInfo) error + + RequestStartRescue(ctx context.Context, task taskman.ITask, body jsonutils.JSONObject, host *SHost, guest *SGuest) error + RequestStopRescue(ctx context.Context, task taskman.ITask, body jsonutils.JSONObject, host *SHost, guest *SGuest) error } var guestDrivers map[string]IGuestDriver diff --git a/pkg/compute/models/guests.go b/pkg/compute/models/guests.go index 73a46bbcd92..e61aedd9401 100644 --- a/pkg/compute/models/guests.go +++ b/pkg/compute/models/guests.go @@ -184,6 +184,8 @@ type SGuest struct { QgaStatus string `width:"36" charset:"ascii" nullable:"false" default:"unknown" list:"user" create:"optional"` // power_states limit in [on, off, unknown] PowerStates string `width:"36" charset:"ascii" nullable:"false" default:"unknown" list:"user" create:"optional"` + // Used for guest rescue + RescueMode bool `nullable:"false" default:"false" list:"user" create:"optional"` } func (manager *SGuestManager) GetPropertyStatistics(ctx context.Context, userCred mcclient.TokenCredential, query jsonutils.JSONObject) (*apis.StatusStatistic, error) { @@ -4917,6 +4919,8 @@ func (self *SGuest) GetJsonDescAtHypervisor(ctx context.Context, host *SHost) *a EncryptKeyId: self.EncryptKeyId, IsDaemon: self.IsDaemon.Bool(), + + RescueMode: 
self.RescueMode, } if len(self.BackupHostId) > 0 { diff --git a/pkg/compute/tasks/guest_rescue_task.go b/pkg/compute/tasks/guest_rescue_task.go new file mode 100644 index 00000000000..0c601629ad3 --- /dev/null +++ b/pkg/compute/tasks/guest_rescue_task.go @@ -0,0 +1,220 @@ +// Copyright 2019 Yunion +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tasks + +import ( + "context" + + "yunion.io/x/jsonutils" + + api "yunion.io/x/onecloud/pkg/apis/compute" + "yunion.io/x/onecloud/pkg/cloudcommon/db" + "yunion.io/x/onecloud/pkg/cloudcommon/db/taskman" + "yunion.io/x/onecloud/pkg/compute/models" + "yunion.io/x/onecloud/pkg/util/logclient" +) + +type StartRescueTask struct { + SGuestBaseTask +} + +func init() { + taskman.RegisterTask(StartRescueTask{}) +} + +func (self *StartRescueTask) OnInit(ctx context.Context, obj db.IStandaloneModel, data jsonutils.JSONObject) { + // Flow: stop -> modify startvm script for rescue -> start + guest := obj.(*models.SGuest) + // Check if guest is running + if guest.Status == api.VM_RUNNING { + self.StopServer(ctx, guest) + } else { + self.PrepareRescue(ctx, guest) + } +} + +func (self *StartRescueTask) StopServer(ctx context.Context, guest *models.SGuest) { + db.OpsLog.LogEvent(guest, db.ACT_STOPPING, nil, self.UserCred) + guest.SetStatus(self.UserCred, api.VM_STOPPING, "StopServer") + self.SetStage("OnServerStopComplete", nil) + guest.StartGuestStopTask(ctx, self.UserCred, true, false, self.GetTaskId()) +} + +func (self 
*StartRescueTask) OnServerStopComplete(ctx context.Context, guest *models.SGuest, data jsonutils.JSONObject) { + db.OpsLog.LogEvent(guest, db.ACT_STOP, guest.GetShortDesc(ctx), self.UserCred) + logclient.AddActionLogWithStartable(self, guest, logclient.ACT_VM_STOP, guest.GetShortDesc(ctx), self.UserCred, true) + + self.PrepareRescue(ctx, guest) +} + +func (self *StartRescueTask) OnServerStopCompleteFailed(ctx context.Context, guest *models.SGuest, err jsonutils.JSONObject) { + guest.SetStatus(self.UserCred, api.VM_STOP_FAILED, err.String()) + db.OpsLog.LogEvent(guest, db.ACT_STOP_FAIL, err, self.UserCred) + logclient.AddActionLogWithStartable(self, guest, logclient.ACT_VM_STOP, err, self.UserCred, false) + self.SetStageFailed(ctx, err) +} + +func (self *StartRescueTask) PrepareRescue(ctx context.Context, guest *models.SGuest) { + db.OpsLog.LogEvent(guest, db.ACT_START_RESCUE, nil, self.UserCred) + guest.SetStatus(self.UserCred, api.VM_START_RESCUE, "PrepareRescue") + self.SetStage("OnRescuePrepareComplete", nil) + + host, _ := guest.GetHost() + err := guest.GetDriver().RequestStartRescue(ctx, self, self.GetParams(), host, guest) + if err != nil { + self.OnRescuePrepareCompleteFailed(ctx, guest, jsonutils.NewString(err.Error())) + return + } +} + +func (self *StartRescueTask) OnRescuePrepareComplete(ctx context.Context, guest *models.SGuest, data jsonutils.JSONObject) { + db.OpsLog.LogEvent(guest, db.ACT_START_RESCUE, guest.GetShortDesc(ctx), self.UserCred) + logclient.AddActionLogWithStartable(self, guest, logclient.ACT_VM_START_RESCUE, guest.GetShortDesc(ctx), self.UserCred, true) + guest.UpdateRescueMode(true) + self.RescueStartServer(ctx, guest) +} + +func (self *StartRescueTask) OnRescuePrepareCompleteFailed(ctx context.Context, guest *models.SGuest, err jsonutils.JSONObject) { + guest.SetStatus(self.UserCred, api.VM_START_RESCUE_FAILED, err.String()) + db.OpsLog.LogEvent(guest, db.ACT_STOP_FAIL, err, self.UserCred) + logclient.AddActionLogWithStartable(self, 
guest, logclient.ACT_VM_STOP, err, self.UserCred, false) + guest.UpdateRescueMode(false) + self.SetStageFailed(ctx, err) +} + +func (self *StartRescueTask) RescueStartServer(ctx context.Context, guest *models.SGuest) { + guest.SetStatus(self.UserCred, api.VM_START_RESCUE, "RescueStartServer") + self.SetStage("OnRescueStartServerComplete", nil) + + // Set Guest rescue params to guest start params + host, _ := guest.GetHost() + err := guest.GetDriver().RequestStartOnHost(ctx, guest, host, self.UserCred, self) + if err != nil { + self.OnRescueStartServerCompleteFailed(ctx, guest, jsonutils.NewString(err.Error())) + return + } +} + +func (self *StartRescueTask) OnRescueStartServerComplete(ctx context.Context, guest *models.SGuest, data jsonutils.JSONObject) { + db.OpsLog.LogEvent(guest, db.ACT_START_RESCUE, guest.GetShortDesc(ctx), self.UserCred) + logclient.AddActionLogWithStartable(self, guest, logclient.ACT_VM_START_RESCUE, guest.GetShortDesc(ctx), self.UserCred, true) + + // Set guest status to rescue running + guest.SetStatus(self.UserCred, api.VM_RESCUING, "OnRescueStartServerComplete") + self.SetStageComplete(ctx, nil) +} + +func (self *StartRescueTask) OnRescueStartServerCompleteFailed(ctx context.Context, obj db.IStandaloneModel, err jsonutils.JSONObject) { + guest := obj.(*models.SGuest) + guest.SetStatus(self.UserCred, api.VM_START_RESCUE_FAILED, err.String()) + db.OpsLog.LogEvent(guest, db.ACT_START_RESCUE_FAILED, guest.GetShortDesc(ctx), self.UserCred) + logclient.AddActionLogWithStartable(self, guest, logclient.ACT_VM_START, guest.GetShortDesc(ctx), self.UserCred, true) +} + +type StopRescueTask struct { + SGuestBaseTask +} + +func init() { + taskman.RegisterTask(StopRescueTask{}) +} + +func (self *StopRescueTask) OnInit(ctx context.Context, obj db.IStandaloneModel, data jsonutils.JSONObject) { + // Flow: stop -> modify startvm script for rescue -> start + guest := obj.(*models.SGuest) + // Check if guest is running + if guest.Status == api.VM_RUNNING || 
guest.Status == api.VM_RESCUING { + self.StopServer(ctx, guest) + } else { + self.ClearRescue(ctx, guest) + } +} + +func (self *StopRescueTask) StopServer(ctx context.Context, guest *models.SGuest) { + db.OpsLog.LogEvent(guest, db.ACT_STOPPING, nil, self.UserCred) + guest.SetStatus(self.UserCred, api.VM_STOPPING, "StopServer") + self.SetStage("OnServerStopComplete", nil) + guest.StartGuestStopTask(ctx, self.UserCred, true, false, self.GetTaskId()) +} + +func (self *StopRescueTask) OnServerStopComplete(ctx context.Context, guest *models.SGuest, data jsonutils.JSONObject) { + db.OpsLog.LogEvent(guest, db.ACT_STOP, guest.GetShortDesc(ctx), self.UserCred) + logclient.AddActionLogWithStartable(self, guest, logclient.ACT_VM_STOP, guest.GetShortDesc(ctx), self.UserCred, true) + + self.ClearRescue(ctx, guest) +} + +func (self *StopRescueTask) OnServerStopCompleteFailed(ctx context.Context, guest *models.SGuest, err jsonutils.JSONObject) { + guest.SetStatus(self.UserCred, api.VM_STOP_FAILED, err.String()) + db.OpsLog.LogEvent(guest, db.ACT_STOP_FAIL, err, self.UserCred) + logclient.AddActionLogWithStartable(self, guest, logclient.ACT_VM_STOP, err, self.UserCred, false) + self.SetStageFailed(ctx, err) +} + +func (self *StopRescueTask) ClearRescue(ctx context.Context, guest *models.SGuest) { + db.OpsLog.LogEvent(guest, db.ACT_STOP_RESCUE, nil, self.UserCred) + guest.SetStatus(self.UserCred, api.VM_STOP_RESCUE, "ClearRescue") + self.SetStage("OnRescueClearComplete", nil) + + host, _ := guest.GetHost() + err := guest.GetDriver().RequestStopRescue(ctx, self, nil, host, guest) + if err != nil { + self.OnRescueClearCompleteFailed(ctx, guest, jsonutils.NewString(err.Error())) + return + } +} + +func (self *StopRescueTask) OnRescueClearComplete(ctx context.Context, guest *models.SGuest, data jsonutils.JSONObject) { + db.OpsLog.LogEvent(guest, db.ACT_STOP_RESCUE, guest.GetShortDesc(ctx), self.UserCred) + logclient.AddActionLogWithStartable(self, guest, logclient.ACT_VM_STOP_RESCUE, 
guest.GetShortDesc(ctx), self.UserCred, true) + guest.UpdateRescueMode(false) + self.RescueStartServer(ctx, guest) +} + +func (self *StopRescueTask) OnRescueClearCompleteFailed(ctx context.Context, guest *models.SGuest, err jsonutils.JSONObject) { + guest.SetStatus(self.UserCred, api.VM_STOP_RESCUE_FAILED, err.String()) + db.OpsLog.LogEvent(guest, db.ACT_STOP_RESCUE_FAILED, err, self.UserCred) + logclient.AddActionLogWithStartable(self, guest, logclient.ACT_VM_STOP_RESCUE, err, self.UserCred, false) + self.SetStageFailed(ctx, err) +} + +func (self *StopRescueTask) RescueStartServer(ctx context.Context, guest *models.SGuest) { + guest.SetStatus(self.UserCred, api.VM_STARTING, "RescueStartServer") + self.SetStage("OnRescueStartServerComplete", nil) + + // Set Guest rescue params to guest start params + host, _ := guest.GetHost() + err := guest.GetDriver().RequestStartOnHost(ctx, guest, host, self.UserCred, self) + if err != nil { + self.OnRescueStartServerCompleteFailed(ctx, guest, jsonutils.NewString(err.Error())) + return + } +} + +func (self *StopRescueTask) OnRescueStartServerComplete(ctx context.Context, guest *models.SGuest, data jsonutils.JSONObject) { + db.OpsLog.LogEvent(guest, db.ACT_START, guest.GetShortDesc(ctx), self.UserCred) + logclient.AddActionLogWithStartable(self, guest, logclient.ACT_VM_START, guest.GetShortDesc(ctx), self.UserCred, true) + + // Set guest status to rescue running + guest.SetStatus(self.UserCred, api.VM_RUNNING, "OnRescueStartServerComplete") + self.SetStageComplete(ctx, nil) +} + +func (self *StopRescueTask) OnRescueStartServerCompleteFailed(ctx context.Context, obj db.IStandaloneModel, err jsonutils.JSONObject) { + guest := obj.(*models.SGuest) + guest.SetStatus(self.UserCred, api.VM_START_FAILED, err.String()) + db.OpsLog.LogEvent(guest, db.ACT_START_FAIL, guest.GetShortDesc(ctx), self.UserCred) + logclient.AddActionLogWithStartable(self, guest, logclient.ACT_VM_START, guest.GetShortDesc(ctx), self.UserCred, true) +} diff --git 
a/pkg/hostman/guestman/desc/desc.go b/pkg/hostman/guestman/desc/desc.go index 630f366425f..13f02fa4b42 100644 --- a/pkg/hostman/guestman/desc/desc.go +++ b/pkg/hostman/guestman/desc/desc.go @@ -114,6 +114,12 @@ type SGuestHardwareDesc struct { PCIControllers []*PCIController `json:",omitempty"` AnonymousPCIDevs []*PCIDevice `json:",omitempty"` + + RescueInitdPath string `json:",omitempty"` // rescue initramfs path + RescueKernelPath string `json:",omitempty"` // rescue kernel path + RescueDiskPath string `json:",omitempty"` // rescue disk path + RescueDiskDeviceBus uint `json:",omitempty"` // rescue disk device bus + RescueDiskDeviceSlot uint `json:",omitempty"` // rescue disk device slot } type SGuestIsaSerial struct { @@ -336,6 +342,8 @@ type SGuestControlDesc struct { SrcMacCheck bool EncryptKeyId string + + RescueMode bool // rescue mode } type SGuestMetaDesc struct { diff --git a/pkg/hostman/guestman/guesthandlers/guesthandler.go b/pkg/hostman/guestman/guesthandlers/guesthandler.go index 00160f651b5..53d53cc09d0 100644 --- a/pkg/hostman/guestman/guesthandlers/guesthandler.go +++ b/pkg/hostman/guestman/guesthandlers/guesthandler.go @@ -104,6 +104,8 @@ func AddGuestTaskHandler(prefix string, app *appsrv.Application) { "qga-guest-info-task": qgaGuestInfoTask, "qga-get-network": qgaGetNetwork, "qga-set-network": qgaSetNetwork, + "start-rescue": guestStartRescue, + "stop-rescue": guestStopRescue, } { app.AddHandler("POST", fmt.Sprintf("%s/%s//%s", prefix, keyWord, action), @@ -941,3 +943,15 @@ func qgaSetNetwork(ctx context.Context, userCred mcclient.TokenCredential, sid s } return gm.QgaSetNetwork(qgaNetMod, sid, input.Timeout) } + +// guestStartRescue prepare rescue files +func guestStartRescue(ctx context.Context, userCred mcclient.TokenCredential, sid string, body jsonutils.JSONObject) (interface{}, error) { + // Start rescue guest + return guestman.GetGuestManager().GuestStartRescue(ctx, userCred, sid, body) +} + +// guestStopRescue clear rescue files +func 
guestStopRescue(ctx context.Context, userCred mcclient.TokenCredential, sid string, body jsonutils.JSONObject) (interface{}, error) { + // Stop rescue guest + return guestman.GetGuestManager().GuestStopRescue(ctx, userCred, sid, body) +} diff --git a/pkg/hostman/guestman/guestman.go b/pkg/hostman/guestman/guestman.go index bb3f7bb07fd..72f1a846c38 100644 --- a/pkg/hostman/guestman/guestman.go +++ b/pkg/hostman/guestman/guestman.go @@ -906,6 +906,28 @@ func (m *SGuestManager) GuestStop(ctx context.Context, sid string, timeout int64 } } +func (m *SGuestManager) GuestStartRescue(ctx context.Context, userCred mcclient.TokenCredential, sid string, body jsonutils.JSONObject) (jsonutils.JSONObject, error) { + baremetalManagerUri, err := body.GetString("manager_uri") + if err != nil { + return nil, httperrors.NewInputParameterError("manager_uri required") + } + if guest, ok := m.GetServer(sid); ok { + guest.ExecStartRescueTask(ctx, baremetalManagerUri) + return nil, nil + } else { + return nil, httperrors.NewNotFoundError("Guest %s not found", sid) + } +} + +func (m *SGuestManager) GuestStopRescue(ctx context.Context, userCred mcclient.TokenCredential, sid string, body jsonutils.JSONObject) (jsonutils.JSONObject, error) { + if guest, ok := m.GetServer(sid); ok { + guest.ExecStopRescueTask(ctx, body) + return nil, nil + } else { + return nil, httperrors.NewNotFoundError("Guest %s not found", sid) + } +} + func (m *SGuestManager) GuestSync(ctx context.Context, params interface{}) (jsonutils.JSONObject, error) { syncParams, ok := params.(*SBaseParams) if !ok { diff --git a/pkg/hostman/guestman/guesttasks.go b/pkg/hostman/guestman/guesttasks.go index b94cdea2dcc..d725e940413 100644 --- a/pkg/hostman/guestman/guesttasks.go +++ b/pkg/hostman/guestman/guesttasks.go @@ -102,6 +102,54 @@ func (s *SGuestStopTask) CheckGuestRunningLater() { s.checkGuestRunning() } +// SGuestStartRescueTask Start a rescue vm +type SGuestStartRescueTask struct { + *SKVMGuestInstance + ctx 
context.Context + BaremetalManagerUri string +} + +func NewGuestStartRescueTask(guest *SKVMGuestInstance, ctx context.Context, baremetalManagerUri string) *SGuestStartRescueTask { + return &SGuestStartRescueTask{ + SKVMGuestInstance: guest, + ctx: ctx, + BaremetalManagerUri: baremetalManagerUri, + } +} + +func (s *SGuestStartRescueTask) Start() { + if err := s.prepareRescue(s.ctx, s.BaremetalManagerUri); err != nil { + log.Errorf("prepareRescue fail %s", err) + hostutils.TaskFailed(s.ctx, err.Error()) + return + } + + hostutils.TaskComplete(s.ctx, nil) +} + +// SGuestStopRescueTask Stop a rescue vm, clean rescue files +type SGuestStopRescueTask struct { + *SKVMGuestInstance + ctx context.Context +} + +func NewGuestStopRescueTask(guest *SKVMGuestInstance, ctx context.Context) *SGuestStopRescueTask { + return &SGuestStopRescueTask{ + SKVMGuestInstance: guest, + ctx: ctx, + } +} + +func (s *SGuestStopRescueTask) Start() { + if err := s.clearRescue(s.ctx); err != nil { + log.Errorf("clearRescue fail %s", err) + hostutils.TaskFailed(s.ctx, err.Error()) + return + } + + hostutils.TaskComplete(s.ctx, nil) +} + type SGuestSuspendTask struct { *SKVMGuestInstance ctx context.Context diff --git a/pkg/hostman/guestman/qemu-kvm.go b/pkg/hostman/guestman/qemu-kvm.go index 94743f8d471..18b7370417d 100644 --- a/pkg/hostman/guestman/qemu-kvm.go +++ b/pkg/hostman/guestman/qemu-kvm.go @@ -17,7 +17,9 @@ package guestman import ( "context" "fmt" + "io" "io/ioutil" + "net/http" "os" "path" "path/filepath" @@ -32,6 +34,8 @@ import ( "yunion.io/x/log" "yunion.io/x/pkg/appctx" "yunion.io/x/pkg/errors" + "yunion.io/x/pkg/util/httputils" + "yunion.io/x/pkg/util/qemuimgfmt" "yunion.io/x/pkg/util/regutils" "yunion.io/x/pkg/util/seclib" "yunion.io/x/pkg/util/version" @@ -387,6 +391,22 @@ func (s *SKVMGuestInstance) GetSourceDescFilePath() string { return path.Join(s.HomeDir(), "source-desc") } +func (s *SKVMGuestInstance) GetRescueDirPath() string { + return path.Join(s.HomeDir(), 
api.GUEST_RESCUE_RELATIVE_PATH)
+}
+
+// CreateRescueDirPath creates the guest's rescue directory if needed and returns its path.
+func (s *SKVMGuestInstance) CreateRescueDirPath() (string, error) {
+	rescueDir := path.Join(s.HomeDir(), api.GUEST_RESCUE_RELATIVE_PATH)
+	// mkdir -p succeeds when the directory already exists.
+	output, err := procutils.NewCommand("mkdir", "-p", rescueDir).Output()
+	if err != nil {
+		return "", errors.Wrapf(err, "mkdir %s failed: %s", rescueDir, output)
+	}
+
+	return rescueDir, nil
+}
+
 func (s *SKVMGuestInstance) LoadDesc() error {
 	descPath := s.GetDescFilePath()
 	descStr, err := ioutil.ReadFile(descPath)
@@ -511,7 +531,6 @@ func (s *SKVMGuestInstance) asyncScriptStart(ctx context.Context, params interfa
 		return nil, errors.Wrap(err, "fuse mount")
 	}
 
-	// init live migrate listen port
 	if jsonutils.QueryBoolean(data, "need_migrate", false) || s.Desc.IsSlave {
 		migratePort := s.manager.GetLiveMigrateFreePort()
 		defer s.manager.unsetPort(migratePort)
@@ -1520,6 +1539,15 @@ func (s *SKVMGuestInstance) SaveSourceDesc(guestDesc *desc.SGuestDesc) error {
 		}
 	}
 
+	// Fill in the rescue paths and disk PCI address before the desc is written out
+	if s.SourceDesc.RescueMode {
+		err := s.GetRescueDesc()
+		if err != nil {
+			log.Errorf("get rescue desc failed %s", err)
+			return errors.Wrap(err, "get rescue desc")
+		}
+	}
+
 	if err := fileutils2.FilePutContents(
 		s.GetSourceDescFilePath(), jsonutils.Marshal(s.SourceDesc).String(), false,
 	); err != nil {
@@ -1540,6 +1568,35 @@ func (s *SKVMGuestInstance) GetVpcNIC() *desc.SGuestNetwork {
 	return nil
 }
 
+// GetRescueDesc fills the rescue fields of s.SourceDesc: the initramfs, kernel
+// and rescue disk paths under the rescue directory (aarch64 hosts use the
+// *_aarch64 images) and an unused PCI root slot for the rescue disk.
+// It returns an error when the guest is not in rescue mode or no slot is found.
+func (s *SKVMGuestInstance) GetRescueDesc() error {
+	if !s.SourceDesc.RescueMode {
+		return errors.Errorf("guest %s not in rescue mode", s.Id)
+	}
+
+	rescueDir := s.GetRescueDirPath()
+	initramfs, kernel := api.GUEST_RESCUE_INITRAMFS, api.GUEST_RESCUE_KERNEL
+	if s.manager.GetHost().IsAarch64() {
+		initramfs, kernel = api.GUEST_RESCUE_INITRAMFS_ARM64, api.GUEST_RESCUE_KERNEL_ARM64
+	}
+	s.SourceDesc.RescueInitdPath = path.Join(rescueDir, initramfs)
+	s.SourceDesc.RescueKernelPath = path.Join(rescueDir, kernel)
+	s.SourceDesc.RescueDiskPath = path.Join(rescueDir, api.GUEST_RESCUE_SYS_DISK_NAME)
+
+	// Pick an unused slot on the PCI root controller for the rescue disk.
+	bus, slot, found := s.findUnusedSlotForController(desc.CONTROLLER_TYPE_PCI_ROOT, 0)
+	if !found {
+		return errors.Errorf("no valid pci address found ?")
+	}
+	s.SourceDesc.RescueDiskDeviceBus = uint(bus)
+	s.SourceDesc.RescueDiskDeviceSlot = uint(slot)
+
+	return nil
+}
+
 type guestStartTask struct {
 	s *SKVMGuestInstance
 
@@ -1827,6 +1884,22 @@ func (s *SKVMGuestInstance) ExecStopTask(ctx context.Context, params interface{}
 	return nil, nil
 }
 
+// ExecStartRescueTask starts a rescue-start task for the guest. params must be
+// the baremetal manager URI as a string; otherwise hostutils.ParamsError is returned.
+func (s *SKVMGuestInstance) ExecStartRescueTask(ctx context.Context, params interface{}) (jsonutils.JSONObject, error) {
+	baremetalManagerUri, ok := params.(string)
+	if !ok {
+		return nil, hostutils.ParamsError
+	}
+	NewGuestStartRescueTask(s, ctx, baremetalManagerUri).Start()
+	return nil, nil
+}
+
+// ExecStopRescueTask starts a rescue-stop task for the guest. params is unused.
+func (s *SKVMGuestInstance) ExecStopRescueTask(ctx context.Context, params interface{}) {
+	NewGuestStopRescueTask(s, ctx).Start()
+}
+
 func (s *SKVMGuestInstance) ExecSuspendTask(ctx context.Context) {
 	NewGuestSuspendTask(s, ctx, nil).Start()
 }
@@ -2950,3 +3023,182 @@ func (s *SKVMGuestInstance) CPUSetRemove(ctx context.Context) error {
 	}
 	return nil
 }
+
+// clearRescue cleans the guest's rescue directory with fileutils2.Cleandir.
+func (s *SKVMGuestInstance) clearRescue(ctx context.Context) error {
+	rescueDir := s.GetRescueDirPath()
+	if err := fileutils2.Cleandir(rescueDir, false); err != nil {
+		return errors.Wrap(err, "clear rescue dir failed")
+	}
+
+	return nil
+}
+
+// prepareRescue downloads the rescue initramfs and kernel (the aarch64
+// variants on aarch64 hosts) via downloadFromBaremetal and then creates the
+// qcow2 rescue system disk inside the rescue directory.
+func (s *SKVMGuestInstance) prepareRescue(ctx context.Context, baremetalManagerUri string) error {
+	files := []string{
+		api.GUEST_RESCUE_INITRAMFS,
+		api.GUEST_RESCUE_KERNEL,
+	}
+
+	// Support arm64
+	if s.manager.host.IsAarch64() {
+		files = []string{
+			api.GUEST_RESCUE_INITRAMFS_ARM64,
+			api.GUEST_RESCUE_KERNEL_ARM64,
+		}
+	}
+
+	// Prepare files
+	for _, file := range files {
+		err := s.downloadFromBaremetal(file, baremetalManagerUri)
+		if err != nil {
+			return errors.Wrapf(err, "download %s from baremetal failed", file)
+		}
+	}
+
+	// Create disk
+	diskPath := path.Join(s.GetRescueDirPath(), api.GUEST_RESCUE_SYS_DISK_NAME)
+	err := s.createTempDisk(diskPath, api.GUEST_RESCUE_SYS_DISK_SIZE, qemuimgfmt.QCOW2.String(), nil, "")
+	if err != nil {
+		return errors.Wrapf(err, "create disk %s failed", diskPath)
+	}
+
+	return nil
+}
+
+// downloadFromBaremetal fetches filename over HTTP from the URL built by
+// getTftpFileUrl into the guest's rescue directory. An existing file is kept.
+// The payload is written to a temporary file and renamed into place only after
+// a complete download, so a failed attempt never leaves a truncated file that a
+// later call would mistake for an already downloaded one.
+func (s *SKVMGuestInstance) downloadFromBaremetal(filename, baremetalManagerUri string) error {
+	rescueDir, err := s.CreateRescueDirPath()
+	if err != nil {
+		return errors.Wrap(err, "SKVMGuestInstance.CreateRescueDirPath")
+	}
+
+	// Skip the download when the file is already present.
+	filePath := path.Join(rescueDir, filename)
+	if fileutils2.Exists(filePath) {
+		log.Debugf("File %s already exist", filePath)
+		return nil
+	}
+
+	fileURL, err := s.getTftpFileUrl(filename, baremetalManagerUri)
+	if err != nil {
+		return errors.Wrapf(err, "getTftpFileUrl")
+	}
+
+	resp, err := httputils.Request(
+		httputils.GetDefaultClient(),
+		context.Background(),
+		"GET",
+		fileURL,
+		nil,
+		nil,
+		false)
+	if err != nil {
+		return errors.Wrapf(err, "request %s failed", fileURL)
+	}
+	defer resp.Body.Close()
+	// err is nil here, so a bad status needs its own error instead of Wrapf(err, ...).
+	if resp.StatusCode != http.StatusOK {
+		return errors.Errorf("request %s failed: status %d", fileURL, resp.StatusCode)
+	}
+
+	tmpPath := filePath + ".tmp"
+	file, err := os.Create(tmpPath)
+	if err != nil {
+		return errors.Wrapf(err, "create file %s failed", tmpPath)
+	}
+	_, err = io.Copy(file, resp.Body)
+	if cerr := file.Close(); err == nil {
+		err = cerr
+	}
+	if err == nil {
+		err = os.Rename(tmpPath, filePath)
+	}
+	if err != nil {
+		_ = os.Remove(tmpPath) // best-effort cleanup of the partial download
+		return errors.Wrapf(err, "write file %s failed", filePath)
+	}
+
+	return nil
+}
+
+// createTempDisk creates a disk image of sizeMB at diskPath after removing any
+// existing file there. diskFormat "qcow2" calls CreateQcow2 (with LUKS
+// encryption parameters when encryptInfo is non-nil; back is passed through),
+// "vmdk" calls CreateVmdk, and any other value calls CreateRaw.
+func (s *SKVMGuestInstance) createTempDisk(diskPath string, sizeMB int, diskFormat string, encryptInfo *apis.SEncryptInfo, back string) error {
+	if fileutils2.Exists(diskPath) {
+		if err := os.Remove(diskPath); err != nil {
+			log.Errorf("remove stale disk %s failed: %s", diskPath, err)
+		}
+	}
+
+	img, err := qemuimg.NewQemuImage(diskPath)
+	if err != nil {
+		return errors.Wrapf(err, "new qemu image %s", diskPath)
+	}
+
+	switch diskFormat {
+	case "qcow2":
+		if encryptInfo != nil {
+			err = img.CreateQcow2(sizeMB, false, back, encryptInfo.Key, qemuimg.EncryptFormatLuks, encryptInfo.Alg)
+		} else {
+			err = img.CreateQcow2(sizeMB, false, back, "", "", "")
+		}
+	case "vmdk":
+		err = img.CreateVmdk(sizeMB, false)
+	default:
+		err = img.CreateRaw(sizeMB)
+	}
+	if err != nil {
+		return errors.Wrapf(err, "create %s disk %s", diskFormat, diskPath)
+	}
+
+	return nil
+}
+
+// getTftpFileUrl returns the "http://<endpoint>/tftp/<filename>" URL, where
+// endpoint comes from getTftpEndpoint. On error the returned URL is empty.
+func (s *SKVMGuestInstance) getTftpFileUrl(filename, baremetalManagerUri string) (string, error) {
+	endpoint, err := s.getTftpEndpoint(baremetalManagerUri)
+	if err != nil {
+		return "", errors.Wrap(err, "get http file server endpoint")
+	}
+	return fmt.Sprintf("http://%s/tftp/%s", endpoint, filename), nil
+}
+
+// getTftpEndpoint derives a "host:port" endpoint from a baremetal manager URI
+// of the form "scheme://host:port[/path]": same host, port increased by 1000.
+func (s *SKVMGuestInstance) getTftpEndpoint(baremetalManagerUri string) (string, error) {
+	// Drop the scheme and anything after the authority part.
+	addrs := strings.SplitN(baremetalManagerUri, "//", 2)
+	if len(addrs) < 2 {
+		return "", errors.Errorf("baremetal manager uri is invalid")
+	}
+	hostPort := addrs[1]
+	if idx := strings.IndexAny(hostPort, "/?#"); idx >= 0 {
+		hostPort = hostPort[:idx]
+	}
+
+	// Split at the last colon so a bracketed IPv6 host keeps its own colons.
+	sep := strings.LastIndex(hostPort, ":")
+	if sep <= 0 {
+		return "", errors.Errorf("baremetal manager uri is invalid")
+	}
+	host := hostPort[:sep]
+
+	// The endpoint port is the baremetal agent port plus 1000.
+	port, err := strconv.Atoi(hostPort[sep+1:])
+	if err != nil {
+		return "", errors.Wrapf(err, "convert port failed")
+	}
+
+	return fmt.Sprintf("%s:%d", host, port+1000), nil
+}
diff --git a/pkg/hostman/guestman/qemu-kvmhelper.go b/pkg/hostman/guestman/qemu-kvmhelper.go
index 3d3992d605f..4701124aeb0 100644
--- a/pkg/hostman/guestman/qemu-kvmhelper.go
+++ b/pkg/hostman/guestman/qemu-kvmhelper.go
@@ -545,6 +545,15 @@ function nic_mtu() {
 		}
 	}
 
+	// set rescue flag to input
+	if s.Desc.RescueMode {
+		input.RescueInitdPath = s.SourceDesc.RescueInitdPath
+		input.RescueKernelPath = s.SourceDesc.RescueKernelPath
+		input.RescueDiskPath = s.SourceDesc.RescueDiskPath
+		input.RescueDiskDeviceBus = s.SourceDesc.RescueDiskDeviceBus
+		input.RescueDiskDeviceSlot = s.SourceDesc.RescueDiskDeviceSlot
+	}
+
 	qemuOpts, err := qemu.GenerateStartOptions(input)
 	if err != nil {
 		return "", errors.Wrap(err, "GenerateStartCommand")
diff --git a/pkg/hostman/guestman/qemu/generate.go b/pkg/hostman/guestman/qemu/generate.go
index c2c11126fee..eda35da4be9 100644
--- a/pkg/hostman/guestman/qemu/generate.go
+++ b/pkg/hostman/guestman/qemu/generate.go
@@ -219,6 +219,22 @@ func generateScsiOptions(scsi *desc.SGuestVirtioScsi) string {
 	return opt
 }
 
+// generateInitrdOptions returns the qemu options for the rescue boot: -initrd
+// and -kernel, plus a drive and a virtio-blk-pci device (bootindex=1) for the
+// disk image sysImg at the given PCI bus and slot. nics is currently unused.
+func generateInitrdOptions(drvOpt QemuOptions, initrdPath, kernel, sysImg string, rescueDiskDeviceBus, rescueDiskDeviceSlot uint, nics []*desc.SGuestNetwork) []string {
+	driveString := fmt.Sprintf("file=%s,if=none,id=initrd,cache=none,aio=native,file.locking=off", sysImg)
+	deviceString := fmt.Sprintf("virtio-blk-pci,drive=initrd,iothread=iothread0,bus=pci.%d,addr=0x%02x,id=initrd,bootindex=1", rescueDiskDeviceBus, rescueDiskDeviceSlot)
+
+	// TODO: pass the nics' ip configuration to the rescue kernel via -append.
+	return []string{
+		fmt.Sprintf("-initrd %s", initrdPath),
+		fmt.Sprintf("-kernel %s", kernel),
+		drvOpt.Drive(driveString),
+		drvOpt.Device(deviceString),
+	}
+}
+
 func generateDisksOptions(drvOpt QemuOptions, disks []*desc.SGuestDisk, isEncrypt, isMaster bool) []string {
 	opts := make([]string, 0)
 	for _, disk := range disks {
@@ -635,6 +651,12 @@ type GenerateStartOptionsInput struct {
 	EnablePvpanic bool
 
 	EncryptKeyPath string
+
+	RescueInitdPath string // rescue initramfs path
+	RescueKernelPath string // rescue kernel path
+	RescueDiskPath string // rescue disk path
+	RescueDiskDeviceBus uint
+	RescueDiskDeviceSlot uint
 }
 
 func (input *GenerateStartOptionsInput) HasBootIndex() bool {
@@ -757,6 +779,20 @@ func GenerateStartOptions(
 	} else if input.GuestDesc.PvScsi != nil {
 		opts = append(opts, generatePCIDeviceOption(input.GuestDesc.PvScsi.PCIDevice))
 	}
+
+	// generate initrd and kernel options
+	if input.RescueInitdPath != "" {
+		opts = append(opts, generateInitrdOptions(
+			drvOpt,
+			input.RescueInitdPath,
+			input.RescueKernelPath,
+			input.RescueDiskPath,
+			input.RescueDiskDeviceBus,
+			input.RescueDiskDeviceSlot,
+			input.GuestDesc.Nics,
+		)...)
+	}
+
 	// generate disk options
 	opts = append(opts, generateDisksOptions(
 		drvOpt, input.GuestDesc.Disks, isEncrypt, input.GuestDesc.IsMaster)...)
diff --git a/pkg/util/logclient/consts.go b/pkg/util/logclient/consts.go
index 5fac5a0a13f..fe2efd63dc5 100644
--- a/pkg/util/logclient/consts.go
+++ b/pkg/util/logclient/consts.go
@@ -59,6 +59,8 @@ const (
 	ACT_VM_SRC_CHECK = "vm_src_check"
 	ACT_VM_START = "vm_start"
 	ACT_VM_STOP = "vm_stop"
+	ACT_VM_START_RESCUE = "vm_start_rescue"
+	ACT_VM_STOP_RESCUE = "vm_stop_rescue"
 	ACT_VM_SUSPEND = "vm_suspend"
 	ACT_VM_RESTART = "vm_restart"
 	ACT_VM_RESUME = "vm_resume"