diff --git a/tdxctl/src/fde_setup.rs b/tdxctl/src/fde_setup.rs
index aed1b0d..91e79f9 100644
--- a/tdxctl/src/fde_setup.rs
+++ b/tdxctl/src/fde_setup.rs
@@ -264,7 +264,12 @@ impl SetupFdeArgs {
         Ok(())
     }
 
-    fn mount_rootfs(&self, host_shared: &HostShared, disk_crypt_key: &str) -> Result<()> {
+    async fn mount_rootfs(
+        &self,
+        host_shared: &HostShared,
+        disk_crypt_key: &str,
+        nc: &NotifyClient,
+    ) -> Result<()> {
         let rootfs_mountpoint = self.rootfs_dir.display().to_string();
         if !self.rootfs_encryption {
             warn!("Rootfs encryption is disabled, skipping disk encryption");
@@ -289,21 +294,13 @@ impl SetupFdeArgs {
         Self::mount_e2fs("/dev/mapper/rootfs_crypt", &rootfs_mountpoint)?;
 
         let hash_file = self.rootfs_dir.join(".rootfs_hash");
-        let existing_rootfs_hash = match fs::read(&hash_file) {
-            Ok(rootfs_hash) => rootfs_hash,
-            Err(_) => {
-                // Old image touches .bootstraped instead of .rootfs_hash
-                if !self.rootfs_dir.join(".bootstraped").exists() {
-                    bail!("Rootfs is not bootstrapped");
-                }
-                Default::default()
-            }
-        };
-
+        let existing_rootfs_hash = fs::read(&hash_file).unwrap_or_default();
         if existing_rootfs_hash != host_shared.vm_config.rootfs_hash {
-            let todo = "do upgrade";
+            info!("Rootfs hash changed, upgrading the rootfs");
             fs::remove_file(&hash_file).context("Failed to remove old rootfs hash file")?;
-            bail!("Rootfs hash mismatch");
+            nc.notify_q("boot.progress", "upgrading rootfs").await;
+            self.extract_rootfs(&host_shared.vm_config.rootfs_hash)
+                .await?;
         }
         Ok(())
     }
@@ -344,17 +341,14 @@ impl SetupFdeArgs {
         Ok(())
     }
 
-    fn bootstrap_rootfs(
+    async fn bootstrap_rootfs(
         &self,
         host_shared: &HostShared,
         disk_crypt_key: &str,
         instance_info: &InstanceInfo,
-    ) -> Result<InstanceInfo> {
+        nc: &NotifyClient,
+    ) -> Result<()> {
         info!("Setting up disk encryption");
-        fs::create_dir_all(&self.root_cdrom_mnt)
-            .context("Failed to create rootfs cdrom mount point")?;
-        mount_cdrom(&self.root_cdrom, &self.root_cdrom_mnt.display().to_string())
-            .context("Failed to mount rootfs cdrom")?;
         info!("Formatting rootfs");
         let rootfs_dev = if self.rootfs_encryption {
             self.luks_setup(disk_crypt_key)?;
@@ -370,9 +364,21 @@ impl SetupFdeArgs {
             &[rootfs_dev, &self.rootfs_dir.display().to_string()],
         )
         .context("Failed to mount rootfs")?;
+        self.extract_rootfs(&host_shared.vm_config.rootfs_hash)
+            .await?;
+        let mut instance_info = instance_info.clone();
+        instance_info.bootstrapped = true;
+        nc.notify_q("instance.info", &serde_json::to_string(&instance_info)?)
+            .await;
+        Ok(())
+    }
+    async fn extract_rootfs(&self, expected_rootfs_hash: &[u8]) -> Result<()> {
         info!("Extracting rootfs");
-
+        fs::create_dir_all(&self.root_cdrom_mnt)
+            .context("Failed to create rootfs cdrom mount point")?;
+        mount_cdrom(&self.root_cdrom, &self.root_cdrom_mnt.display().to_string())
+            .context("Failed to mount rootfs cdrom")?;
         let rootfs_cpio = self.root_cdrom_mnt.join("rootfs.cpio");
         if !rootfs_cpio.exists() {
             bail!("Rootfs cpio file not found on cdrom");
         }
@@ -381,7 +387,7 @@ impl SetupFdeArgs {
             fs::File::open(rootfs_cpio).context("Failed to open rootfs cpio file")?;
         let mut hashing_rootfs_cpio = HashingFile::<sha2::Sha256>::new(rootfs_cpio_file);
         let mut status = Command::new("/usr/bin/env")
-            .args(["cpio", "-i"])
+            .args(["cpio", "-i", "-d", "-u"])
             .current_dir(&self.rootfs_dir)
             .stdin(Stdio::piped())
             .spawn()
@@ -408,16 +414,16 @@ impl SetupFdeArgs {
             bail!("Failed to extract rootfs, cpio returned {status:?}");
         }
         let rootfs_hash = hashing_rootfs_cpio.finalize();
-        if rootfs_hash[..] != host_shared.vm_config.rootfs_hash[..] {
+        if &rootfs_hash[..] != expected_rootfs_hash {
             bail!("Rootfs hash mismatch");
         }
         info!("Rootfs hash is valid");
-        let mut instance_info = instance_info.clone();
-        instance_info.bootstrapped = true;
         fs::write(self.rootfs_dir.join(".rootfs_hash"), rootfs_hash)
             .context("Failed to write rootfs hash")?;
+        umount(&self.root_cdrom_mnt.display().to_string())
+            .context("Failed to unmount rootfs cdrom")?;
         info!("Rootfs is ready");
-        Ok(instance_info)
+        Ok(())
     }
 
     fn write_decrypted_env(&self, decrypted_env: &BTreeMap<String, String>) -> Result<()> {
@@ -489,13 +495,11 @@ impl SetupFdeArgs {
         let disk_crypt_key = format!("{}\n", app_keys.disk_crypt_key);
         if instance_info.bootstrapped {
             nc.notify_q("boot.progress", "mounting rootfs").await;
-            self.mount_rootfs(host_shared, &disk_crypt_key)?;
+            self.mount_rootfs(host_shared, &disk_crypt_key, nc).await?;
         } else {
             nc.notify_q("boot.progress", "initializing rootfs").await;
-            let instance_info =
-                self.bootstrap_rootfs(host_shared, &disk_crypt_key, &instance_info)?;
-            nc.notify_q("instance.info", &serde_json::to_string(&instance_info)?)
-                .await;
+            self.bootstrap_rootfs(host_shared, &disk_crypt_key, &instance_info, nc)
+                .await?;
         }
         self.write_decrypted_env(&decrypted_env)?;
         nc.notify_q("boot.progress", "rootfs ready").await;
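
Note: extract_rootfs above hashes the cpio stream while piping it into cpio, and only trusts the extracted tree if the digest matches the expected rootfs hash. HashingFile is this crate's own wrapper (the SHA-256 type parameter shown in the hunk is inferred from context); the standalone sketch below, with hypothetical names, shows the underlying pattern: hash exactly the bytes handed to the extractor, then finalize and compare.

use sha2::{Digest, Sha256};
use std::io::{self, Read};

/// Wraps a reader and hashes every byte that passes through it.
struct HashingReader<R> {
    inner: R,
    hasher: Sha256,
}

impl<R: Read> HashingReader<R> {
    fn new(inner: R) -> Self {
        Self { inner, hasher: Sha256::new() }
    }

    /// Consume the wrapper and return the digest of everything read so far.
    fn finalize(self) -> Vec<u8> {
        self.hasher.finalize().to_vec()
    }
}

impl<R: Read> Read for HashingReader<R> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let n = self.inner.read(buf)?;
        // Hash only the bytes actually returned to the caller, so the
        // digest covers precisely the stream the extractor consumed.
        self.hasher.update(&buf[..n]);
        Ok(n)
    }
}
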
diff --git a/teepod/rpc/proto/teepod_rpc.proto b/teepod/rpc/proto/teepod_rpc.proto
index b9ed109..11423b7 100644
--- a/teepod/rpc/proto/teepod_rpc.proto
+++ b/teepod/rpc/proto/teepod_rpc.proto
@@ -122,6 +122,8 @@ message ResizeVmRequest {
   optional uint32 memory = 3;
   // Disk size in GB
   optional uint32 disk_size = 4;
+  // Image name
+  optional string image = 5;
 }
 
 // Service definition for Teepod
@@ -138,6 +140,8 @@ service Teepod {
   rpc UpgradeApp(UpgradeAppRequest) returns (Id);
   // Shutdown a VM
   rpc ShutdownVm(Id) returns (google.protobuf.Empty);
+  // RPC to resize a VM
+  rpc ResizeVm(ResizeVmRequest) returns (google.protobuf.Empty);
   // RPC to list all VMs
   rpc Status(google.protobuf.Empty) returns (StatusResponse);
 
@@ -149,7 +153,4 @@
 
   // Get VM info by ID
   rpc GetInfo(Id) returns (GetInfoResponse);
-
-  // RPC to resize a VM
-  rpc ResizeVm(ResizeVmRequest) returns (google.protobuf.Empty);
 }
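
Note: on the Rust side the proto3 optional scalars surface as Option values. A hypothetical sketch of the request shape as resize_vm consumes it (id and vcpu are used by the handler but their field numbers sit outside this hunk, so they are assumptions here):

/// Rust-side view of ResizeVmRequest; proto3 `optional` maps to Option.
pub struct ResizeVmRequest {
    pub id: String,
    pub vcpu: Option<u32>,
    pub memory: Option<u32>,
    pub disk_size: Option<u32>,
    /// New in this change: switch the VM to a different image on resize.
    pub image: Option<String>,
}
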
diff --git a/teepod/src/app.rs b/teepod/src/app.rs
index 936cfc3..77e25d2 100644
--- a/teepod/src/app.rs
+++ b/teepod/src/app.rs
@@ -1,33 +1,19 @@
-//! App related code
-//!
-//! Directory structure:
-//! ```text
-//! .teepod/
-//! ├── image
-//! │   └── ubuntu-24.04
-//! │       ├── hda.img
-//! │       ├── info.json
-//! │       ├── initrd.img
-//! │       ├── kernel
-//! │       └── rootfs.iso
-//! └── vm
-//!     └── e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
-//!         └── shared
-//!             └── app-compose.json
-//! ```
 use crate::config::{Config, Protocol};
 
 use anyhow::{bail, Context, Result};
 use bon::Builder;
 use fs_err as fs;
+use guest_api::client::DefaultClient as GuestClient;
 use id_pool::IdPool;
+use kms_rpc::kms_client::KmsClient;
+use ra_rpc::client::RaClient;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::net::IpAddr;
 use std::path::{Path, PathBuf};
 use std::sync::{Arc, Mutex, MutexGuard};
 use supervisor_client::SupervisorClient;
-use teepod_rpc as pb;
+use teepod_rpc::{self as pb, VmConfiguration};
 use tracing::{error, info};
 
 pub use image::{Image, ImageInfo};
@@ -102,40 +88,40 @@ impl App {
         let todo = "sanitize the image name";
         let image_path = self.config.image_path.join(&manifest.image);
         let image = Image::load(&image_path).context("Failed to load image")?;
-
-        let cid = cids_assigned.get(&manifest.id).cloned();
-        let cid = match cid {
-            Some(cid) => cid,
-            None => self
-                .lock()
-                .cid_pool
-                .allocate()
-                .context("CID pool exhausted")?,
-        };
-
-        let vm_config = VmConfig {
-            manifest,
-            image,
-            cid,
-            networking: self.config.networking.clone(),
-            workdir: vm_work_dir.path().to_path_buf(),
+        let vm_id = manifest.id.clone();
+        {
+            let mut teapot = self.lock();
+            let cid = teapot
+                .get(&vm_id)
+                .map(|vm| vm.config.cid)
+                .or_else(|| cids_assigned.get(&vm_id).cloned())
+                .or_else(|| teapot.cid_pool.allocate())
+                .context("CID pool exhausted")?;
+            let vm_config = VmConfig {
+                manifest,
+                image,
+                cid,
+                networking: self.config.networking.clone(),
+                workdir: vm_work_dir.path().to_path_buf(),
+            };
+            if vm_config.manifest.disk_size > self.config.cvm.max_disk_size {
+                bail!(
+                    "disk size too large, max size is {}",
+                    self.config.cvm.max_disk_size
+                );
+            }
+            teapot.add(VmState::new(vm_config));
         };
-        if vm_config.manifest.disk_size > self.config.cvm.max_disk_size {
-            bail!(
-                "disk size too large, max size is {}",
-                self.config.cvm.max_disk_size
-            );
-        }
-        let vm_id = vm_config.manifest.id.clone();
-        self.lock().add(VmState::new(vm_config));
         let started = vm_work_dir.started().context("Failed to read VM state")?;
         if started {
             self.start_vm(&vm_id).await?;
         }
+        Ok(())
     }
 
     pub async fn start_vm(&self, id: &str) -> Result<()> {
+        self.sync_dynamic_config(id)?;
         let is_running = self
             .supervisor
             .info(id)
             .await
@@ -316,6 +302,93 @@ impl App {
         }
         Ok(())
     }
+
+    pub(crate) fn compose_file_path(&self, id: &str) -> PathBuf {
+        self.shared_dir(id).join("app-compose.json")
+    }
+
+    pub(crate) fn encrypted_env_path(&self, id: &str) -> PathBuf {
+        self.shared_dir(id).join("encrypted-env")
+    }
+
+    pub(crate) fn shared_dir(&self, id: &str) -> PathBuf {
+        self.config.run_path.join(id).join("shared")
+    }
+
+    pub(crate) fn prepare_work_dir(&self, id: &str, req: &VmConfiguration) -> Result<PathBuf> {
+        let work_dir = self.work_dir(id);
+        if work_dir.exists() {
+            bail!("The instance already exists at {}", work_dir.display());
+        }
+        let shared_dir = work_dir.join("shared");
+        fs::create_dir_all(&shared_dir).context("Failed to create shared directory")?;
+        fs::write(shared_dir.join("app-compose.json"), &req.compose_file)
+            .context("Failed to write compose file")?;
+        if !req.encrypted_env.is_empty() {
+            fs::write(shared_dir.join("encrypted-env"), &req.encrypted_env)
+                .context("Failed to write encrypted env")?;
+        }
+        let app_id = req.app_id.clone().unwrap_or_default();
+        if !app_id.is_empty() {
+            let instance_info = serde_json::json!({
+                "app_id": app_id,
+            });
+            fs::write(
+                shared_dir.join(".instance_info"),
+                serde_json::to_string(&instance_info)?,
+            )
+            .context("Failed to write vm config")?;
+        }
+        Ok(work_dir)
+    }
+
+    pub(crate) fn sync_dynamic_config(&self, id: &str) -> Result<()> {
+        let work_dir = self.work_dir(id);
+        let shared_dir = self.shared_dir(id);
+        let manifest = work_dir.manifest().context("Failed to read manifest")?;
+        let certs_dir = shared_dir.join("certs");
+        fs::create_dir_all(&certs_dir).context("Failed to create certs directory")?;
+        let cfg = &self.config;
+        let image_path = cfg.image_path.join(&manifest.image);
+        let image_info = ImageInfo::load(image_path.join("metadata.json"))
+            .context("Failed to load image info")?;
+        let rootfs_hash = image_info
+            .rootfs_hash
+            .context("Rootfs hash not found in image info")?;
+        let vm_config = serde_json::json!({
+            "rootfs_hash": rootfs_hash,
+            "kms_url": cfg.cvm.kms_url,
+            "tproxy_url": cfg.cvm.tproxy_url,
+            "docker_registry": cfg.cvm.docker_registry,
+            "host_api_url": format!("vsock://2:{}/api", cfg.host_api.port),
+        });
+        let vm_config_str =
+            serde_json::to_string(&vm_config).context("Failed to serialize vm config")?;
+        fs::write(shared_dir.join("config.json"), vm_config_str)
+            .context("Failed to write vm config")?;
+        fs::copy(&cfg.cvm.ca_cert, certs_dir.join("ca.cert")).context("Failed to copy ca cert")?;
+        fs::copy(&cfg.cvm.tmp_ca_cert, certs_dir.join("tmp-ca.cert"))
+            .context("Failed to copy tmp ca cert")?;
+        fs::copy(&cfg.cvm.tmp_ca_key, certs_dir.join("tmp-ca.key"))
+            .context("Failed to copy tmp ca key")?;
+        Ok(())
+    }
+
+    pub(crate) fn kms_client(&self) -> Result<KmsClient<RaClient>> {
+        if self.config.kms_url.is_empty() {
+            bail!("KMS is not configured");
+        }
+        let url = format!("{}/prpc", self.config.kms_url);
+        let prpc_client = RaClient::new(url, true);
+        Ok(KmsClient::new(prpc_client))
+    }
+
+    pub(crate) fn tappd_client(&self, id: &str) -> Result<GuestClient> {
+        let cid = self.lock().get(id).context("vm not found")?.config.cid;
+        Ok(guest_api::client::new_client(format!(
+            "vsock://{cid}:8000/api"
+        )))
+    }
 }
 
 #[derive(Clone)]
diff --git a/teepod/src/app/qemu.rs b/teepod/src/app/qemu.rs
index 01f6e2c..f218b96 100644
--- a/teepod/src/app/qemu.rs
+++ b/teepod/src/app/qemu.rs
@@ -320,6 +320,7 @@ impl VmWorkDir {
     }
 
     pub fn put_manifest(&self, manifest: &Manifest) -> Result<()> {
+        fs::create_dir_all(&self.workdir).context("Failed to create workdir")?;
         let manifest_path = self.manifest_path();
         fs::write(manifest_path, serde_json::to_string(manifest)?)
             .context("Failed to write manifest")
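
Note: load_vm above now resolves a CID in three steps while holding the state lock: reuse the CID of a VM already loaded under this id, else the CID recorded for it earlier, else a fresh allocation from the pool. A minimal sketch of that ordering with stand-in types (IdPool is the crate's allocator; a Vec stands in for it here):

/// Hypothetical standalone version of the CID-selection order in load_vm.
fn pick_cid(
    loaded: Option<u32>,   // CID of a VM already loaded under this id
    recorded: Option<u32>, // CID assigned on a previous run
    pool: &mut Vec<u32>,   // stand-in for IdPool::allocate
) -> Option<u32> {
    loaded.or(recorded).or_else(|| pool.pop())
}

Keeping a restarted VM on its old CID means guest vsock addresses such as vsock://{cid}:8000/api stay stable across reloads.
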
diff --git a/teepod/src/console.html b/teepod/src/console.html
index 6715d82..add76fa 100644
--- a/teepod/src/console.html
+++ b/teepod/src/console.html
@@ -490,7 +490,7 @@ Deploy a new instance
@@ -796,10 +796,20 @@ Update VM Config
-
+
+
+
+
+
@@ -851,7 +861,7 @@ Derive VM
@@ -923,8 +933,7 @@ Derive VM
             token_key: ''
           }
         });
-      const images = ref([]);
-
+      const availableImages = ref([]);
       const upgradeDialog = ref({
         show: false,
         vm: null,
@@ -933,7 +942,8 @@ Derive VM
         resetSecrets: false,
         vcpu: 0,
         memory: 0,
-        disk_size: 0
+        disk_size: 0,
+        image: ''
       });
       const upgradeMessage = ref('');
       const errorMessage = ref('');
@@ -1179,7 +1189,7 @@ Derive VM
       try {
         const response = await rpcCall('ListImages');
         const data = await response.json();
-        images.value = data.images;
+        availableImages.value = data.images;
       } catch (error) {
         console.error('error loading images:', error);
       }
@@ -1194,6 +1204,7 @@ Derive VM
         vcpu: vm.configuration?.vcpu,
         memory: vm.configuration?.memory,
         disk_size: vm.configuration?.disk_size,
+        image: vm.configuration?.image,
       };
       clearEncryptedEnv();
     };
@@ -1211,16 +1222,18 @@ Derive VM
 
     const upgradeVM = async () => {
       try {
-        if (
-          upgradeDialog.value.vcpu != upgradeDialog.value.vm.configuration.vcpu ||
-          upgradeDialog.value.memory != upgradeDialog.value.vm.configuration.memory ||
-          upgradeDialog.value.disk_size != upgradeDialog.value.vm.configuration.disk_size
-        ) {
+        const vm = upgradeDialog.value.vm;
+        const original = vm.configuration;
+        const updated = upgradeDialog.value;
+
+        const fieldsToCompare = ['vcpu', 'memory', 'disk_size', 'image'];
+        if (fieldsToCompare.some(field => updated[field] != original[field])) {
           const response = await rpcCall('ResizeVm', {
-            id: upgradeDialog.value.vm.id,
-            vcpu: upgradeDialog.value.vcpu,
-            memory: upgradeDialog.value.memory,
-            disk_size: upgradeDialog.value.disk_size
+            id: vm.id,
+            vcpu: updated.vcpu,
+            memory: updated.memory,
+            disk_size: updated.disk_size,
+            image: updated.image
           });
           await response.json();
         }
@@ -1420,7 +1433,7 @@ Derive VM
       return {
         vms,
         vmForm,
-        images,
+        availableImages,
         createVm,
         stopVm,
         startVm,
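
Note: the upgrade dialog now diffs the editable fields against the VM's current configuration and only issues ResizeVm when at least one differs. The same guard expressed in Rust, with hypothetical types:

/// Mirrors fieldsToCompare in the console: vcpu, memory, disk_size, image.
#[derive(PartialEq)]
struct ResizeFields {
    vcpu: u32,
    memory: u32,
    disk_size: u32,
    image: String,
}

/// Send a resize request only when something actually changed.
fn resize_needed(original: &ResizeFields, updated: &ResizeFields) -> bool {
    original != updated
}
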
diff --git a/teepod/src/main_service.rs b/teepod/src/main_service.rs
index 2cb3774..0d09ce5 100644
--- a/teepod/src/main_service.rs
+++ b/teepod/src/main_service.rs
@@ -1,12 +1,8 @@
 use std::ops::Deref;
-use std::path::PathBuf;
 use std::time::{SystemTime, UNIX_EPOCH};
 
 use anyhow::{anyhow, bail, Context, Result};
 use fs_err as fs;
-use guest_api::client::DefaultClient as GuestClient;
-use kms_rpc::kms_client::KmsClient;
-use ra_rpc::client::RaClient;
 use ra_rpc::{CallContext, RpcCall};
 use teepod_rpc::teepod_server::{TeepodRpc, TeepodServer};
 use teepod_rpc::{
@@ -15,7 +11,7 @@ use teepod_rpc::{
 };
 use tracing::{info, warn};
 
-use crate::app::{App, ImageInfo, Manifest, PortMapping, VmWorkDir};
+use crate::app::{App, Manifest, PortMapping, VmWorkDir};
 
 fn hex_sha256(data: &str) -> String {
     use sha2::Digest;
@@ -36,92 +32,6 @@ impl Deref for RpcHandler {
     }
 }
 
-impl App {
-    pub(crate) fn compose_file_path(&self, id: &str) -> PathBuf {
-        self.shared_dir(id).join("app-compose.json")
-    }
-
-    pub(crate) fn encrypted_env_path(&self, id: &str) -> PathBuf {
-        self.shared_dir(id).join("encrypted-env")
-    }
-
-    pub(crate) fn shared_dir(&self, id: &str) -> PathBuf {
-        self.config.run_path.join(id).join("shared")
-    }
-
-    pub(crate) fn prepare_work_dir(&self, id: &str, req: &VmConfiguration) -> Result<PathBuf> {
-        let work_dir = self.work_dir(id);
-        if work_dir.exists() {
-            bail!("The instance is already exists at {}", work_dir.display());
-        }
-        let shared_dir = work_dir.join("shared");
-        fs::create_dir_all(&shared_dir).context("Failed to create shared directory")?;
-        fs::write(shared_dir.join("app-compose.json"), &req.compose_file)
-            .context("Failed to write compose file")?;
-        if !req.encrypted_env.is_empty() {
-            fs::write(shared_dir.join("encrypted-env"), &req.encrypted_env)
-                .context("Failed to write encrypted env")?;
-        }
-        let certs_dir = shared_dir.join("certs");
-        fs::create_dir_all(&certs_dir).context("Failed to create certs directory")?;
-
-        let cfg = &self.config;
-        fs::copy(&cfg.cvm.ca_cert, certs_dir.join("ca.cert")).context("Failed to copy ca cert")?;
-        fs::copy(&cfg.cvm.tmp_ca_cert, certs_dir.join("tmp-ca.cert"))
-            .context("Failed to copy tmp ca cert")?;
-        fs::copy(&cfg.cvm.tmp_ca_key, certs_dir.join("tmp-ca.key"))
-            .context("Failed to copy tmp ca key")?;
-
-        let image_path = cfg.image_path.join(&req.image);
-        let image_info = ImageInfo::load(image_path.join("metadata.json"))
-            .context("Failed to load image info")?;
-
-        let rootfs_hash = image_info
-            .rootfs_hash
-            .context("Rootfs hash not found in image info")?;
-        let vm_config = serde_json::json!({
-            "rootfs_hash": rootfs_hash,
-            "kms_url": cfg.cvm.kms_url,
-            "tproxy_url": cfg.cvm.tproxy_url,
-            "docker_registry": cfg.cvm.docker_registry,
-            "host_api_url": format!("vsock://2:{}/api", cfg.host_api.port),
-        });
-        let vm_config_str =
-            serde_json::to_string(&vm_config).context("Failed to serialize vm config")?;
-        fs::write(shared_dir.join("config.json"), vm_config_str)
-            .context("Failed to write vm config")?;
-        let app_id = req.app_id.clone().unwrap_or_default();
-        if !app_id.is_empty() {
-            let instance_info = serde_json::json!({
-                "app_id": app_id,
-            });
-            fs::write(
-                shared_dir.join(".instance_info"),
-                serde_json::to_string(&instance_info)?,
-            )
-            .context("Failed to write vm config")?;
-        }
-
-        Ok(work_dir)
-    }
-
-    pub(crate) fn kms_client(&self) -> Result<KmsClient<RaClient>> {
-        if self.config.kms_url.is_empty() {
-            bail!("KMS is not configured");
-        }
-        let url = format!("{}/prpc", self.config.kms_url);
-        let prpc_client = RaClient::new(url, true);
-        Ok(KmsClient::new(prpc_client))
-    }
-
-    pub(crate) fn tappd_client(&self, id: &str) -> Result<GuestClient> {
-        let cid = self.lock().get(id).context("vm not found")?.config.cid;
-        Ok(guest_api::client::new_client(format!(
-            "vsock://{cid}:8000/api"
-        )))
-    }
-}
 
 fn app_id_of(compose_file: &str) -> String {
     fn truncate40(s: &str) -> &str {
         if s.len() > 40 {
@@ -176,27 +86,26 @@ impl TeepodRpc for RpcHandler {
             None => app_id_of(&request.compose_file),
         };
         let id = uuid::Uuid::new_v4().to_string();
-        let work_dir = self.prepare_work_dir(&id, &request)?;
         let now = SystemTime::now()
             .duration_since(UNIX_EPOCH)
             .unwrap_or_default()
             .as_millis() as u64;
         let manifest = Manifest::builder()
             .id(id.clone())
-            .name(request.name)
+            .name(request.name.clone())
             .app_id(app_id.clone())
-            .image(request.image)
+            .image(request.image.clone())
             .vcpu(request.vcpu)
             .memory(request.memory)
             .disk_size(request.disk_size)
             .port_map(port_map)
             .created_at_ms(now)
             .build();
-
-        let vm_work_dir = VmWorkDir::new(&work_dir);
+        let vm_work_dir = self.app.work_dir(&id);
         vm_work_dir
             .put_manifest(&manifest)
             .context("Failed to write manifest")?;
+        let work_dir = self.prepare_work_dir(&id, &request)?;
        if let Err(err) = vm_work_dir.set_started(true) {
             warn!("Failed to set started: {}", err);
         }
@@ -329,12 +238,13 @@
 
     #[tracing::instrument(skip(self, request), fields(id = request.id))]
     async fn resize_vm(self, request: ResizeVmRequest) -> Result<()> {
+        info!("Resizing VM: {:?}", request);
         let vm = self
             .app
             .vm_info(&request.id)
             .await?
             .context("vm not found")?;
-        if vm.status != "stopped" {
+        if !["stopped", "exited"].contains(&vm.status.as_str()) {
             return Err(anyhow!(
                 "vm should be stopped before resize: {}",
                 request.id
@@ -349,6 +259,9 @@
         if let Some(memory) = request.memory {
             manifest.memory = memory;
         }
+        if let Some(image) = request.image {
+            manifest.image = image;
+        }
         if let Some(disk_size) = request.disk_size {
             let max_disk_size = self.app.config.cvm.max_disk_size;
             if disk_size > max_disk_size {