Merge pull request #67 from Dstack-TEE/up-img
Enable base image upgrades for existing instances
kvinwang authored Dec 18, 2024
2 parents 51f95ee + 3201723 commit c16564b
Showing 6 changed files with 196 additions and 191 deletions.
66 changes: 35 additions & 31 deletions tdxctl/src/fde_setup.rs
@@ -264,7 +264,12 @@ impl SetupFdeArgs {
Ok(())
}

fn mount_rootfs(&self, host_shared: &HostShared, disk_crypt_key: &str) -> Result<()> {
async fn mount_rootfs(
&self,
host_shared: &HostShared,
disk_crypt_key: &str,
nc: &NotifyClient,
) -> Result<()> {
let rootfs_mountpoint = self.rootfs_dir.display().to_string();
if !self.rootfs_encryption {
warn!("Rootfs encryption is disabled, skipping disk encryption");
@@ -289,21 +294,13 @@ impl SetupFdeArgs {
Self::mount_e2fs("/dev/mapper/rootfs_crypt", &rootfs_mountpoint)?;

let hash_file = self.rootfs_dir.join(".rootfs_hash");
let existing_rootfs_hash = match fs::read(&hash_file) {
Ok(rootfs_hash) => rootfs_hash,
Err(_) => {
// Old image touches .bootstraped instead of .rootfs_hash
if !self.rootfs_dir.join(".bootstraped").exists() {
bail!("Rootfs is not bootstrapped");
}
Default::default()
}
};

let existing_rootfs_hash = fs::read(&hash_file).unwrap_or_default();
if existing_rootfs_hash != host_shared.vm_config.rootfs_hash {
let todo = "do upgrade";
info!("Rootfs hash changed, upgrading the rootfs");
fs::remove_file(&hash_file).context("Failed to remove old rootfs hash file")?;
bail!("Rootfs hash mismatch");
nc.notify_q("boot.progress", "upgrading rootfs").await;
self.extract_rootfs(&host_shared.vm_config.rootfs_hash)
.await?;
}
Ok(())
}
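
The upgrade check above reduces to: read the stored `.rootfs_hash`, treat a missing file as "never matches", and re-extract when the host-provided hash differs. A minimal standalone sketch of that decision (`rootfs_needs_upgrade` is a hypothetical helper for illustration, not part of the diff):

```rust
use std::fs;
use std::path::Path;

fn rootfs_needs_upgrade(rootfs_dir: &Path, expected_hash: &[u8]) -> bool {
    // A missing or unreadable .rootfs_hash reads as an empty Vec, which
    // never equals a real SHA-256 digest, so legacy instances (which only
    // touched .bootstraped) fall through to the upgrade path as well.
    let stored = fs::read(rootfs_dir.join(".rootfs_hash")).unwrap_or_default();
    stored.as_slice() != expected_hash
}
```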
@@ -344,17 +341,14 @@ impl SetupFdeArgs {
Ok(())
}

fn bootstrap_rootfs(
async fn bootstrap_rootfs(
&self,
host_shared: &HostShared,
disk_crypt_key: &str,
instance_info: &InstanceInfo,
) -> Result<InstanceInfo> {
nc: &NotifyClient,
) -> Result<()> {
info!("Setting up disk encryption");
fs::create_dir_all(&self.root_cdrom_mnt)
.context("Failed to create rootfs cdrom mount point")?;
mount_cdrom(&self.root_cdrom, &self.root_cdrom_mnt.display().to_string())
.context("Failed to mount rootfs cdrom")?;
info!("Formatting rootfs");
let rootfs_dev = if self.rootfs_encryption {
self.luks_setup(disk_crypt_key)?;
Expand All @@ -370,9 +364,21 @@ impl SetupFdeArgs {
&[rootfs_dev, &self.rootfs_dir.display().to_string()],
)
.context("Failed to mount rootfs")?;
self.extract_rootfs(&host_shared.vm_config.rootfs_hash)
.await?;
let mut instance_info = instance_info.clone();
instance_info.bootstrapped = true;
nc.notify_q("instance.info", &serde_json::to_string(&instance_info)?)
.await;
Ok(())
}

async fn extract_rootfs(&self, expected_rootfs_hash: &[u8]) -> Result<()> {
info!("Extracting rootfs");

fs::create_dir_all(&self.root_cdrom_mnt)
.context("Failed to create rootfs cdrom mount point")?;
mount_cdrom(&self.root_cdrom, &self.root_cdrom_mnt.display().to_string())
.context("Failed to mount rootfs cdrom")?;
let rootfs_cpio = self.root_cdrom_mnt.join("rootfs.cpio");
if !rootfs_cpio.exists() {
bail!("Rootfs cpio file not found on cdrom");
@@ -381,7 +387,7 @@ impl SetupFdeArgs {
fs::File::open(rootfs_cpio).context("Failed to open rootfs cpio file")?;
let mut hashing_rootfs_cpio = HashingFile::<sha2::Sha256, _>::new(rootfs_cpio_file);
let mut status = Command::new("/usr/bin/env")
.args(["cpio", "-i"])
.args(["cpio", "-i", "-d", "-u"])
.current_dir(&self.rootfs_dir)
.stdin(Stdio::piped())
.spawn()
@@ -408,16 +414,16 @@ impl SetupFdeArgs {
bail!("Failed to extract rootfs, cpio returned {status:?}");
}
let rootfs_hash = hashing_rootfs_cpio.finalize();
if rootfs_hash[..] != host_shared.vm_config.rootfs_hash[..] {
if &rootfs_hash[..] != expected_rootfs_hash {
bail!("Rootfs hash mismatch");
}
info!("Rootfs hash is valid");
let mut instance_info = instance_info.clone();
instance_info.bootstrapped = true;
fs::write(self.rootfs_dir.join(".rootfs_hash"), rootfs_hash)
.context("Failed to write rootfs hash")?;
umount(&self.root_cdrom_mnt.display().to_string())
.context("Failed to unmount rootfs cdrom")?;
info!("Rootfs is ready");
Ok(instance_info)
Ok(())
}
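
`extract_rootfs` streams the archive through a `HashingFile` wrapper, so the SHA-256 digest is computed while the bytes are piped into `cpio -i -d -u` (`-d` creates missing directories, `-u` overwrites existing files, which is what lets a re-extract land on top of an old rootfs). On a digest mismatch it bails before `.rootfs_hash` is written, so a failed upgrade is retried on the next boot. A self-contained sketch of the hash-while-copying idea, assuming only the `sha2` crate:

```rust
use sha2::{Digest, Sha256};
use std::io::{self, Read, Write};

// Copy `src` into `dst` (e.g. the cpio child's stdin) while folding every
// chunk into a SHA-256 state; returns the digest of everything copied.
fn copy_and_hash(mut src: impl Read, mut dst: impl Write) -> io::Result<[u8; 32]> {
    let mut hasher = Sha256::new();
    let mut buf = [0u8; 64 * 1024];
    loop {
        let n = src.read(&mut buf)?;
        if n == 0 {
            break;
        }
        hasher.update(&buf[..n]);
        dst.write_all(&buf[..n])?;
    }
    Ok(hasher.finalize().into())
}
```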

fn write_decrypted_env(&self, decrypted_env: &BTreeMap<String, String>) -> Result<()> {
@@ -489,13 +495,11 @@ impl SetupFdeArgs {
let disk_crypt_key = format!("{}\n", app_keys.disk_crypt_key);
if instance_info.bootstrapped {
nc.notify_q("boot.progress", "mounting rootfs").await;
self.mount_rootfs(host_shared, &disk_crypt_key)?;
self.mount_rootfs(host_shared, &disk_crypt_key, nc).await?;
} else {
nc.notify_q("boot.progress", "initializing rootfs").await;
let instance_info =
self.bootstrap_rootfs(host_shared, &disk_crypt_key, &instance_info)?;
nc.notify_q("instance.info", &serde_json::to_string(&instance_info)?)
.await;
self.bootstrap_rootfs(host_shared, &disk_crypt_key, &instance_info, nc)
.await?;
}
self.write_decrypted_env(&decrypted_env)?;
nc.notify_q("boot.progress", "rootfs ready").await;
7 changes: 4 additions & 3 deletions teepod/rpc/proto/teepod_rpc.proto
@@ -122,6 +122,8 @@ message ResizeVmRequest {
optional uint32 memory = 3;
// Disk size in GB
optional uint32 disk_size = 4;
// Image name
optional string image = 5;
}

// Service definition for Teepod
@@ -138,6 +140,8 @@ service Teepod {
rpc UpgradeApp(UpgradeAppRequest) returns (Id);
// Shutdown a VM
rpc ShutdownVm(Id) returns (google.protobuf.Empty);
// RPC to resize a VM
rpc ResizeVm(ResizeVmRequest) returns (google.protobuf.Empty);

// RPC to list all VMs
rpc Status(google.protobuf.Empty) returns (StatusResponse);
@@ -149,7 +153,4 @@ service Teepod {

// Get VM info by ID
rpc GetInfo(Id) returns (GetInfoResponse);

// RPC to resize a VM
rpc ResizeVm(ResizeVmRequest) returns (google.protobuf.Empty);
}
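
With the new optional `image` field, a resize request can also swap the base image of an existing instance. A hypothetical client-side construction — only `disk_size` and `image` are taken from the proto above; the remaining fields, including whatever identifies the VM, are not shown in this hunk and are left at their defaults:

```rust
// Hypothetical: prost-generated messages implement Default, so fields
// not visible in this diff hunk can be filled via struct-update syntax.
let req = pb::ResizeVmRequest {
    disk_size: Some(100),               // GB, per the proto comment
    image: Some("ubuntu-24.04".into()), // new: switch the base image in place
    ..Default::default()
};
```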
159 changes: 116 additions & 43 deletions teepod/src/app.rs
@@ -1,33 +1,19 @@
//! App related code
//!
//! Directory structure:
//! ```text
//! .teepod/
//! ├── image
//! │   └── ubuntu-24.04
//! │       ├── hda.img
//! │       ├── info.json
//! │       ├── initrd.img
//! │       ├── kernel
//! │       └── rootfs.iso
//! └── vm
//!     └── e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
//!         └── shared
//!             └── app-compose.json
//! ```
use crate::config::{Config, Protocol};

use anyhow::{bail, Context, Result};
use bon::Builder;
use fs_err as fs;
use guest_api::client::DefaultClient as GuestClient;
use id_pool::IdPool;
use kms_rpc::kms_client::KmsClient;
use ra_rpc::client::RaClient;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::net::IpAddr;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex, MutexGuard};
use supervisor_client::SupervisorClient;
use teepod_rpc as pb;
use teepod_rpc::{self as pb, VmConfiguration};
use tracing::{error, info};

pub use image::{Image, ImageInfo};
@@ -102,40 +88,40 @@ impl App {
let todo = "sanitize the image name";
let image_path = self.config.image_path.join(&manifest.image);
let image = Image::load(&image_path).context("Failed to load image")?;

let cid = cids_assigned.get(&manifest.id).cloned();
let cid = match cid {
Some(cid) => cid,
None => self
.lock()
.cid_pool
.allocate()
.context("CID pool exhausted")?,
};

let vm_config = VmConfig {
manifest,
image,
cid,
networking: self.config.networking.clone(),
workdir: vm_work_dir.path().to_path_buf(),
let vm_id = manifest.id.clone();
{
let mut teapot = self.lock();
let cid = teapot
.get(&vm_id)
.map(|vm| vm.config.cid)
.or_else(|| cids_assigned.get(&vm_id).cloned())
.or_else(|| teapot.cid_pool.allocate())
.context("CID pool exhausted")?;
let vm_config = VmConfig {
manifest,
image,
cid,
networking: self.config.networking.clone(),
workdir: vm_work_dir.path().to_path_buf(),
};
if vm_config.manifest.disk_size > self.config.cvm.max_disk_size {
bail!(
"disk size too large, max size is {}",
self.config.cvm.max_disk_size
);
}
teapot.add(VmState::new(vm_config));
};
if vm_config.manifest.disk_size > self.config.cvm.max_disk_size {
bail!(
"disk size too large, max size is {}",
self.config.cvm.max_disk_size
);
}
let vm_id = vm_config.manifest.id.clone();
self.lock().add(VmState::new(vm_config));
let started = vm_work_dir.started().context("Failed to read VM state")?;
if started {
self.start_vm(&vm_id).await?;
}

Ok(())
}
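
Note the inner block around `self.lock()`: the guard must be dropped before `self.start_vm(&vm_id).await` below, both to avoid holding the lock across a suspension point and because a `std::sync::MutexGuard` is not `Send`. A minimal sketch of the pattern (tokio is assumed here purely to provide an await point):

```rust
use std::sync::Mutex;

// Scope the guard so it drops before the await; otherwise the future
// would hold a non-Send guard across a suspension point and fail to
// be Send itself.
async fn update_then_wait(state: &Mutex<Vec<String>>) {
    {
        let mut guard = state.lock().expect("poisoned");
        guard.push("vm".to_string());
    } // guard dropped here
    tokio::task::yield_now().await;
}
```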

pub async fn start_vm(&self, id: &str) -> Result<()> {
self.sync_dynamic_config(id)?;
let is_running = self
.supervisor
.info(id)
@@ -316,6 +302,93 @@ impl App {
}
Ok(())
}

pub(crate) fn compose_file_path(&self, id: &str) -> PathBuf {
self.shared_dir(id).join("app-compose.json")
}

pub(crate) fn encrypted_env_path(&self, id: &str) -> PathBuf {
self.shared_dir(id).join("encrypted-env")
}

pub(crate) fn shared_dir(&self, id: &str) -> PathBuf {
self.config.run_path.join(id).join("shared")
}

pub(crate) fn prepare_work_dir(&self, id: &str, req: &VmConfiguration) -> Result<VmWorkDir> {
let work_dir = self.work_dir(id);
if work_dir.exists() {
bail!("The instance is already exists at {}", work_dir.display());
}
let shared_dir = work_dir.join("shared");
fs::create_dir_all(&shared_dir).context("Failed to create shared directory")?;
fs::write(shared_dir.join("app-compose.json"), &req.compose_file)
.context("Failed to write compose file")?;
if !req.encrypted_env.is_empty() {
fs::write(shared_dir.join("encrypted-env"), &req.encrypted_env)
.context("Failed to write encrypted env")?;
}
let app_id = req.app_id.clone().unwrap_or_default();
if !app_id.is_empty() {
let instance_info = serde_json::json!({
"app_id": app_id,
});
fs::write(
shared_dir.join(".instance_info"),
serde_json::to_string(&instance_info)?,
)
.context("Failed to write vm config")?;
}
Ok(work_dir)
}

pub(crate) fn sync_dynamic_config(&self, id: &str) -> Result<()> {
let work_dir = self.work_dir(id);
let shared_dir = self.shared_dir(id);
let manifest = work_dir.manifest().context("Failed to read manifest")?;
let certs_dir = shared_dir.join("certs");
fs::create_dir_all(&certs_dir).context("Failed to create certs directory")?;
let cfg = &self.config;
let image_path = cfg.image_path.join(&manifest.image);
let image_info = ImageInfo::load(image_path.join("metadata.json"))
.context("Failed to load image info")?;
let rootfs_hash = image_info
.rootfs_hash
.context("Rootfs hash not found in image info")?;
let vm_config = serde_json::json!({
"rootfs_hash": rootfs_hash,
"kms_url": cfg.cvm.kms_url,
"tproxy_url": cfg.cvm.tproxy_url,
"docker_registry": cfg.cvm.docker_registry,
"host_api_url": format!("vsock://2:{}/api", cfg.host_api.port),
});
let vm_config_str =
serde_json::to_string(&vm_config).context("Failed to serialize vm config")?;
fs::write(shared_dir.join("config.json"), vm_config_str)
.context("Failed to write vm config")?;
fs::copy(&cfg.cvm.ca_cert, certs_dir.join("ca.cert")).context("Failed to copy ca cert")?;
fs::copy(&cfg.cvm.tmp_ca_cert, certs_dir.join("tmp-ca.cert"))
.context("Failed to copy tmp ca cert")?;
fs::copy(&cfg.cvm.tmp_ca_key, certs_dir.join("tmp-ca.key"))
.context("Failed to copy tmp ca key")?;
Ok(())
}
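
On every start, `sync_dynamic_config` regenerates `shared/config.json` from the image's `metadata.json`; since `start_vm` now calls it first, swapping the image via `ResizeVm` changes the `rootfs_hash` the guest sees, which is what triggers the upgrade path in `fde_setup.rs`. A hypothetical guest-side view of that file — field names follow the `json!` literal above, but the types are assumptions (the guest compares `rootfs_hash` as raw bytes, so the real type likely decodes from hex):

```rust
use serde::Deserialize;

// Assumed shape of shared/config.json as written by sync_dynamic_config.
#[derive(Deserialize)]
struct VmConfigFile {
    rootfs_hash: String,     // hex digest; compared as bytes guest-side
    kms_url: String,
    tproxy_url: String,
    docker_registry: String,
    host_api_url: String,    // e.g. vsock://2:<port>/api
}

fn load_vm_config(path: &std::path::Path) -> anyhow::Result<VmConfigFile> {
    let text = fs_err::read_to_string(path)?;
    Ok(serde_json::from_str(&text)?)
}
```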

pub(crate) fn kms_client(&self) -> Result<KmsClient<RaClient>> {
if self.config.kms_url.is_empty() {
bail!("KMS is not configured");
}
let url = format!("{}/prpc", self.config.kms_url);
let prpc_client = RaClient::new(url, true);
Ok(KmsClient::new(prpc_client))
}

pub(crate) fn tappd_client(&self, id: &str) -> Result<GuestClient> {
let cid = self.lock().get(id).context("vm not found")?.config.cid;
Ok(guest_api::client::new_client(format!(
"vsock://{cid}:8000/api"
)))
}
}
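
Both helpers above address services over VSOCK: the host API lives at CID 2 (the well-known host address), while each guest's tappd listens on the guest's own CID at port 8000. A toy parser for the `vsock://CID:PORT/path` convention used in these URLs (illustrative only; the real transports live in `guest_api` and `ra_rpc`):

```rust
// Parses e.g. "vsock://2:8000/api" into (2, 8000, "/api").
fn parse_vsock(url: &str) -> Option<(u32, u32, String)> {
    let rest = url.strip_prefix("vsock://")?;
    let (addr, path) = rest.split_once('/')?;
    let (cid, port) = addr.split_once(':')?;
    Some((cid.parse().ok()?, port.parse().ok()?, format!("/{path}")))
}
```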

#[derive(Clone)]
1 change: 1 addition & 0 deletions teepod/src/app/qemu.rs
@@ -320,6 +320,7 @@ impl VmWorkDir {
}

pub fn put_manifest(&self, manifest: &Manifest) -> Result<()> {
fs::create_dir_all(&self.workdir).context("Failed to create workdir")?;
let manifest_path = self.manifest_path();
fs::write(manifest_path, serde_json::to_string(manifest)?)
.context("Failed to write manifest")