Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable base image upgrades for existing instances #67

Merged
merged 3 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 35 additions & 31 deletions tdxctl/src/fde_setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,12 @@ impl SetupFdeArgs {
Ok(())
}

fn mount_rootfs(&self, host_shared: &HostShared, disk_crypt_key: &str) -> Result<()> {
async fn mount_rootfs(
&self,
host_shared: &HostShared,
disk_crypt_key: &str,
nc: &NotifyClient,
) -> Result<()> {
let rootfs_mountpoint = self.rootfs_dir.display().to_string();
if !self.rootfs_encryption {
warn!("Rootfs encryption is disabled, skipping disk encryption");
Expand All @@ -289,21 +294,13 @@ impl SetupFdeArgs {
Self::mount_e2fs("/dev/mapper/rootfs_crypt", &rootfs_mountpoint)?;

let hash_file = self.rootfs_dir.join(".rootfs_hash");
let existing_rootfs_hash = match fs::read(&hash_file) {
Ok(rootfs_hash) => rootfs_hash,
Err(_) => {
// Old image touches .bootstraped instead of .rootfs_hash
if !self.rootfs_dir.join(".bootstraped").exists() {
bail!("Rootfs is not bootstrapped");
}
Default::default()
}
};

let existing_rootfs_hash = fs::read(&hash_file).unwrap_or_default();
if existing_rootfs_hash != host_shared.vm_config.rootfs_hash {
let todo = "do upgrade";
info!("Rootfs hash changed, upgrading the rootfs");
fs::remove_file(&hash_file).context("Failed to remove old rootfs hash file")?;
bail!("Rootfs hash mismatch");
nc.notify_q("boot.progress", "upgrading rootfs").await;
self.extract_rootfs(&host_shared.vm_config.rootfs_hash)
.await?;
}
Ok(())
}
Expand Down Expand Up @@ -344,17 +341,14 @@ impl SetupFdeArgs {
Ok(())
}

fn bootstrap_rootfs(
async fn bootstrap_rootfs(
&self,
host_shared: &HostShared,
disk_crypt_key: &str,
instance_info: &InstanceInfo,
) -> Result<InstanceInfo> {
nc: &NotifyClient,
) -> Result<()> {
info!("Setting up disk encryption");
fs::create_dir_all(&self.root_cdrom_mnt)
.context("Failed to create rootfs cdrom mount point")?;
mount_cdrom(&self.root_cdrom, &self.root_cdrom_mnt.display().to_string())
.context("Failed to mount rootfs cdrom")?;
info!("Formatting rootfs");
let rootfs_dev = if self.rootfs_encryption {
self.luks_setup(disk_crypt_key)?;
Expand All @@ -370,9 +364,21 @@ impl SetupFdeArgs {
&[rootfs_dev, &self.rootfs_dir.display().to_string()],
)
.context("Failed to mount rootfs")?;
self.extract_rootfs(&host_shared.vm_config.rootfs_hash)
.await?;
let mut instance_info = instance_info.clone();
instance_info.bootstrapped = true;
nc.notify_q("instance.info", &serde_json::to_string(&instance_info)?)
.await;
Ok(())
}

async fn extract_rootfs(&self, expected_rootfs_hash: &[u8]) -> Result<()> {
info!("Extracting rootfs");

fs::create_dir_all(&self.root_cdrom_mnt)
.context("Failed to create rootfs cdrom mount point")?;
mount_cdrom(&self.root_cdrom, &self.root_cdrom_mnt.display().to_string())
.context("Failed to mount rootfs cdrom")?;
let rootfs_cpio = self.root_cdrom_mnt.join("rootfs.cpio");
if !rootfs_cpio.exists() {
bail!("Rootfs cpio file not found on cdrom");
Expand All @@ -381,7 +387,7 @@ impl SetupFdeArgs {
fs::File::open(rootfs_cpio).context("Failed to open rootfs cpio file")?;
let mut hashing_rootfs_cpio = HashingFile::<sha2::Sha256, _>::new(rootfs_cpio_file);
let mut status = Command::new("/usr/bin/env")
.args(["cpio", "-i"])
.args(["cpio", "-i", "-d", "-u"])
.current_dir(&self.rootfs_dir)
.stdin(Stdio::piped())
.spawn()
Expand All @@ -408,16 +414,16 @@ impl SetupFdeArgs {
bail!("Failed to extract rootfs, cpio returned {status:?}");
}
let rootfs_hash = hashing_rootfs_cpio.finalize();
if rootfs_hash[..] != host_shared.vm_config.rootfs_hash[..] {
if &rootfs_hash[..] != expected_rootfs_hash {
bail!("Rootfs hash mismatch");
}
info!("Rootfs hash is valid");
let mut instance_info = instance_info.clone();
instance_info.bootstrapped = true;
fs::write(self.rootfs_dir.join(".rootfs_hash"), rootfs_hash)
.context("Failed to write rootfs hash")?;
umount(&self.root_cdrom_mnt.display().to_string())
.context("Failed to unmount rootfs cdrom")?;
info!("Rootfs is ready");
Ok(instance_info)
Ok(())
}

fn write_decrypted_env(&self, decrypted_env: &BTreeMap<String, String>) -> Result<()> {
Expand Down Expand Up @@ -489,13 +495,11 @@ impl SetupFdeArgs {
let disk_crypt_key = format!("{}\n", app_keys.disk_crypt_key);
if instance_info.bootstrapped {
nc.notify_q("boot.progress", "mounting rootfs").await;
self.mount_rootfs(host_shared, &disk_crypt_key)?;
self.mount_rootfs(host_shared, &disk_crypt_key, nc).await?;
} else {
nc.notify_q("boot.progress", "initializing rootfs").await;
let instance_info =
self.bootstrap_rootfs(host_shared, &disk_crypt_key, &instance_info)?;
nc.notify_q("instance.info", &serde_json::to_string(&instance_info)?)
.await;
self.bootstrap_rootfs(host_shared, &disk_crypt_key, &instance_info, nc)
.await?;
}
self.write_decrypted_env(&decrypted_env)?;
nc.notify_q("boot.progress", "rootfs ready").await;
Expand Down
7 changes: 4 additions & 3 deletions teepod/rpc/proto/teepod_rpc.proto
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ message ResizeVmRequest {
optional uint32 memory = 3;
// Disk size in GB
optional uint32 disk_size = 4;
// Image name
optional string image = 5;
}

// Service definition for Teepod
Expand All @@ -138,6 +140,8 @@ service Teepod {
rpc UpgradeApp(UpgradeAppRequest) returns (Id);
// Shutdown a VM
rpc ShutdownVm(Id) returns (google.protobuf.Empty);
// RPC to resize a VM
rpc ResizeVm(ResizeVmRequest) returns (google.protobuf.Empty);

// RPC to list all VMs
rpc Status(google.protobuf.Empty) returns (StatusResponse);
Expand All @@ -149,7 +153,4 @@ service Teepod {

// Get VM info by ID
rpc GetInfo(Id) returns (GetInfoResponse);

// RPC to resize a VM
rpc ResizeVm(ResizeVmRequest) returns (google.protobuf.Empty);
}
159 changes: 116 additions & 43 deletions teepod/src/app.rs
Original file line number Diff line number Diff line change
@@ -1,33 +1,19 @@
//! App related code
//!
//! Directory structure:
//! ```text
//! .teepod/
//! ├── image
//! │ └── ubuntu-24.04
//! │ ├── hda.img
//! │ ├── info.json
//! │ ├── initrd.img
//! │ ├── kernel
//! │ └── rootfs.iso
//! └── vm
//! └── e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
//! └── shared
//! └── app-compose.json
//! ```
use crate::config::{Config, Protocol};

use anyhow::{bail, Context, Result};
use bon::Builder;
use fs_err as fs;
use guest_api::client::DefaultClient as GuestClient;
use id_pool::IdPool;
use kms_rpc::kms_client::KmsClient;
use ra_rpc::client::RaClient;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::net::IpAddr;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex, MutexGuard};
use supervisor_client::SupervisorClient;
use teepod_rpc as pb;
use teepod_rpc::{self as pb, VmConfiguration};
use tracing::{error, info};

pub use image::{Image, ImageInfo};
Expand Down Expand Up @@ -102,40 +88,40 @@ impl App {
let todo = "sanitize the image name";
let image_path = self.config.image_path.join(&manifest.image);
let image = Image::load(&image_path).context("Failed to load image")?;

let cid = cids_assigned.get(&manifest.id).cloned();
let cid = match cid {
Some(cid) => cid,
None => self
.lock()
.cid_pool
.allocate()
.context("CID pool exhausted")?,
};

let vm_config = VmConfig {
manifest,
image,
cid,
networking: self.config.networking.clone(),
workdir: vm_work_dir.path().to_path_buf(),
let vm_id = manifest.id.clone();
{
let mut teapot = self.lock();
let cid = teapot
.get(&vm_id)
.map(|vm| vm.config.cid)
.or_else(|| cids_assigned.get(&vm_id).cloned())
.or_else(|| teapot.cid_pool.allocate())
.context("CID pool exhausted")?;
let vm_config = VmConfig {
manifest,
image,
cid,
networking: self.config.networking.clone(),
workdir: vm_work_dir.path().to_path_buf(),
};
if vm_config.manifest.disk_size > self.config.cvm.max_disk_size {
bail!(
"disk size too large, max size is {}",
self.config.cvm.max_disk_size
);
}
teapot.add(VmState::new(vm_config));
};
if vm_config.manifest.disk_size > self.config.cvm.max_disk_size {
bail!(
"disk size too large, max size is {}",
self.config.cvm.max_disk_size
);
}
let vm_id = vm_config.manifest.id.clone();
self.lock().add(VmState::new(vm_config));
let started = vm_work_dir.started().context("Failed to read VM state")?;
if started {
self.start_vm(&vm_id).await?;
}

Ok(())
}

pub async fn start_vm(&self, id: &str) -> Result<()> {
self.sync_dynamic_config(id)?;
let is_running = self
.supervisor
.info(id)
Expand Down Expand Up @@ -316,6 +302,93 @@ impl App {
}
Ok(())
}

/// Path of the instance's `app-compose.json` inside its shared directory.
pub(crate) fn compose_file_path(&self, id: &str) -> PathBuf {
    let mut path = self.shared_dir(id);
    path.push("app-compose.json");
    path
}

/// Path of the instance's encrypted environment blob inside its shared directory.
pub(crate) fn encrypted_env_path(&self, id: &str) -> PathBuf {
    let mut path = self.shared_dir(id);
    path.push("encrypted-env");
    path
}

/// Directory shared with the guest VM: `<run_path>/<id>/shared`.
pub(crate) fn shared_dir(&self, id: &str) -> PathBuf {
    let mut dir = self.config.run_path.clone();
    dir.push(id);
    dir.push("shared");
    dir
}

/// Create and populate the on-disk working directory for a new VM instance.
///
/// Writes the compose file, the optional encrypted-env blob, and — when an
/// app id is supplied — an `.instance_info` seed file into the instance's
/// `shared` directory.
///
/// # Errors
/// Fails if the instance directory already exists (an existing VM is never
/// clobbered) or if any of the file-system writes fail.
pub(crate) fn prepare_work_dir(&self, id: &str, req: &VmConfiguration) -> Result<VmWorkDir> {
    let work_dir = self.work_dir(id);
    if work_dir.exists() {
        // Fixed grammar of the error message ("is already exists" -> "already exists").
        bail!("The instance already exists at {}", work_dir.display());
    }
    let shared_dir = work_dir.join("shared");
    fs::create_dir_all(&shared_dir).context("Failed to create shared directory")?;
    fs::write(shared_dir.join("app-compose.json"), &req.compose_file)
        .context("Failed to write compose file")?;
    // The encrypted env is optional; only materialize the file when present.
    if !req.encrypted_env.is_empty() {
        fs::write(shared_dir.join("encrypted-env"), &req.encrypted_env)
            .context("Failed to write encrypted env")?;
    }
    // Seed `.instance_info` with the app id so the guest can associate itself
    // with the right app on first boot.
    let app_id = req.app_id.clone().unwrap_or_default();
    if !app_id.is_empty() {
        let instance_info = serde_json::json!({
            "app_id": app_id,
        });
        fs::write(
            shared_dir.join(".instance_info"),
            serde_json::to_string(&instance_info)?,
        )
        // Fixed misleading context: this writes instance info, not the vm config.
        .context("Failed to write instance info")?;
    }
    Ok(work_dir)
}

/// Regenerate the per-instance dynamic configuration shared with the guest:
/// `config.json` (rootfs hash plus service URLs) and the CA certificate
/// material under `shared/certs`.
pub(crate) fn sync_dynamic_config(&self, id: &str) -> Result<()> {
    let cfg = &self.config;
    let work_dir = self.work_dir(id);
    let shared_dir = self.shared_dir(id);
    let certs_dir = shared_dir.join("certs");
    fs::create_dir_all(&certs_dir).context("Failed to create certs directory")?;

    // The image's metadata supplies the expected rootfs hash the guest
    // verifies during boot.
    let manifest = work_dir.manifest().context("Failed to read manifest")?;
    let image_info = ImageInfo::load(cfg.image_path.join(&manifest.image).join("metadata.json"))
        .context("Failed to load image info")?;
    let rootfs_hash = image_info
        .rootfs_hash
        .context("Rootfs hash not found in image info")?;

    let vm_config = serde_json::json!({
        "rootfs_hash": rootfs_hash,
        "kms_url": cfg.cvm.kms_url,
        "tproxy_url": cfg.cvm.tproxy_url,
        "docker_registry": cfg.cvm.docker_registry,
        "host_api_url": format!("vsock://2:{}/api", cfg.host_api.port),
    });
    fs::write(
        shared_dir.join("config.json"),
        serde_json::to_string(&vm_config).context("Failed to serialize vm config")?,
    )
    .context("Failed to write vm config")?;

    // Copy the trust anchors the guest needs for RA-TLS.
    fs::copy(&cfg.cvm.ca_cert, certs_dir.join("ca.cert")).context("Failed to copy ca cert")?;
    fs::copy(&cfg.cvm.tmp_ca_cert, certs_dir.join("tmp-ca.cert"))
        .context("Failed to copy tmp ca cert")?;
    fs::copy(&cfg.cvm.tmp_ca_key, certs_dir.join("tmp-ca.key"))
        .context("Failed to copy tmp ca key")?;
    Ok(())
}

/// Build a KMS RPC client over an RA-TLS transport.
///
/// # Errors
/// Fails when no KMS endpoint is configured (`kms_url` empty).
pub(crate) fn kms_client(&self) -> Result<KmsClient<RaClient>> {
    let kms_url = &self.config.kms_url;
    if kms_url.is_empty() {
        bail!("KMS is not configured");
    }
    let endpoint = format!("{kms_url}/prpc");
    Ok(KmsClient::new(RaClient::new(endpoint, true)))
}

/// Build a guest-API client for the VM's tappd over vsock (port 8000).
///
/// # Errors
/// Fails when no VM with the given id is registered.
pub(crate) fn tappd_client(&self, id: &str) -> Result<GuestClient> {
    // Look up the CID inside a short-lived lock scope so the guard is
    // released before constructing the client.
    let cid = {
        let state = self.lock();
        state.get(id).context("vm not found")?.config.cid
    };
    let endpoint = format!("vsock://{cid}:8000/api");
    Ok(guest_api::client::new_client(endpoint))
}
}

#[derive(Clone)]
Expand Down
1 change: 1 addition & 0 deletions teepod/src/app/qemu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ impl VmWorkDir {
}

pub fn put_manifest(&self, manifest: &Manifest) -> Result<()> {
fs::create_dir_all(&self.workdir).context("Failed to create workdir")?;
let manifest_path = self.manifest_path();
fs::write(manifest_path, serde_json::to_string(manifest)?)
.context("Failed to write manifest")
Expand Down
Loading
Loading