Skip to content
This repository has been archived by the owner on Nov 7, 2024. It is now read-only.

lib/tar: Pre-filter tar archive in Rust #112

Merged
merged 1 commit into from
Oct 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion lib/src/async_util.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::io::prelude::*;
use std::pin::Pin;
use tokio::io::{AsyncRead, AsyncReadExt};
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};

/// A [`std::io::Read`] implementation backed by an asynchronous source.
pub(crate) struct ReadBridge<T> {
Expand All @@ -26,6 +26,35 @@ impl<T: AsyncRead> ReadBridge<T> {
}
}

/// A [`std::io::Write`] implementation backed by an asynchronous source.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW I submitted tokio-rs/tokio#4146

pub(crate) struct WriteBridge<T> {
w: Pin<Box<T>>,
rt: tokio::runtime::Handle,
}

impl<T: AsyncWrite> Write for WriteBridge<T> {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
let w = &mut self.w;
self.rt.block_on(async { w.write(buf).await })
}

fn flush(&mut self) -> std::io::Result<()> {
let w = &mut self.w;
self.rt.block_on(async { w.flush().await })
}
}

impl<T: AsyncWrite> WriteBridge<T> {
/// Create a [`std::io::Write`] implementation backed by an asynchronous source.
///
/// This is useful with e.g. [`tokio::task::spawn_blocking`].
pub(crate) fn new(reader: T) -> Self {
let w = Box::pin(reader);
let rt = tokio::runtime::Handle::current();
WriteBridge { w, rt }
}
}

#[cfg(test)]
mod test {
use std::convert::TryInto;
Expand Down
237 changes: 214 additions & 23 deletions lib/src/tar/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,46 @@
//! In the future, this may also evolve into parsing the tar
//! stream in Rust, not in C.

use crate::async_util::{ReadBridge, WriteBridge};
use crate::cmdext::CommandRedirectionExt;
use crate::Result;
use anyhow::{anyhow, Context};
use camino::{Utf8Component, Utf8Path, Utf8PathBuf};
use ostree::gio;
use ostree::prelude::FileExt;
use std::collections::BTreeMap;
use std::convert::TryInto;
use std::io::{BufWriter, Write};
use std::os::unix::prelude::AsRawFd;
use std::path::Path;
use tokio::io::AsyncReadExt;
use std::process::Stdio;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite};
use tracing::instrument;

/// Configuration for tar layer commits.
#[derive(Debug, Default)]
pub struct WriteTarOptions<'a> {
pub struct WriteTarOptions {
/// Base ostree commit hash
pub base: Option<&'a str>,
pub base: Option<String>,
/// Enable SELinux labeling from the base commit
/// Requires the `base` option.
pub selinux: bool,
}

struct TempSePolicy {
tempdir: tempfile::TempDir,
/// The result of writing a tar stream.
///
/// This includes some basic data on the number of files that were filtered
/// out because they were not in `/usr`.
pub struct WriteTarResult {
/// The resulting OSTree commit SHA-256.
pub commit: String,
/// Number of paths in a prefix (e.g. `/var` or `/boot`) which were discarded.
pub filtered: BTreeMap<String, u32>,
}

// Copy of logic from https://github.com/ostreedev/ostree/pull/2447
// to avoid waiting for backport + releases
fn sepolicy_from_base(repo: &ostree::Repo, base: &str) -> Result<TempSePolicy> {
fn sepolicy_from_base(repo: &ostree::Repo, base: &str) -> Result<tempfile::TempDir> {
let cancellable = gio::NONE_CANCELLABLE;
let policypath = "usr/etc/selinux";
let tempdir = tempfile::tempdir()?;
Expand All @@ -49,23 +62,132 @@ fn sepolicy_from_base(repo: &ostree::Repo, base: &str) -> Result<TempSePolicy> {
};
repo.checkout_at(Some(&opts), ostree::AT_FDCWD, policydest, base, cancellable)?;
}
Ok(TempSePolicy { tempdir: tempdir })
Ok(tempdir)
}

#[derive(Debug)]
enum NormalizedPathResult<'a> {
Filtered(&'a str),
Normal(Utf8PathBuf),
}

fn normalize_validate_path<'a>(path: &'a Utf8Path) -> Result<NormalizedPathResult<'a>> {
// This converts e.g. `foo//bar/./baz` into `foo/bar/baz`.
let mut components = path
.components()
.map(|part| {
match part {
// Convert absolute paths to relative
camino::Utf8Component::RootDir => Ok(camino::Utf8Component::CurDir),
// Allow ./ and regular parts
camino::Utf8Component::Normal(_) | camino::Utf8Component::CurDir => Ok(part),
// Barf on Windows paths as well as Unix path uplinks `..`
_ => Err(anyhow!("Invalid path: {}", path)),
}
})
.peekable();
let mut ret = Utf8PathBuf::new();
// Insert a leading `./` if not present
if let Some(Ok(camino::Utf8Component::Normal(_))) = components.peek() {
ret.push(camino::Utf8Component::CurDir);
}
let mut found_first = false;
for part in components {
let part = part?;
if !found_first {
if let Utf8Component::Normal(part) = part {
found_first = true;
// Now, rewrite /etc -> /usr/etc, and discard everything not in /usr.
match part {
"usr" => ret.push(part),
"etc" => {
ret.push("usr/etc");
}
o => return Ok(NormalizedPathResult::Filtered(o)),
}
} else {
ret.push(part);
}
} else {
ret.push(part);
}
}

Ok(NormalizedPathResult::Normal(ret))
}

/// Perform various filtering on imported tar archives.
/// - Move /etc to /usr/etc
/// - Entirely drop files not in /usr
///
/// This also acts as a Rust "pre-parser" of the tar archive, hopefully
/// catching anything corrupt that might be exploitable from the C libarchive side.
/// Remember that we're parsing this while we're downloading it, and in order
/// to verify integrity we rely on the total sha256 of the blob, so all content
/// written before then must be considered untrusted.
fn filter_tar(src: impl std::io::Read, dest: impl std::io::Write) -> Result<BTreeMap<String, u32>> {
let src = std::io::BufReader::new(src);
let mut src = tar::Archive::new(src);
let dest = BufWriter::new(dest);
let mut dest = tar::Builder::new(dest);
let mut filtered = BTreeMap::new();

let ents = src.entries()?;
for entry in ents {
let entry = entry?;
let path = entry.path()?;
let path: &Utf8Path = (&*path).try_into()?;

let normalized = match normalize_validate_path(path)? {
NormalizedPathResult::Filtered(path) => {
if let Some(v) = filtered.get_mut(path) {
*v += 1;
} else {
filtered.insert(path.to_string(), 1);
}
continue;
}
NormalizedPathResult::Normal(path) => path,
};

let mut header = entry.header().clone();
dest.append_data(&mut header, normalized, entry)?;
}
dest.into_inner()?.flush()?;
Ok(filtered)
}

/// Asynchronous wrapper for filter_tar()
async fn filter_tar_async(
src: impl AsyncRead + Send + 'static,
mut dest: impl AsyncWrite + Send + Unpin,
) -> Result<BTreeMap<String, u32>> {
let (tx_buf, mut rx_buf) = tokio::io::duplex(8192);
let tar_transformer = tokio::task::spawn_blocking(move || -> Result<_> {
let src = ReadBridge::new(src);
let dest = WriteBridge::new(tx_buf);
filter_tar(src, dest)
});
let copier = tokio::io::copy(&mut rx_buf, &mut dest);
let (r, v) = tokio::join!(tar_transformer, copier);
let _v: u64 = v?;
Ok(r??)
}

/// Write the contents of a tarball as an ostree commit.
#[allow(unsafe_code)] // For raw fd bits
#[instrument(skip(repo, src))]
pub async fn write_tar(
repo: &ostree::Repo,
mut src: impl tokio::io::AsyncRead + Send + Unpin + 'static,
src: impl tokio::io::AsyncRead + Send + Unpin + 'static,
refname: &str,
options: Option<WriteTarOptions<'_>>,
) -> Result<String> {
use std::process::Stdio;
options: Option<WriteTarOptions>,
) -> Result<WriteTarResult> {
let repo = repo.clone();
let options = options.unwrap_or_default();
let sepolicy = if options.selinux {
if let Some(base) = options.base {
Some(sepolicy_from_base(repo, base).context("tar: Preparing sepolicy")?)
Some(sepolicy_from_base(&repo, &base).context("tar: Preparing sepolicy")?)
} else {
None
}
Expand All @@ -84,12 +206,11 @@ pub async fn write_tar(
c.arg("--repo=/proc/self/fd/3");
if let Some(sepolicy) = sepolicy.as_ref() {
c.arg("--selinux-policy");
c.arg(sepolicy.tempdir.path());
c.arg(sepolicy.path());
}
c.args(&[
"--no-bindings",
"--tar-autocreate-parents",
r#"--tar-pathname-filter=^etc(.*),usr/etc\1"#,
"--tree=tar=/proc/self/fd/0",
"--branch",
refname,
Expand All @@ -99,15 +220,11 @@ pub async fn write_tar(
c.kill_on_drop(true);
let mut r = c.spawn()?;
// Safety: We passed piped() for all of these
let mut child_stdin = r.stdin.take().unwrap();
let child_stdin = r.stdin.take().unwrap();
let mut child_stdout = r.stdout.take().unwrap();
let mut child_stderr = r.stderr.take().unwrap();
// Copy our input to child stdout
let input_copier = async move {
let _n = tokio::io::copy(&mut src, &mut child_stdin).await?;
drop(child_stdin);
Ok::<_, anyhow::Error>(())
};
// Copy the filtered tar stream to child stdin
let filtered_result = filter_tar_async(src, child_stdin);
// Gather stdout/stderr to buffers
let output_copier = async move {
let mut child_stdout_buf = String::new();
Expand All @@ -119,7 +236,8 @@ pub async fn write_tar(
Ok::<_, anyhow::Error>((child_stdout_buf, child_stderr_buf))
};

let (_, (child_stdout, child_stderr)) = tokio::try_join!(input_copier, output_copier)?;
let (filtered_result, (child_stdout, child_stderr)) =
tokio::try_join!(filtered_result, output_copier)?;
let status = r.wait().await?;
// Ensure this lasted until the process exited
drop(sepolicy);
Expand All @@ -132,5 +250,78 @@ pub async fn write_tar(
}
// TODO: trim string in place
let s = child_stdout.trim();
Ok(s.to_string())
Ok(WriteTarResult {
commit: s.to_string(),
filtered: filtered_result,
})
}

#[cfg(test)]
mod tests {
use super::*;
use std::io::Cursor;

#[test]
fn test_normalize_path() {
let valid = &[
("/usr/bin/blah", "./usr/bin/blah"),
("usr/bin/blah", "./usr/bin/blah"),
("usr///share/.//blah", "./usr/share/blah"),
("./", "."),
];
for &(k, v) in valid {
let r = normalize_validate_path(k.into()).unwrap();
match r {
NormalizedPathResult::Filtered(o) => {
panic!("Case {} should not be filtered as {}", k, o)
}
NormalizedPathResult::Normal(p) => {
assert_eq!(v, p.as_str());
}
}
}
let filtered = &[
("/boot/vmlinuz", "boot"),
("var/lib/blah", "var"),
("./var/lib/blah", "var"),
];
for &(k, v) in filtered {
match normalize_validate_path(k.into()).unwrap() {
NormalizedPathResult::Filtered(f) => {
assert_eq!(v, f);
}
NormalizedPathResult::Normal(_) => {
panic!("{} should be filtered", k)
}
}
}
let errs = &["usr/foo/../../bar"];
for &k in errs {
assert!(normalize_validate_path(k.into()).is_err());
}
}

#[tokio::test]
async fn tar_filter() -> Result<()> {
let tempd = tempfile::tempdir()?;
let rootfs = &tempd.path().join("rootfs");
std::fs::create_dir_all(rootfs.join("etc/systemd/system"))?;
std::fs::write(rootfs.join("etc/systemd/system/foo.service"), "fooservice")?;
std::fs::write(rootfs.join("blah"), "blah")?;
let rootfs_tar_path = &tempd.path().join("rootfs.tar");
let rootfs_tar = std::fs::File::create(rootfs_tar_path)?;
let mut rootfs_tar = tar::Builder::new(rootfs_tar);
rootfs_tar.append_dir_all(".", rootfs)?;
let _ = rootfs_tar.into_inner()?;
let mut dest = Vec::new();
let src = tokio::io::BufReader::new(tokio::fs::File::open(rootfs_tar_path).await?);
filter_tar_async(src, &mut dest).await?;
let dest = dest.as_slice();
let mut final_tar = tar::Archive::new(Cursor::new(dest));
let destdir = &tempd.path().join("destdir");
final_tar.unpack(destdir)?;
assert!(destdir.join("usr/etc/systemd/system/foo.service").exists());
assert!(!destdir.join("blah").exists());
Ok(())
}
}
Loading