From 4a6b40ecf5679f03dd2bbd9840c99ba1e6174564 Mon Sep 17 00:00:00 2001 From: Zac Mrowicki Date: Wed, 13 Oct 2021 18:20:31 +0000 Subject: [PATCH] logdog: Filter sensitive settings from API output Previously, `logdog` used the `exec` mode to run `apiclient` and dump everything from the datastore. Since a few settings now exist that can store sensitive data, we need to filter them from being written to disk. In order to make this change, `logdog` needed to be trained to understand the data it was gathering, in order to filter it. A new `settings` mode was added specifically for gathering settings. This mode uses the `apiclient` and `datastore` libraries to be able to de/serialize the output, filter it, and then serialize it back to disk. --- sources/Cargo.lock | 6 ++ sources/logdog/Cargo.toml | 6 ++ sources/logdog/README.md | 2 +- sources/logdog/conf/logdog.common.conf | 2 +- sources/logdog/src/error.rs | 28 ++++++++ sources/logdog/src/log_request.rs | 94 +++++++++++++++++++------- sources/logdog/src/main.rs | 19 +++--- 7 files changed, 123 insertions(+), 34 deletions(-) diff --git a/sources/Cargo.lock b/sources/Cargo.lock index b46fa81da8f..8b659496afd 100644 --- a/sources/Cargo.lock +++ b/sources/Cargo.lock @@ -1707,14 +1707,20 @@ dependencies = [ name = "logdog" version = "0.1.0" dependencies = [ + "apiclient", "cargo-readme", + "constants", + "datastore", "flate2", "glob", + "models", "reqwest", + "serde_json", "shell-words", "snafu", "tar", "tempfile", + "tokio", "url", "walkdir", ] diff --git a/sources/logdog/Cargo.toml b/sources/logdog/Cargo.toml index dd3b6638948..582fee24511 100644 --- a/sources/logdog/Cargo.toml +++ b/sources/logdog/Cargo.toml @@ -9,13 +9,19 @@ publish = false exclude = ["README.md"] [dependencies] +apiclient = { path = "../api/apiclient" } +constants = { path = "../constants" } +datastore = { path = "../api/datastore" } flate2 = "1.0" glob = "0.3" +models = { path = "../models" } reqwest = { version = "0.11.1", default-features = false, features = ["blocking", "rustls-tls"] } +serde_json = "1" shell-words = "1.0.0" snafu = { version = "0.6", features = ["backtraces-impl-backtrace-crate"] } tar = { version = "0.4", default-features = false } tempfile = { version = "3.1.0", default-features = false } +tokio = { version = "~1.8", default-features = false, features = ["macros", "rt-multi-thread"] } # LTS url = "2.1.1" walkdir = "2.3" diff --git a/sources/logdog/README.md b/sources/logdog/README.md index 5a7a60e2d2a..85026313708 100644 --- a/sources/logdog/README.md +++ b/sources/logdog/README.md @@ -25,4 +25,4 @@ based on the value of the `VARIANT` environment variable at build time. ## Colophon -This text was generated from `README.tpl` using [cargo-readme](https://crates.io/crates/cargo-readme), and includes the rustdoc from `src/main.rs`. +This text was generated from `README.tpl` using [cargo-readme](https://crates.io/crates/cargo-readme), and includes the rustdoc from `src/main.rs`. \ No newline at end of file diff --git a/sources/logdog/conf/logdog.common.conf b/sources/logdog/conf/logdog.common.conf index aa3e105bbd2..84f31dc15d9 100644 --- a/sources/logdog/conf/logdog.common.conf +++ b/sources/logdog/conf/logdog.common.conf @@ -10,8 +10,8 @@ exec journalctl.errors journalctl -p err -a --no-pager exec journalctl.log journalctl -a --no-pager # file copy does not work for this, use cat command instead exec proc-mounts cat /proc/mounts -exec settings.json apiclient --method GET --uri / exec signpost signpost status exec wicked wicked show all file os-release /etc/os-release glob /var/log/kdump/* +settings settings.json diff --git a/sources/logdog/src/error.rs b/sources/logdog/src/error.rs index 827e8c84d94..7a51ef974e5 100644 --- a/sources/logdog/src/error.rs +++ b/sources/logdog/src/error.rs @@ -3,12 +3,19 @@ use std::io; use std::path::PathBuf; +use datastore::{deserialization, serialization}; use reqwest::Url; use snafu::{Backtrace, Snafu}; #[derive(Debug, Snafu)] #[snafu(visibility = "pub(crate)")] pub(crate) enum Error { + #[snafu(display("Error calling Bottlerocket API '{}': {}", uri, source))] + ApiClient { + source: apiclient::Error, + uri: String, + }, + #[snafu(display("Error creating the command stderr file '{}': {}", path.display(), source))] CommandErrFile { source: io::Error, @@ -57,6 +64,9 @@ pub(crate) enum Error { backtrace: Backtrace, }, + #[snafu(display("Error deserializing Settings: {} ", source))] + DeserializeSettings { source: deserialization::Error }, + #[snafu(display("Error creating the error file '{}': {}", path.display(), source))] ErrorFile { source: io::Error, @@ -71,6 +81,12 @@ pub(crate) enum Error { backtrace: Backtrace, }, + #[snafu(display("Unable to create file '{}': {}", path.display(), source))] + FileCreate { + source: std::io::Error, + path: PathBuf, + }, + #[snafu(display("Unable to copy file from '{}' to '{}' for request '{}': {}", from, to.display(), request, source))] FileCopy { source: std::io::Error, @@ -85,6 +101,12 @@ pub(crate) enum Error { #[snafu(display("Output filename is missing in request: '{}'", request))] FilenameMissing { request: String }, + #[snafu(display("Unable to write to file '{}': {}", path.display(), source))] + FileWrite { + source: serde_json::Error, + path: PathBuf, + }, + #[snafu(display("Unable to create HTTP client for '{}': {}", url, source))] HttpClient { url: Url, source: reqwest::Error }, @@ -136,6 +158,12 @@ pub(crate) enum Error { #[snafu(display("Cannot write to / as a file."))] RootAsFile { backtrace: Backtrace }, + #[snafu(display("Error serializing Settings: {} ", source))] + SerializeSettings { source: serialization::Error }, + + #[snafu(display("Unable to deserialize Bottlerocket settings: {}", source))] + SettingsJson { source: serde_json::Error }, + #[snafu(display("Error closing the tarball '{}': {}", path.display(), source))] TarballClose { source: io::Error, diff --git a/sources/logdog/src/log_request.rs b/sources/logdog/src/log_request.rs index 34639ddbabe..969ee852398 100644 --- a/sources/logdog/src/log_request.rs +++ b/sources/logdog/src/log_request.rs @@ -9,7 +9,9 @@ //! these provide the list of log requests that `logdog` will run. use crate::error::{self, Result}; -use glob::glob; +use datastore::deserialization::from_map; +use datastore::serialization::to_pairs; +use glob::{glob, Pattern}; use reqwest::blocking::{Client, Response}; use snafu::{ensure, OptionExt, ResultExt}; use std::collections::HashSet; @@ -25,6 +27,15 @@ const COMMON_REQUESTS: &str = include_str!("../conf/logdog.common.conf"); /// The `logdog` log requests that are specific to the current variant. const VARIANT_REQUESTS: &str = include_str!("../conf/current/logdog.conf"); +/// Patterns to filter from settings output. These follow the Unix shell style pattern outlined +/// here: https://docs.rs/glob/0.3.0/glob/struct.Pattern.html. +const SENSITIVE_SETTINGS_PATTERNS: &[&str] = &[ + "*.user-data", + "settings.kubernetes.bootstrap-token", + // Can contain a username:password component + "settings.network.https-proxy", +]; + /// Returns the list of log requests to run by combining `VARIANT_REQUESTS` and `COMMON_REQUESTS`. /// These are read at compile time from files named `logdog.conf` and `logdog.common.conf` /// respectively. @@ -96,7 +107,7 @@ impl ToString for LogRequest<'_> { } /// Runs a `LogRequest` and writes its output to a file in `tempdir`. -pub(crate) fn handle_log_request(request: S, tempdir: P) -> Result<()> +pub(crate) async fn handle_log_request(request: S, tempdir: P) -> Result<()> where S: AsRef, P: AsRef, @@ -122,6 +133,7 @@ where }; // execute the log request with the correct handler based on the mode field. match req.mode { + "settings" => handle_settings_request(&req, tempdir).await?, "exec" => handle_exec_request(&req, tempdir)?, "http" | "https" => handle_http_request(&req, tempdir)?, "file" => handle_file_request(&req, tempdir)?, @@ -136,6 +148,40 @@ where Ok(()) } +/// Requests settings from the API, filters them, and writes the output to `tempdir` +async fn handle_settings_request

(request: &LogRequest<'_>, tempdir: P) -> Result<()> +where + P: AsRef, +{ + let settings = get_settings().await?; + let mut settings_map = to_pairs(&settings).context(error::SerializeSettings)?; + + // Filter all settings that match any of the "sensitive" patterns + for pattern in SENSITIVE_SETTINGS_PATTERNS { + let pattern = + Pattern::new(pattern).context(error::ParseGlobPattern { pattern: *pattern })?; + settings_map.retain(|k, _| !pattern.matches(k.name().as_str())) + } + + // Serialize the map back to a `Settings` to remove the escaping so it writes nicely to file + let settings: model::Settings = from_map(&settings_map).context(error::DeserializeSettings)?; + let outpath = tempdir.as_ref().join(request.filename); + let outfile = File::create(&outpath).context(error::FileCreate { path: &outpath })?; + serde_json::to_writer_pretty(&outfile, &settings) + .context(error::FileWrite { path: &outpath })?; + Ok(()) +} + +/// Uses `apiclient` to request all settings from the apiserver and deserializes into a `Settings` +async fn get_settings() -> Result { + let uri = constants::API_SETTINGS_URI; + let (_status, response_body) = apiclient::raw_request(constants::API_SOCKET, uri, "GET", None) + .await + .context(error::ApiClient { uri })?; + + serde_json::from_str(&response_body).context(error::SettingsJson) +} + /// Runs an `exec` `LogRequest`'s `instructions` and writes its output to to `tempdir`. fn handle_exec_request

(request: &LogRequest<'_>, tempdir: P) -> Result<()> where @@ -327,62 +373,62 @@ mod test { assert_eq!(got, want); } - #[test] - fn file_request() { + #[tokio::test] + async fn file_request() { let source_dir = TempDir::new().unwrap(); let source_filepath = source_dir.path().join("foo-bar.source"); let want = "123"; write(&source_filepath, want).unwrap(); let request = format!("file foo-bar {}", source_filepath.display()); let outdir = TempDir::new().unwrap(); - handle_log_request(&request, outdir.path()).unwrap(); + handle_log_request(&request, outdir.path()).await.unwrap(); let outfile = outdir.path().join("foo-bar"); let got = std::fs::read_to_string(&outfile).unwrap(); assert_eq!(got, want); } - #[test] - fn exec_request() { + #[tokio::test] + async fn exec_request() { let want = "hello world! \"quoted\"\n"; let request = r#"exec output-file.txt echo 'hello' "world!" "\"quoted\"""#; let outdir = TempDir::new().unwrap(); - handle_log_request(&request, outdir.path()).unwrap(); + handle_log_request(&request, outdir.path()).await.unwrap(); let outfile = outdir.path().join("output-file.txt"); let got = std::fs::read_to_string(&outfile).unwrap(); assert_eq!(got, want); } - #[test] + #[tokio::test] // ensures single file pattern works - fn glob_single_file_pattern_request() { + async fn glob_single_file_pattern_request() { let source_dir = TempDir::new().unwrap(); create_source_dir(&source_dir); let outdir = TempDir::new().unwrap(); let request = format!("glob {}/foo.source", source_dir.path().display()); - handle_log_request(&request, outdir.path()).unwrap(); + handle_log_request(&request, outdir.path()).await.unwrap(); assert_file_match(&outdir, get_dest_filepath(&source_dir, "foo.source"), "1"); } - #[test] + #[tokio::test] // ensures multiple file pattern works - fn glob_multiple_files_pattern_request() { + async fn glob_multiple_files_pattern_request() { let source_dir = TempDir::new().unwrap(); create_source_dir(&source_dir); let outdir = TempDir::new().unwrap(); let request = format!("glob {}/*.source", source_dir.path().display()); - handle_log_request(&request, outdir.path()).unwrap(); + handle_log_request(&request, outdir.path()).await.unwrap(); assert_file_match(&outdir, get_dest_filepath(&source_dir, "foo.source"), "1"); assert_file_match(&outdir, get_dest_filepath(&source_dir, "bar.source"), "2"); } - #[test] + #[tokio::test] // ensures multiple file in nested directory pattern works - fn glob_nested_file_pattern_request() { + async fn glob_nested_file_pattern_request() { let source_dir = TempDir::new().unwrap(); create_source_dir(&source_dir); let outdir = TempDir::new().unwrap(); let request = format!("glob {}/**/*.source", source_dir.path().display()); - handle_log_request(&request, outdir.path()).unwrap(); + handle_log_request(&request, outdir.path()).await.unwrap(); assert_file_match(&outdir, get_dest_filepath(&source_dir, "foo.source"), "1"); assert_file_match(&outdir, get_dest_filepath(&source_dir, "bar.source"), "2"); assert_file_match( @@ -407,14 +453,14 @@ mod test { ); } - #[test] + #[tokio::test] // ensures directory pattern works - fn glob_dir_pattern_request() { + async fn glob_dir_pattern_request() { let source_dir = TempDir::new().unwrap(); create_source_dir(&source_dir); let outdir = TempDir::new().unwrap(); let request = format!("glob {}/**/", source_dir.path().display()); - handle_log_request(&request, outdir.path()).unwrap(); + handle_log_request(&request, outdir.path()).await.unwrap(); assert_file_match( &outdir, get_dest_filepath(&source_dir, "depth1/foo.source"), @@ -447,12 +493,14 @@ mod test { ); } - #[test] + #[tokio::test] // ensure if pattern is empty it should not panic - fn glob_empty_pattern_request() { + async fn glob_empty_pattern_request() { let outdir = TempDir::new().unwrap(); let request = "glob"; - let err = handle_log_request(&request, outdir.path()).unwrap_err(); + let err = handle_log_request(&request, outdir.path()) + .await + .unwrap_err(); assert!(matches!(err, crate::error::Error::PatternMissing {})); } } diff --git a/sources/logdog/src/main.rs b/sources/logdog/src/main.rs index 2756d58cc61..d537a042fe6 100644 --- a/sources/logdog/src/main.rs +++ b/sources/logdog/src/main.rs @@ -86,7 +86,7 @@ fn parse_args(args: env::Args) -> PathBuf { /// noted in the file named by `ERROR_FILENAME`. Note: In the case of `exec` log requests, non-zero /// exit codes are not considered errors and the command's stdout and stderr will be still be /// written. -pub(crate) fn collect_logs>(log_requests: &[&str], outdir: P) -> Result<()> { +pub(crate) async fn collect_logs>(log_requests: &[&str], outdir: P) -> Result<()> { // if a command fails, we will pipe its error here and continue. let outdir = outdir.as_ref(); let error_path = outdir.join(crate::ERROR_FILENAME); @@ -97,7 +97,7 @@ pub(crate) fn collect_logs>(log_requests: &[&str], outdir: P) -> for &log_request in log_requests { // show the user what command we are running println!("Running: {}", log_request); - if let Err(e) = handle_log_request(log_request, &outdir) { + if let Err(e) = handle_log_request(log_request, &outdir).await { // ignore the error, but make note of it in the error file. write!( &mut error_file, @@ -113,18 +113,19 @@ pub(crate) fn collect_logs>(log_requests: &[&str], outdir: P) -> } /// Runs the bulk of the program's logic, main wraps this. -fn run(outfile: &Path, commands: &[&str]) -> Result<()> { +async fn run(outfile: &Path, commands: &[&str]) -> Result<()> { let temp_dir = TempDir::new().context(error::TempDirCreate)?; - collect_logs(&commands, &temp_dir.path().to_path_buf())?; + collect_logs(&commands, &temp_dir.path().to_path_buf()).await?; create_tarball(&temp_dir.path().to_path_buf(), &outfile)?; println!("logs are at: {}", outfile.display()); Ok(()) } -fn main() -> ! { +#[tokio::main] +async fn main() -> ! { let outpath = parse_args(env::args()); let log_requests = log_requests(); - process::exit(match run(&outpath, &log_requests) { + process::exit(match run(&outpath, &log_requests).await { Ok(()) => 0, Err(err) => { eprintln!("{}", err); @@ -147,14 +148,14 @@ mod tests { use std::fs::File; use tar::Archive; - #[test] - fn test_program() { + #[tokio::test] + async fn test_program() { let output_tempdir = TempDir::new().unwrap(); let outfile = output_tempdir.path().join("logstest"); // we assume that `echo` will not do something unexpected on the machine running this test. let commands = vec!["exec hello.txt echo hello world"]; - run(&outfile, &commands).unwrap(); + run(&outfile, &commands).await.unwrap(); // this function will panic if the given path is not found in the tarball. let find = |path_to_find: &PathBuf| {