Skip to content
This repository has been archived by the owner on Nov 1, 2023. It is now read-only.

Fail fast if managed task workers are near-OOM #1657

Merged
merged 21 commits into from
Mar 1, 2022
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/agent/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

82 changes: 79 additions & 3 deletions src/agent/onefuzz-agent/src/managed/cmd.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use std::path::PathBuf;

#[cfg(not(target_os = "macos"))]
use std::time::Duration;

use crate::tasks::config::{CommonConfig, Config};
use anyhow::Result;
use clap::{App, Arg, SubCommand};
use std::path::PathBuf;

use crate::tasks::config::{CommonConfig, Config};

#[cfg(not(target_os = "macos"))]
const OOM_CHECK_INTERVAL: Duration = Duration::from_secs(5);

pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
Expand All @@ -13,7 +20,22 @@ pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> {
let config = Config::from_file(config_path, setup_dir)?;

init_telemetry(config.common());
let result = config.run().await;

let min_available_memory_bytes = 1_000_000 * config.common().min_available_memory_mb;

// If the memory limit is 0, this will resolve immediately with an error.
let check_oom = out_of_memory(min_available_memory_bytes);

let result = tokio::select! {
result = config.run() => result,

// Ignore this task if it returns due to a querying error.
Ok(oom) = check_oom => {
// Convert the OOM notification to an error, so we can log it below.
let err = format_err!("out of memory: {} bytes available, {} required", oom.available_bytes, oom.min_bytes);
Err(err)
ranweiler marked this conversation as resolved.
Show resolved Hide resolved
},
};

if let Err(err) = &result {
error!("error running task: {:?}", err);
Expand All @@ -23,6 +45,60 @@ pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> {
result
}

const MAX_OOM_QUERY_ERRORS: usize = 5;

// Periodically check available system memory.
//
// If available memory drops below the minimum, exit informatively.
//
// Parameterized to enable future configuration by VMSS.
#[cfg(not(target_os = "macos"))]
async fn out_of_memory(min_bytes: u64) -> Result<OutOfMemory> {
ranweiler marked this conversation as resolved.
Show resolved Hide resolved
if min_bytes == 0 {
bail!("available memory minimum is unreachable");
}

let mut consecutive_query_errors = 0;

loop {
match onefuzz::memory::available_bytes() {
Ok(available_bytes) => {
// Reset so we count consecutive errors.
consecutive_query_errors = 0;

if available_bytes < min_bytes {
return Ok(OutOfMemory {
available_bytes,
min_bytes,
});
}
}
Err(err) => {
warn!("error querying system memory usage: {}", err);

consecutive_query_errors += 1;

if consecutive_query_errors > MAX_OOM_QUERY_ERRORS {
return Err(err);
}
}
}

tokio::time::sleep(OOM_CHECK_INTERVAL).await;
}
}

/// macOS stand-in for the periodic memory check.
///
/// There is no memory query for this platform, so the future completes at
/// once with an error instead of polling.
#[cfg(target_os = "macos")]
async fn out_of_memory(_min_bytes: u64) -> Result<OutOfMemory> {
    // Fail right away rather than pretending to watch memory.
    Err(format_err!("out-of-memory check not implemented on macOS"))
}

/// Notification that available system memory fell below the configured minimum.
///
/// Plain data, so it derives the usual value-type traits; `Debug` lets it be
/// logged directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct OutOfMemory {
    /// Available memory, in bytes, observed by the check that tripped.
    available_bytes: u64,
    /// The minimum, in bytes, that `available_bytes` was compared against.
    min_bytes: u64,
}

fn init_telemetry(config: &CommonConfig) {
onefuzz_telemetry::set_appinsights_clients(
config.instance_telemetry_key.clone(),
Expand Down
14 changes: 14 additions & 0 deletions src/agent/onefuzz-agent/src/tasks/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ use serde::{self, Deserialize};
use std::{path::PathBuf, sync::Arc, time::Duration};
use uuid::Uuid;

/// Fallback value for `min_available_memory_mb`, in megabytes, used when the
/// task config does not set the field.
const DEFAULT_MIN_AVAILABLE_MEMORY_MB: u64 = 100;

/// Provider named by the `#[serde(default = "...")]` attribute on
/// `min_available_memory_mb`; serde needs a function path, not a constant.
fn default_min_available_memory_mb() -> u64 {
    DEFAULT_MIN_AVAILABLE_MEMORY_MB
}

#[derive(Debug, Deserialize, PartialEq, Clone)]
pub enum ContainerType {
#[serde(alias = "inputs")]
Expand All @@ -42,6 +48,14 @@ pub struct CommonConfig {

#[serde(default)]
pub setup_dir: PathBuf,

/// Lower bound on available system memory. If the available memory drops
/// below the limit, the task will exit with an error. This is a fail-fast
/// mechanism to support debugging.
///
/// Can be disabled by setting to 0.
#[serde(default = "default_min_available_memory_mb")]
pub min_available_memory_mb: u64,
}

impl CommonConfig {
Expand Down
1 change: 1 addition & 0 deletions src/agent/onefuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ backoff = { version = "0.3", features = ["tokio"] }
winreg = "0.10"
input-tester = { path = "../input-tester" }
debugger = { path = "../debugger" }
winapi = { version = "0.3", features = ["impl-default", "psapi"] }

[target.'cfg(target_family = "unix")'.dependencies]
cpp_demangle = "0.3"
Expand Down
11 changes: 11 additions & 0 deletions src/agent/onefuzz/examples/memory.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#[cfg(not(target_os = "macos"))]
fn main() {
ranweiler marked this conversation as resolved.
Show resolved Hide resolved
let bytes = onefuzz::memory::available_bytes().unwrap();
let gb = (bytes as f64) * 1e-9;
println!("available bytes: {} ({:.1} GB)", bytes, gb);
}

// macOS stub: `onefuzz::memory` defines `available_bytes` only for Windows and
// Linux, so this example has nothing to call here and panics if run.
#[cfg(target_os = "macos")]
fn main() {
    unimplemented!()
}
1 change: 1 addition & 0 deletions src/agent/onefuzz/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub mod input_tester;
pub mod jitter;
pub mod libfuzzer;
pub mod machine_id;
pub mod memory;
pub mod monitor;
pub mod process;
pub mod sha256;
Expand Down
81 changes: 81 additions & 0 deletions src/agent/onefuzz/src/memory.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#[cfg(target_os = "windows")]
use std::convert::TryFrom;

#[cfg(not(target_os = "macos"))]
use anyhow::Result;

#[cfg(target_os = "linux")]
use regex::Regex;

#[cfg(target_os = "windows")]
use winapi::um::psapi::PERFORMANCE_INFORMATION;

/// Returns the memory, in bytes, that can still be committed system-wide.
///
/// Computed as `(CommitLimit - CommitTotal) * PageSize` from
/// `PERFORMANCE_INFORMATION`; the two commit counters are page counts.
///
/// # Errors
///
/// Fails if the performance query fails, or if the byte count cannot be
/// represented as a `u64`.
#[cfg(target_os = "windows")]
pub fn available_bytes() -> Result<u64> {
    let info = get_performance_info()?;

    // Widen to `u64` *before* multiplying. The fields are pointer-width
    // (`SIZE_T`), so on a 32-bit target `pages * PageSize` overflows as soon
    // as 4 GiB or more of commit is free.
    let pages = u64::try_from(info.CommitLimit.saturating_sub(info.CommitTotal))?;
    let page_size = u64::try_from(info.PageSize)?;

    match pages.checked_mul(page_size) {
        Some(bytes) => Ok(bytes),
        None => bail!(
            "available memory overflows u64: {} pages of {} bytes",
            pages,
            page_size
        ),
    }
}

/// Fetches a system-wide `PERFORMANCE_INFORMATION` snapshot via
/// `GetPerformanceInfo`.
///
/// # Errors
///
/// Fails if the call reports failure; the message carries the `GetLastError`
/// code in hexadecimal.
#[cfg(target_os = "windows")]
fn get_performance_info() -> Result<PERFORMANCE_INFORMATION> {
    use winapi::shared::minwindef::FALSE;
    use winapi::um::errhandlingapi::GetLastError;
    use winapi::um::psapi::GetPerformanceInfo;

    // Will always fit in a `u32`.
    //
    // https://docs.microsoft.com/en-us/windows/win32/api/psapi/ns-psapi-performance_information
    let cb = u32::try_from(std::mem::size_of::<PERFORMANCE_INFORMATION>())?;

    let mut perf_info = PERFORMANCE_INFORMATION::default();

    // SAFETY: `perf_info` is a live, exclusively borrowed struct and `cb` is
    // exactly its size.
    let ok = unsafe { GetPerformanceInfo(&mut perf_info, cb) };

    if ok != FALSE {
        return Ok(perf_info);
    }

    // SAFETY: `GetLastError` takes no arguments and only reads the calling
    // thread's last-error value.
    let code = unsafe { GetLastError() };
    bail!("error querying performance information: {:x}", code)
}

/// Returns the `MemAvailable` figure from `/proc/meminfo`, converted to bytes.
///
/// # Errors
///
/// Fails if `/proc/meminfo` cannot be read or has no parsable `MemAvailable`
/// entry.
#[cfg(target_os = "linux")]
pub fn available_bytes() -> Result<u64> {
    // `/proc/meminfo` labels the value `kB`; it is scaled by 1024 here.
    const BYTES_PER_KB: u64 = 1024;

    let contents = std::fs::read_to_string("/proc/meminfo")?;
    let kb = parse_available_kb(&contents)?;

    Ok(kb * BYTES_PER_KB)
}

/// Extracts the `MemAvailable` value, in kB, from `/proc/meminfo`-style text.
///
/// The first match of `AVAILABLE_KB` in `meminfo` is used.
///
/// # Errors
///
/// Fails if nothing matches, or if the captured digits do not parse as `u64`.
#[cfg(target_os = "linux")]
fn parse_available_kb(meminfo: &str) -> Result<u64> {
    // One error covers both "no match" and "no capture group".
    let not_found = || format_err!("`MemAvailable` not found in `/proc/meminfo`");

    let digits = AVAILABLE_KB
        .captures(meminfo)
        .and_then(|caps| caps.get(1))
        .ok_or_else(not_found)?
        .as_str();

    Ok(digits.parse::<u64>()?)
}

#[cfg(target_os = "linux")]
lazy_static::lazy_static! {
    // Compiled once on first use. Matches `MemAvailable:`, optional whitespace,
    // then captures the digit run that precedes a literal ` kB`. The pattern is
    // unanchored, so it can match anywhere in the input.
    static ref AVAILABLE_KB: Regex = Regex::new(r"MemAvailable:\s*(\d+) kB").unwrap();
}

#[cfg(test)]
#[cfg(target_os = "linux")]
mod tests_linux;
99 changes: 99 additions & 0 deletions src/agent/onefuzz/src/memory/tests_linux.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
use anyhow::Result;

use super::parse_available_kb;

/// Every accepted input must yield the same `MemAvailable` value.
#[test]
fn test_parse_available_kb() -> Result<()> {
    const EXPECTED_KB: u64 = 1001092;

    // NOTE(review): some entries are textually identical here; presumably the
    // originals differed in whitespace -- confirm against the upstream file.
    let accepted = [
        MEMINFO,
        "MemAvailable: 1001092 kB",
        // Only the first occurrence counts.
        "MemAvailable: 1001092 kB\tMemAvailable: 123 kB",
        " MemAvailable: 1001092 kB",
        " MemAvailable:1001092 kB",
        " MemAvailable: 1001092 kB",
        " MemAvailable: 1001092 kB",
        "extra MemAvailable: 1001092 kB",
        "extra MemAvailable:1001092 kB",
        "extra MemAvailable: 1001092 kB",
        "extra MemAvailable: 1001092 kB",
    ];

    for input in accepted.iter() {
        assert_eq!(
            parse_available_kb(input)?,
            EXPECTED_KB,
            "input: {:?}",
            input
        );
    }

    Ok(())
}

/// Inputs with no well-formed `MemAvailable: <n> kB` entry must be rejected.
#[test]
fn test_parse_available_kb_missing() {
    let rejected = [
        "",
        "1001092",
        "MemAvailable: ",
        "MemAvailable: 1001092 MB",
        "MemFree: 198308 kB",
    ];

    for input in rejected.iter() {
        assert!(
            parse_available_kb(input).is_err(),
            "unexpectedly parsed {:?}",
            input
        );
    }
}

// Sample `/proc/meminfo`-style text used as a parser fixture. Its
// `MemAvailable` line carries the value 1001092 that the tests expect.
const MEMINFO: &str = "MemTotal: 16036984 kB
MemFree: 198308 kB
MemAvailable: 1001092 kB
Buffers: 521880 kB
Cached: 459416 kB
SwapCached: 1580 kB
Active: 830140 kB
Inactive: 206728 kB
Active(anon): 22492 kB
Inactive(anon): 28876 kB
Active(file): 807648 kB
Inactive(file): 177852 kB
Unevictable: 0 kB
Mlocked: 0 kB
SwapTotal: 4194300 kB
SwapFree: 4181440 kB
Dirty: 8 kB
Writeback: 0 kB
AnonPages: 54368 kB
Mapped: 31344 kB
Shmem: 792 kB
Slab: 192900 kB
SReclaimable: 131056 kB
SUnreclaim: 61844 kB
KernelStack: 3104 kB
PageTables: 5324 kB
NFS_Unstable: 0 kB
Bounce: 0 kB
WritebackTmp: 0 kB
CommitLimit: 12212792 kB
Committed_AS: 575108 kB
VmallocTotal: 34359738367 kB
VmallocUsed: 0 kB
VmallocChunk: 0 kB
HardwareCorrupted: 0 kB
AnonHugePages: 0 kB
ShmemHugePages: 0 kB
ShmemPmdMapped: 0 kB
CmaTotal: 0 kB
CmaFree: 0 kB
HugePages_Total: 0
HugePages_Free: 0
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 2048 kB
DirectMap4k: 152880 kB
DirectMap2M: 4696064 kB
DirectMap1G: 11534336 kB";