diff --git a/crates/ruff_linter/resources/test/fixtures/flake8_bandit/S202.py b/crates/ruff_linter/resources/test/fixtures/flake8_bandit/S202.py new file mode 100644 index 0000000000000..bba1deda4ab36 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/flake8_bandit/S202.py @@ -0,0 +1,65 @@ +import sys +import tarfile +import tempfile + + +def unsafe_archive_handler(filename): + tar = tarfile.open(filename) + tar.extractall(path=tempfile.mkdtemp()) + tar.close() + + +def managed_members_archive_handler(filename): + tar = tarfile.open(filename) + tar.extractall(path=tempfile.mkdtemp(), members=members_filter(tar)) + tar.close() + + +def list_members_archive_handler(filename): + tar = tarfile.open(filename) + tar.extractall(path=tempfile.mkdtemp(), members=[]) + tar.close() + + +def provided_members_archive_handler(filename): + tar = tarfile.open(filename) + tarfile.extractall(path=tempfile.mkdtemp(), members=tar) + tar.close() + + +def filter_data(filename): + tar = tarfile.open(filename) + tarfile.extractall(path=tempfile.mkdtemp(), filter="data") + tar.close() + + +def filter_fully_trusted(filename): + tar = tarfile.open(filename) + tarfile.extractall(path=tempfile.mkdtemp(), filter="fully_trusted") + tar.close() + + +def filter_tar(filename): + tar = tarfile.open(filename) + tarfile.extractall(path=tempfile.mkdtemp(), filter="tar") + tar.close() + + +def members_filter(tarfile): + result = [] + for member in tarfile.getmembers(): + if '../' in member.name: + print('Member name container directory traversal sequence') + continue + elif (member.issym() or member.islnk()) and ('../' in member.linkname): + print('Symlink to external resource') + continue + result.append(member) + return result + + +if __name__ == "__main__": + if len(sys.argv) > 1: + filename = sys.argv[1] + unsafe_archive_handler(filename) + managed_members_archive_handler(filename) diff --git a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs index 313218cca2ded..f418fbf1212d5 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs @@ -615,6 +615,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { if checker.enabled(Rule::DjangoRawSql) { flake8_bandit::rules::django_raw_sql(checker, call); } + if checker.enabled(Rule::TarfileUnsafeMembers) { + flake8_bandit::rules::tarfile_unsafe_members(checker, call); + } if checker.enabled(Rule::UnnecessaryGeneratorList) { flake8_comprehensions::rules::unnecessary_generator_list( checker, expr, func, args, keywords, diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index 759ccc54ac49e..1e04278e0f15b 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -595,6 +595,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Flake8Bandit, "112") => (RuleGroup::Stable, rules::flake8_bandit::rules::TryExceptContinue), (Flake8Bandit, "113") => (RuleGroup::Stable, rules::flake8_bandit::rules::RequestWithoutTimeout), (Flake8Bandit, "201") => (RuleGroup::Preview, rules::flake8_bandit::rules::FlaskDebugTrue), + (Flake8Bandit, "202") => (RuleGroup::Preview, rules::flake8_bandit::rules::TarfileUnsafeMembers), (Flake8Bandit, "301") => (RuleGroup::Stable, rules::flake8_bandit::rules::SuspiciousPickleUsage), (Flake8Bandit, "302") => (RuleGroup::Stable, rules::flake8_bandit::rules::SuspiciousMarshalUsage), (Flake8Bandit, "303") => (RuleGroup::Stable, rules::flake8_bandit::rules::SuspiciousInsecureHashUsage), diff --git a/crates/ruff_linter/src/rules/flake8_bandit/mod.rs b/crates/ruff_linter/src/rules/flake8_bandit/mod.rs index feb2dcb507651..ce041669f1782 100644 --- a/crates/ruff_linter/src/rules/flake8_bandit/mod.rs +++ b/crates/ruff_linter/src/rules/flake8_bandit/mod.rs @@ -51,6 +51,7 @@ mod tests { #[test_case(Rule::UnsafeYAMLLoad, Path::new("S506.py"))] #[test_case(Rule::WeakCryptographicKey, Path::new("S505.py"))] #[test_case(Rule::DjangoRawSql, Path::new("S611.py"))] + #[test_case(Rule::TarfileUnsafeMembers, Path::new("S202.py"))] fn rules(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); let diagnostics = test_path( diff --git a/crates/ruff_linter/src/rules/flake8_bandit/rules/mod.rs b/crates/ruff_linter/src/rules/flake8_bandit/rules/mod.rs index b7a655366876f..ee1de347d6152 100644 --- a/crates/ruff_linter/src/rules/flake8_bandit/rules/mod.rs +++ b/crates/ruff_linter/src/rules/flake8_bandit/rules/mod.rs @@ -21,6 +21,7 @@ pub(crate) use snmp_insecure_version::*; pub(crate) use snmp_weak_cryptography::*; pub(crate) use ssh_no_host_key_verification::*; pub(crate) use suspicious_function_call::*; +pub(crate) use tarfile_unsafe_members::*; pub(crate) use try_except_continue::*; pub(crate) use try_except_pass::*; pub(crate) use unsafe_yaml_load::*; @@ -49,6 +50,7 @@ mod snmp_insecure_version; mod snmp_weak_cryptography; mod ssh_no_host_key_verification; mod suspicious_function_call; +mod tarfile_unsafe_members; mod try_except_continue; mod try_except_pass; mod unsafe_yaml_load; diff --git a/crates/ruff_linter/src/rules/flake8_bandit/rules/tarfile_unsafe_members.rs b/crates/ruff_linter/src/rules/flake8_bandit/rules/tarfile_unsafe_members.rs new file mode 100644 index 0000000000000..1fd35e3c7ad35 --- /dev/null +++ b/crates/ruff_linter/src/rules/flake8_bandit/rules/tarfile_unsafe_members.rs @@ -0,0 +1,75 @@ +use crate::checkers::ast::Checker; +use ruff_diagnostics::Diagnostic; +use ruff_diagnostics::Violation; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::{self as ast}; +use ruff_text_size::Ranged; + +/// ## What it does +/// Checks for uses of `tarfile.extractall`. +/// +/// ## Why is this bad? +/// +/// Extracting archives from untrusted sources without prior inspection is +/// a security risk, as maliciously crafted archives may contain files that +/// will be written outside of the target directory. For example, the archive +/// could include files with absolute paths (e.g., `/etc/passwd`), or relative +/// paths with parent directory references (e.g., `../etc/passwd`). +/// +/// On Python 3.12 and later, use `filter='data'` to prevent the most dangerous +/// security issues (see: [PEP 706]). On earlier versions, set the `members` +/// argument to a trusted subset of the archive's members. +/// +/// ## Example +/// ```python +/// import tarfile +/// import tempfile +/// +/// tar = tarfile.open(filename) +/// tar.extractall(path=tempfile.mkdtemp()) +/// tar.close() +/// ``` +/// +/// ## References +/// - [Common Weakness Enumeration: CWE-22](https://cwe.mitre.org/data/definitions/22.html) +/// - [Python Documentation: `TarFile.extractall`](https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall) +/// - [Python Documentation: Extraction filters](https://docs.python.org/3/library/tarfile.html#tarfile-extraction-filter) +/// +/// [PEP 706]: https://peps.python.org/pep-0706/#backporting-forward-compatibility +#[violation] +pub struct TarfileUnsafeMembers; + +impl Violation for TarfileUnsafeMembers { + #[derive_message_formats] + fn message(&self) -> String { + format!("Uses of `tarfile.extractall()`") + } +} + +/// S202 +pub(crate) fn tarfile_unsafe_members(checker: &mut Checker, call: &ast::ExprCall) { + if !call + .func + .as_attribute_expr() + .is_some_and(|attr| attr.attr.as_str() == "extractall") + { + return; + } + + if call + .arguments + .find_keyword("filter") + .and_then(|keyword| keyword.value.as_string_literal_expr()) + .is_some_and(|value| matches!(value.value.as_str(), "data" | "tar")) + { + return; + } + + if !checker.semantic().seen(&["tarfile"]) { + return; + } + + checker + .diagnostics + .push(Diagnostic::new(TarfileUnsafeMembers, call.func.range())); +} diff --git a/crates/ruff_linter/src/rules/flake8_bandit/snapshots/ruff_linter__rules__flake8_bandit__tests__S202_S202.py.snap b/crates/ruff_linter/src/rules/flake8_bandit/snapshots/ruff_linter__rules__flake8_bandit__tests__S202_S202.py.snap new file mode 100644 index 0000000000000..fb951d05d1d44 --- /dev/null +++ b/crates/ruff_linter/src/rules/flake8_bandit/snapshots/ruff_linter__rules__flake8_bandit__tests__S202_S202.py.snap @@ -0,0 +1,49 @@ +--- +source: crates/ruff_linter/src/rules/flake8_bandit/mod.rs +--- +S202.py:8:5: S202 Uses of `tarfile.extractall()` + | +6 | def unsafe_archive_handler(filename): +7 | tar = tarfile.open(filename) +8 | tar.extractall(path=tempfile.mkdtemp()) + | ^^^^^^^^^^^^^^ S202 +9 | tar.close() + | + +S202.py:14:5: S202 Uses of `tarfile.extractall()` + | +12 | def managed_members_archive_handler(filename): +13 | tar = tarfile.open(filename) +14 | tar.extractall(path=tempfile.mkdtemp(), members=members_filter(tar)) + | ^^^^^^^^^^^^^^ S202 +15 | tar.close() + | + +S202.py:20:5: S202 Uses of `tarfile.extractall()` + | +18 | def list_members_archive_handler(filename): +19 | tar = tarfile.open(filename) +20 | tar.extractall(path=tempfile.mkdtemp(), members=[]) + | ^^^^^^^^^^^^^^ S202 +21 | tar.close() + | + +S202.py:26:5: S202 Uses of `tarfile.extractall()` + | +24 | def provided_members_archive_handler(filename): +25 | tar = tarfile.open(filename) +26 | tarfile.extractall(path=tempfile.mkdtemp(), members=tar) + | ^^^^^^^^^^^^^^^^^^ S202 +27 | tar.close() + | + +S202.py:38:5: S202 Uses of `tarfile.extractall()` + | +36 | def filter_fully_trusted(filename): +37 | tar = tarfile.open(filename) +38 | tarfile.extractall(path=tempfile.mkdtemp(), filter="fully_trusted") + | ^^^^^^^^^^^^^^^^^^ S202 +39 | tar.close() + | + + diff --git a/ruff.schema.json b/ruff.schema.json index c4acb767cd585..8ae7c3fe37fb2 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3368,6 +3368,7 @@ "S2", "S20", "S201", + "S202", "S3", "S30", "S301",