Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Python detection based on the PyPI malregistry #584

Merged
merged 10 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ fix: $(FIXERS)
# END: lint-install ../malcontent

SAMPLES_REPO ?= chainguard-dev/malcontent-samples
SAMPLES_COMMIT ?= ec1ba5f2dc0e1f7085a0af73aa0f6fb1043e7534
SAMPLES_COMMIT ?= e58368a24b930f7dcf555678a8bc63f9d45aef24
OUT_DIR=out/samples-$(SAMPLES_COMMIT).tmp
out/samples-$(SAMPLES_COMMIT):
mkdir -p out
Expand Down
3 changes: 2 additions & 1 deletion pkg/action/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ var archiveMap = map[string]bool{
".tar.xz": true,
".tar": true,
".tgz": true,
".whl": true,
".xz": true,
".zip": true,
}
Expand Down Expand Up @@ -416,7 +417,7 @@ func extractArchiveToTempDir(ctx context.Context, path string) (string, error) {

func extractionMethod(ext string) func(context.Context, string, string) error {
switch ext {
case ".jar", ".zip":
case ".jar", ".zip", ".whl":
return extractZip
case ".gz":
return extractGzip
Expand Down
4 changes: 2 additions & 2 deletions pkg/action/testdata/scan_archive
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@ c2/server_address: medium
collect/archives/zip: medium
credential/keychain: medium
credential/password: low
credential/ssh: medium
credential/ssl/private_key: low
crypto/aes: low
crypto/ecdsa: low
crypto/ed25519: low
crypto/tls: low
data/compression/bzip2: low
data/compression/gzip: low
data/compression/lzma: low
data/compression/zstd: low
data/embedded/pem_certificate: low
data/embedded/pem_test_key: low
Expand Down Expand Up @@ -47,6 +46,7 @@ fs/fifo_create: low
fs/file/delete: low
fs/file/delete_forcibly: low
fs/file/read: low
fs/file/rename: low
fs/file/stat: low
fs/file/write: low
fs/link_create: low
Expand Down
3 changes: 3 additions & 0 deletions pkg/compile/compile.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ var rulesWithWarnings = map[string]bool{
"exfil_libcurl_elf": true,
"small_opaque_archaic_gcc": true,
"elf_hardcoded_ip": true,
"python_hex_decimal": true,
"python_long_hex": true,
"python_long_hex_multiple": true,
}

func Recursive(ctx context.Context, fss []fs.FS) (*yara.Rules, error) {
Expand Down
1 change: 1 addition & 0 deletions pkg/programkind/programkind.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ var supportedKind = map[string]string{
"sh": "application/x-sh",
"so": "application/x-sharedlib",
"ts": "application/typescript",
"whl": "application/x-wheel+zip",
"yaml": "",
"yara": "",
"yml": "",
Expand Down
8 changes: 4 additions & 4 deletions rules/anti-static/base64/eval.yara
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ rule eval_base64: high {
hash_2023_0xShell = "a6f1f9c9180cb77952398e719e4ef083ccac1e54c5242ea2bc6fe63e6ab4bb29"

strings:
$eval = /eval\(.{0,64}base64/
$eval = /eval\(.{0,256}base64/

condition:
any of them
Expand Down Expand Up @@ -51,10 +51,10 @@ rule ruby_eval2_near_enough: critical {
$base64 = "b64decode"

condition:
all of them and math.abs(@base64 - @eval) <= 64
all of them and math.abs(@base64 - @eval) <= 200
}

rule python_exec_near_enough: critical {
rule python_exec_near_enough: high {
meta:
description = "Evaluates base64 content"
hash_2023_UPX_7f5fd8c7cad4873993468c0c0a4cabdd8540fd6c2679351f58580524c1bfd0af_elf_x86_64 = "3b9f8c159df5d342213ed7bd5bc6e07bb103a055f4ac90ddb4b981957cd0ab53"
Expand All @@ -66,7 +66,7 @@ rule python_exec_near_enough: critical {
$base64 = "b64decode"

condition:
all of them and math.abs(@base64 - @exec) < 128
all of them and math.abs(@base64 - @exec) < 200
}

rule echo_decode_bash_probable: high {
Expand Down
1 change: 1 addition & 0 deletions rules/anti-static/base64/exec.yara
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ rule base64_commands: high {
$b_tar_c = "tar -c" base64
$b_tar_x = "tar -x" base64
$b_bash_c = "bash -c" base64
$b_type_nul = "type nul" base64
$not_kandji = "kandji-parameter-agent"
$not_mdmprofile = "mdmprofile"
$not_example = "commands are encoded"
Expand Down
1 change: 1 addition & 0 deletions rules/anti-static/base64/function_names.yara
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ rule base64_python_functions: critical {
$f_b64decode = "base64.b64decode" base64
$f_exc = "except Exception as" base64
$f_os_system = "os.system" base64
$f_os_startfile = "os.startfile" base64
$f_os_popen = "os.popen" base64
$f_thread = "threading.Thread" base64
$f_os_environ = "os.environ" base64
Expand Down
4 changes: 3 additions & 1 deletion rules/anti-static/obfuscation/generic/hex_values.yara
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ rule excessive_hex_refs: medium {

strings:
$x = /0x[\dabcdefABCDEF]{2,8}/
$y = /\\x[\dabcdefABCDEF]{2,8}/

condition:
filesize < 1MB and #x > 64
filesize < 1MB and (#x > 64 or #y > 256)
}

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
rule php_hex_functons: high {
rule php_hex_functions: high {
meta:
description = "contains function references encoded in hex"
hash_2023_0xShell_crot = "900c0453212babd82baa5151bba3d8e6fa56694aff33053de8171a38ff1bef09"
Expand Down
7 changes: 4 additions & 3 deletions rules/anti-static/obfuscation/python/builtins.yara
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ rule py_indirect_builtins: suspicious {

private rule pythonSetup {
strings:
$i_distutils = "from distutils.core import setup"
$i_setuptools = "from setuptools import setup"
$setup = "setup("
$if_distutils = /from distutils.core import .{0,32}setup/
$if_setuptools = /from setuptools import .{0,32}setup/
$i_setuptools = "import setuptools"
$setup = "setup("

$not_setup_example = ">>> setup("
$not_setup_todict = "setup(**config.todict()"
Expand Down
34 changes: 34 additions & 0 deletions rules/anti-static/obfuscation/python/eval.yara
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// python_eval_hex flags an eval( call whose argument is a double-quoted
// string literal that starts with a backslash escape sequence.
rule python_eval_hex: high {
meta:
description = "evaluates code from an obfuscated data stream"

strings:
// eval(" then a literal \x and 1-3 decimal digits, plus up to 32 following characters.
// NOTE(review): \d accepts decimal digits only, so an escape whose first
// hex digit is a-f (e.g. \xab) does not match - confirm this is intended.
$hex = /eval\(\"\\x\d{1,3}.{0,32}/
// eval(" then a literal backslash and 1-3 decimal digits, plus up to 32 following characters.
$chars = /eval\(\"\\\d{1,3}.{0,32}/

condition:
// Either escape style on its own is enough to match.
any of them
}

// python_eval_marshal flags eval( applied directly to the result of a
// deserialization call.
rule python_eval_marshal: high {
meta:
description = "evaluates code from marshalled data"

strings:
// Literal text of eval wrapping marshal.loads.
$marshal = "eval(marshal.loads"
// Literal text of eval wrapping json.loads; note the description above only
// mentions marshalled data, but this string matches as well.
$json = "eval(json.loads"

condition:
// A single occurrence of either literal triggers the rule.
any of them
}

// python_eval_gzip flags eval( wrapped around a call that in turn receives
// gzip.decompress(...) as its argument.
rule python_eval_gzip: high {
meta:
description = "evaluates code from gzip content"

strings:
// eval( + up to 32 characters (the wrapping call's name) + "(gzip.decompress(b"
// + up to 32 characters of the argument.
// The trailing "b" is presumably the prefix of a bytes literal - TODO confirm.
$ref = /eval\(.{0,32}\(gzip\.decompress\(b.{0,32}/

condition:
any of them
}
34 changes: 34 additions & 0 deletions rules/anti-static/obfuscation/python/exec.yara
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// python_exec_hex flags an exec( call whose argument is a double-quoted
// string literal that starts with a backslash escape sequence.
rule python_exec_hex: high {
meta:
description = "executes code from an obfuscated data stream"

strings:
// exec(" then a literal \x and 1-3 decimal digits, plus up to 32 following characters.
// NOTE(review): \d accepts decimal digits only, so an escape whose first
// hex digit is a-f (e.g. \xab) does not match - confirm this is intended.
$hex = /exec\(\"\\x\d{1,3}.{0,32}/
// exec(" then a literal backslash and 1-3 decimal digits, plus up to 32 following characters.
$chars = /exec\(\"\\\d{1,3}.{0,32}/

condition:
// Either escape style on its own is enough to match.
any of them
}

// python_exec_marshal flags exec( applied directly to the result of a
// deserialization call.
rule python_exec_marshal: high {
meta:
description = "executes code from marshalled data"

strings:
// Literal text of exec wrapping marshal.loads.
$marshal = "exec(marshal.loads"
// Literal text of exec wrapping json.loads; the description only mentions
// marshalled data, but this string matches as well.
$json = "exec(json.loads"

condition:
// A single occurrence of either literal triggers the rule.
any of them
}

// python_exec_gzip flags exec( wrapped around a call that in turn receives
// gzip.decompress(...) as its argument.
rule python_exec_gzip: high {
meta:
description = "executes code from gzip content"

strings:
// exec( + up to 32 characters (the wrapping call's name) + "(gzip.decompress(b"
// + up to 32 characters of the argument.
// The trailing "b" is presumably the prefix of a bytes literal - TODO confirm.
$ref = /exec\(.{0,32}\(gzip\.decompress\(b.{0,32}/

condition:
any of them
}
20 changes: 20 additions & 0 deletions rules/anti-static/obfuscation/python/fernet_base64.yara
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// fernet_base64 flags files under 2MB that mention Fernet, a base64/base32
// decode call, and at least one follow-up decode/decrypt/execute operation.
rule fernet_base64: high {
meta:
description = "Decodes base64, uses Fernet"
filetypes = "py"

strings:
// Group 1 ($fernet*): the word Fernet in either capitalisation, as a whole word.
$fernet = "Fernet" fullword
$fernet2 = "fernet" fullword
// Group 2 ($bdecode*): base64 or base32 decoding, as a whole word.
$bdecode_64 = "b64decode" fullword
$bdecode_32 = "b32decode" fullword
// Group 3 ($o*): operations applied to data; numbering skips $o3.
$o1 = "decode()"
$o2 = "decompress("
$o4 = "bytes.fromhex"
$o5 = "decrypt("
$o6 = "exec("
$o7 = "eval("

condition:
// One string from each of the three groups is required.
filesize < 2MB and any of ($fernet*) and any of ($bdecode*) and any of ($o*)
}
23 changes: 23 additions & 0 deletions rules/anti-static/obfuscation/python/hex.yara
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// python_long_hex flags files under 1MB containing at least one assignment of
// a quoted string that starts with 1024 consecutive lowercase alphanumerics.
rule python_long_hex: medium {
meta:
description = "contains a large hexadecimal string variable"
filetypes = "py"

strings:
// Up to 16 word characters, "=" with no surrounding spaces, an opening quote
// (single or double), then exactly 1024 characters from [a-z0-9].
// NOTE(review): [a-z0-9] is wider than hexadecimal ([a-f0-9]); confirm the
// broader class is intended given the description.
$assign = /\w{0,16}=["'][a-z0-9]{1024}/

condition:
filesize < 1MB and $assign
}

// python_long_hex_multiple is the stricter variant of the single-assignment
// check: the same long-string pattern must occur more than three times.
rule python_long_hex_multiple: high {
meta:
description = "contains multiple large hexadecimal string variables"
filetypes = "py"

strings:
// Up to 16 word characters, "=" with no surrounding spaces, an opening quote
// (single or double), then exactly 1024 characters from [a-z0-9].
// NOTE(review): [a-z0-9] is wider than hexadecimal ([a-f0-9]); confirm the
// broader class is intended given the description.
$assign = /\w{0,16}=["'][a-z0-9]{1024}/

condition:
// #assign is the match count; "> 3" means four or more matches.
filesize < 1MB and #assign > 3
}
16 changes: 16 additions & 0 deletions rules/anti-static/obfuscation/python/hex_decimal.yara
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// python_hex_decimal flags files under 1MB that contain one of the $f_*
// keywords and end with a dense run of backslash escapes.
rule python_hex_decimal: high {
meta:
description = "contains a large amount of escaped hex/decimal content"
filetypes = "py"

strings:
// Keywords: at least one of these must be present anywhere in the file.
$f_return = "return"
$f_decode = "decode("
$f_eval = "eval("
$f_exec = "exec("

// A literal backslash, an optional "x", 1-3 decimal digits, then another
// literal backslash. The closing "\\" is an escaped backslash, so the final
// "/" still terminates the regex; it means an escape only matches when it is
// immediately followed by the backslash that starts the next escape.
$trash = /\\x{0,1}\d{1,3}\\/
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the closing escape correct?


condition:
// Counts $trash matches only within the final 1024 bytes of the file and
// requires more than 100 of them.
filesize < 1MB and any of ($f*) and #trash in (filesize - 1024..filesize) > 100
}
12 changes: 12 additions & 0 deletions rules/anti-static/obfuscation/python/int_compares.yara
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// dumb_int_compares flags files under 1MB that contain the word "import" and
// an "if" statement comparing two integer literals for equality.
rule dumb_int_compares: high {
meta:
description = "compares arbitrary integers, likely encoding something"
filetypes = "py"

strings:
$import = "import" fullword
// "if <2-16 digits> == <2-16 digits>" with single spaces around "==".
// NOTE(review): the identifier mentions decode/b64decode but the pattern is
// an integer comparison; the name looks carried over from another rule.
$decode_or_b64decode = /if \d{2,16} == \d{2,16}/

condition:
// Both the import keyword and the integer comparison must be present.
filesize < 1MB and all of them
}
26 changes: 26 additions & 0 deletions rules/anti-static/obfuscation/python/multi_decode.yara
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// multi_decode_3 flags files under 1MB containing "return" and three decode
// calls in sequence, each within 256 characters of the previous one.
rule multi_decode_3: high {
meta:
description = "multiple (3+) levels of decoding"
filetypes = "py"

strings:
$return = "return"
// [b64]{0,3} is a character class: zero to three characters drawn from
// "b", "6", "4". It therefore matches ".decode" and ".b64decode" (and other
// orderings of those characters), but not e.g. ".b32decode".
// The first two occurrences must be followed by "("; the third is matched
// up to the word "decode" only.
$decode_or_b64decode = /\.[b64]{0,3}decode\(.{0,256}\.[b64]{0,3}decode\(.{0,256}\.[b64]{0,3}decode/

condition:
filesize < 1MB and all of them
}

// multi_decode flags files under 1MB containing "return" and two decode
// calls where the second starts within 32 characters of the first.
rule multi_decode: medium {
meta:
description = "multiple (2) levels of decoding"
filetypes = "py"

strings:
$return = "return"
// [b64]{0,3} is a character class: zero to three characters drawn from
// "b", "6", "4". It therefore matches ".decode(" and ".b64decode(" (and other
// orderings of those characters), but not e.g. ".b32decode(".
$decode_or_b64decode = /\.[b64]{0,3}decode\(.{0,32}\.[b64]{0,3}decode\(/

condition:
filesize < 1MB and all of them
}

Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ import "math"

private rule pythonSetup {
strings:
$i_distutils = "from distutils.core import setup"
$i_setuptools = "from setuptools import setup"
$setup = "setup("
$if_distutils = /from distutils.core import .{0,32}setup/
$if_setuptools = /from setuptools import .{0,32}setup/
$i_setuptools = "import setuptools"
$setup = "setup("

$not_setup_example = ">>> setup("
$not_setup_todict = "setup(**config.todict()"
Expand Down
20 changes: 17 additions & 3 deletions rules/anti-static/obfuscation/python/python_rename_imports.yara
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ rule rename_requests: medium {
$ref = /import requests as \w{0,64}/

condition:
filesize < 65535 and all of them
filesize < 512KB and all of them
}

rule rename_requests_2char: high {
Expand Down Expand Up @@ -49,7 +49,7 @@ rule rename_marshal: critical {
$ref = /import marshal as \w{0,64}/

condition:
filesize < 65535 and all of them
filesize < 512KB and all of them
}

rule rename_base64: critical {
Expand All @@ -63,5 +63,19 @@ rule rename_base64: critical {
$ref = /import base64 as \w{0,64}/

condition:
filesize < 65535 and all of them
filesize < 1MB and all of them
}

// rename_zlib flags files under 512KB that import the zlib module under an
// alias ("import zlib as <name>").
rule rename_zlib: high {
meta:
description = "imports 'zlib' library and gives it another name"
// NOTE(review): none of these sample names mention zlib; verify the hashes
// belong to this rule rather than being carried over from another one.
hash_2022_very_hackerman_0_0_1_setup = "66a4a39a3c79a24bdf150cb87106920442a3db20a59810eb3e06149b028c7bff"
hash_2022_example_package_init = "5c0db191458fe648d6799d1461d20e79e65986ba6db522db3737ebbf99c577cb"
hash_2022_xoloaghvurilnh_init = "87a23edfa8fbcc13d1a25b9ac808dbc36c417fda508f98186455a7991a52b6c0"

strings:
// "import zlib as " followed by up to 64 word characters (the alias).
$ref = /import zlib as \w{0,64}/

condition:
filesize < 512KB and all of them
}
12 changes: 12 additions & 0 deletions rules/anti-static/obfuscation/python/vare.yara
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Vare_Obfuscator flags files carrying either marker string associated with
// the Vare obfuscator named in the description.
rule Vare_Obfuscator: high {
meta:
description = "obfuscated with https://github.com/saintdaddy/Vare-Obfuscator"
// Key spelled "filetypes" to agree with the other Python obfuscation rules.
filetypes = "py"

strings:
// Marker in identifier form (double-underscore delimited).
$var = "__VareObfuscator__"
// Marker in plain-text form.
$var2 = "Vare Obfuscator"

condition:
// Either marker alone is sufficient; no file size limit is applied.
any of them
}
Loading