Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Towards improving read and rendering of results #1396

Merged
merged 24 commits into from
Mar 30, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## master (unreleased)

### New Features
- add new format to parse output json back to capa #1396 @ooprathamm

### Breaking Changes

Expand Down
1 change: 1 addition & 0 deletions capa/features/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,7 @@ def evaluate(self, ctx, **kwargs):
FORMAT_SC32 = "sc32"
FORMAT_SC64 = "sc64"
FORMAT_FREEZE = "freeze"
FORMAT_RESULT = "result"
FORMAT_UNKNOWN = "unknown"


Expand Down
9 changes: 8 additions & 1 deletion capa/features/extractors/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import capa.features.extractors.elf
import capa.features.extractors.pefile
import capa.features.extractors.strings
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String, Feature
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_ANY, OS_WINDOWS, FORMAT_FREEZE, FORMAT_RESULT, ARCH_ANY, Arch, Format, String, Feature
from capa.features.freeze import is_freeze
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress

Expand All @@ -35,6 +35,8 @@ def extract_format(buf) -> Iterator[Tuple[Feature, Address]]:
yield Format(FORMAT_ELF), NO_ADDRESS
elif is_freeze(buf):
yield Format(FORMAT_FREEZE), NO_ADDRESS
elif buf.startswith(b"{\"meta\":"):
ooprathamm marked this conversation as resolved.
Show resolved Hide resolved
yield Format(FORMAT_RESULT), NO_ADDRESS
else:
# we likely end up here:
# 1. handling a file format (e.g. macho)
Expand All @@ -51,6 +53,9 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
elif buf.startswith(b"\x7fELF"):
with contextlib.closing(io.BytesIO(buf)) as f:
arch = capa.features.extractors.elf.detect_elf_arch(f)

elif buf.startswith(b"{\"meta\":"):
arch = ARCH_ANY

if arch not in capa.features.common.VALID_ARCH:
logger.debug("unsupported arch: %s", arch)
Expand Down Expand Up @@ -79,6 +84,8 @@ def extract_os(buf) -> Iterator[Tuple[Feature, Address]]:
elif buf.startswith(b"\x7fELF"):
with contextlib.closing(io.BytesIO(buf)) as f:
os = capa.features.extractors.elf.detect_elf_os(f)
elif buf.startswith(b"{\"meta\":"):
os = OS_ANY

if os not in capa.features.common.VALID_OS:
logger.debug("unsupported os: %s", os)
Expand Down
27 changes: 16 additions & 11 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
FORMAT_SC64,
FORMAT_DOTNET,
FORMAT_FREEZE,
FORMAT_RESULT
)
from capa.features.address import NO_ADDRESS, Address
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
Expand Down Expand Up @@ -1123,8 +1124,12 @@ def main(argv=None):
if not (args.verbose or args.vverbose or args.json):
logger.debug("file limitation short circuit, won't analyze fully.")
return E_FILE_LIMITATION

if format_ == FORMAT_FREEZE:
if format_ == FORMAT_RESULT:
with open(args.sample, "rb") as f:
buf = f.read()
buf.decode("utf-8")
meta, capabilities = capa.render.result_document.ResultDocument.parse_raw(buf,rules)
elif format_ == FORMAT_FREEZE:
with open(args.sample, "rb") as f:
extractor = capa.features.freeze.load(f.read())
williballenthin marked this conversation as resolved.
Show resolved Hide resolved
else:
Expand Down Expand Up @@ -1154,17 +1159,17 @@ def main(argv=None):
log_unsupported_os_error()
return E_INVALID_FILE_OS

meta = collect_metadata(argv, args.sample, args.rules, extractor)
meta = collect_metadata(argv, args.sample, args.rules, extractor)

capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
meta["analysis"].update(counts)
meta["analysis"]["layout"] = compute_layout(rules, extractor, capabilities)
capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
meta["analysis"].update(counts)
meta["analysis"]["layout"] = compute_layout(rules, extractor, capabilities)

if has_file_limitation(rules, capabilities):
# bail if capa encountered file limitation e.g. a packed binary
# do show the output in verbose mode, though.
if not (args.verbose or args.vverbose or args.json):
return E_FILE_LIMITATION
if has_file_limitation(rules, capabilities):
# bail if capa encountered file limitation e.g. a packed binary
# do show the output in verbose mode, though.
if not (args.verbose or args.vverbose or args.json):
return E_FILE_LIMITATION

if args.json:
print(capa.render.json.render(meta, rules, capabilities))
Expand Down
80 changes: 79 additions & 1 deletion capa/render/result_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import datetime
from typing import Any, Dict, Tuple, Union, Optional
import json
from typing import Any, Dict, List, Tuple, Union, Optional

from pydantic import Field, BaseModel

Expand Down Expand Up @@ -124,6 +125,45 @@ def from_capa(cls, meta: Any) -> "Metadata":
),
)

def to_capa(self) -> Dict[str, Any]:
    """
    Convert this metadata model back into a plain nested dictionary.

    Every address held by the model (base address, function and basic block
    addresses, feature-count addresses, library-function addresses) is passed
    through its own `to_capa()` before being stored or used as a dictionary key.

    returns:
      dict with the keys "timestamp" (ISO 8601 string), "version", "sample"
      and "analysis", in that order.
    """
    result: Dict[str, Any] = {}
    result["timestamp"] = self.timestamp.isoformat()
    result["version"] = self.version

    # sample identification: hashes and path are copied verbatim
    result["sample"] = {field: getattr(self.sample, field) for field in ("md5", "sha1", "sha256", "path")}

    # scalar analysis fields are copied verbatim, in this order
    analysis_out: Dict[str, Any] = {
        field: getattr(self.analysis, field) for field in ("format", "arch", "os", "extractor", "rules")
    }
    analysis_out["base_address"] = self.analysis.base_address.to_capa()

    # layout: function address -> addresses of its matched basic blocks
    layout_functions: Dict[Any, Any] = {}
    for function in self.analysis.layout.functions:
        function_key = function.address.to_capa()
        matched = []
        for basic_block in function.matched_basic_blocks:
            matched.append(basic_block.address.to_capa())
        layout_functions[function_key] = {"matched_basic_blocks": matched}
    analysis_out["layout"] = {"functions": layout_functions}

    # feature counts: one file-level count plus a per-function mapping
    file_count = self.analysis.feature_counts.file
    per_function_counts: Dict[Any, Any] = {}
    for function_count in self.analysis.feature_counts.functions:
        count_key = function_count.address.to_capa()
        per_function_counts[count_key] = function_count.count
    analysis_out["feature_counts"] = {"file": file_count, "functions": per_function_counts}

    # library functions: address -> name
    library_names: Dict[Any, Any] = {}
    for library_function in self.analysis.library_functions:
        library_key = library_function.address.to_capa()
        library_names[library_key] = library_function.name
    analysis_out["library_functions"] = library_names

    result["analysis"] = analysis_out
    return result


class CompoundStatementType:
AND = "and"
Expand Down Expand Up @@ -525,6 +565,8 @@ class ResultDocument(BaseModel):
def from_capa(cls, meta, rules: RuleSet, capabilities: MatchResults) -> "ResultDocument":
rule_matches: Dict[str, RuleMatches] = {}
for rule_name, matches in capabilities.items():
if rule_name not in rules:
continue
ooprathamm marked this conversation as resolved.
Show resolved Hide resolved
rule = rules[rule_name]

if rule.meta.get("capa/subscope-rule"):
Expand All @@ -540,3 +582,39 @@ def from_capa(cls, meta, rules: RuleSet, capabilities: MatchResults) -> "ResultD
)

return ResultDocument(meta=Metadata.from_capa(meta), rules=rule_matches)

@classmethod
def parse_raw(cls, raw: Union[str, bytes], rules: RuleSet):
    """
    Rebuild capa's in-memory results from a serialized result document.

    NOTE(review): this shadows pydantic's `BaseModel.parse_raw` while taking an
    extra argument and returning a tuple instead of a model instance; consider
    giving it a distinct name in a follow-up.

    args:
      raw: JSON text of a result document. `json.loads` also accepts bytes,
        which is what a caller reading the file in binary mode passes.
      rules: rule set, indexed by rule name to recover the statement of a
        match whose root node is a compound statement.

    returns:
      tuple (meta, capabilities): `meta` is the result of `Metadata.to_capa()`;
      `capabilities` maps each rule name to a list of (address, Result) pairs.

    raises:
      ValueError: the JSON is malformed (`json.JSONDecodeError`), or a match
        node is neither a StatementNode nor a FeatureNode.

    NOTE(review): only the root node of each match is restored (`children=[]`),
    so nested match details are not available to renderers. A rule name that is
    missing from `rules` presumably fails on the `rules[rule_name]` lookup --
    TODO confirm against RuleSet.
    """
    data = json.loads(raw)
    # use `cls` rather than the hard-coded class so subclasses parse as themselves
    result_doc = cls(**data)

    capabilities: Dict[str, List[Tuple[Any, capa.features.common.Result]]] = {}

    for rule_name, rule_match in result_doc.rules.items():
        for addr, match in rule_match.matches:
            node = match.node

            if isinstance(node, StatementNode):
                if isinstance(node.statement, CompoundStatement):
                    # compound statements are taken from the rule definition itself
                    statement = rules[rule_name].statement
                else:
                    statement = statement_from_capa(node.statement)
            elif isinstance(node, FeatureNode):
                statement = node.feature.to_capa()
                if isinstance(statement, (capa.features.common.String, capa.features.common.Regex)):
                    # restore the captured strings alongside the string/regex feature
                    statement.matches = match.captures
            else:
                raise ValueError("Invalid node type")

            result = capa.features.common.Result(
                statement=statement,
                success=match.success,
                locations=[frz.Address.to_capa(loc) for loc in match.locations],
                children=[],
            )

            # The match address comes from the same document as `match.locations`,
            # so convert it in the same direction (document -> capa address).
            # The original passed it through `from_capa`, the opposite direction.
            capabilities.setdefault(rule_name, []).append((frz.Address.to_capa(addr), result))

    return result_doc.meta.to_capa(), capabilities