Skip to content

Commit

Permalink
olevba: enabled relaxed mode by default (issues decalage2#477, decalage2#593),
Browse files Browse the repository at this point in the history
fixed detect_vba_macros to always return VBA code as unicode on Python 3 (issues decalage2#455, decalage2#477, decalage2#587, decalage2#593)
  • Loading branch information
decalage2 authored and c-rosenberg committed Sep 17, 2020
1 parent 28955fe commit cf96444
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 10 deletions.
33 changes: 24 additions & 9 deletions oletools/olevba.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@
import email # for MHTML parsing
import string # for printable
import json # for json output mode (argument --json)
import chardet

# import lxml or ElementTree for XML parsing:
try:
Expand Down Expand Up @@ -1650,7 +1651,7 @@ class VBA_Project(object):
metadata and VBA modules.
"""

def __init__(self, ole, vba_root, project_path, dir_path, relaxed=False):
def __init__(self, ole, vba_root, project_path, dir_path, relaxed=True):
"""
Extract VBA macros from an OleFileIO object.
Expand Down Expand Up @@ -2070,7 +2071,7 @@ def decode_bytes(self, bytes_string, errors='replace'):



def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=True):
"""
Extract VBA macros from an OleFileIO object.
Internal function, do not call directly.
Expand Down Expand Up @@ -2692,7 +2693,7 @@ def __init__(self, filename, data=None, container=None, relaxed=True, encoding=D
self.vba_forms = None
self.contains_macros = None # will be set to True or False by detect_macros
self.vba_code_all_modules = None # to store the source code of all modules
# list of tuples for each module: (subfilename, stream_path, vba_filename, vba_code)
# list of tuples for each module: (subfilename, stream_path, vba_filename, vba_code, vba_encoding)
self.modules = None
# Analysis results: list of tuples (type, keyword, description) - See VBA_Scanner
self.analysis_results = None
Expand Down Expand Up @@ -3432,9 +3433,23 @@ def extract_macros(self):
log.debug('Found VBA compressed code at index %X' % start)
compressed_code = data[start:]
try:
vba_code = bytes2str(decompress_stream(bytearray(compressed_code)))
# TODO vba_code = self.encode_string(vba_code)
yield (self.filename, d.name, d.name, vba_code)
vba_code_bytes = decompress_stream(bytearray(compressed_code))
# vba_code_bytes is in bytes, we need to convert it to str
# but here we don't know the encoding of the VBA project
# (for example code page 1252 or 1251), because it's in the
# VBA_Project class and if we're here it may be because
# the VBA project parsing failed (e.g. issue #593).
# So let's guess the encoding with chardet, and fall back to cp1252
# when the detection confidence is not above 0.6
# TODO get the actual encoding from the VBA_Project
chardet_result = chardet.detect(vba_code_bytes)

if chardet_result['confidence'] > 0.6:
charenc = chardet_result['encoding']
else:
charenc = 'cp1252'

vba_code_str = bytes2str(vba_code_bytes, charenc)
yield (self.filename, d.name, d.name, vba_code_str, chardet_result['encoding'], chardet_result['confidence'])
except Exception as exc:
# display the exception with full stack trace for debugging
log.debug('Error processing stream %r in file %r (%s)' % (d.name, self.filename, exc))
Expand Down Expand Up @@ -3465,7 +3480,7 @@ def extract_all_macros(self):
if self.modules is None:
self.modules = []
for (subfilename, stream_path, vba_filename, vba_code) in self.extract_macros():
self.modules.append((subfilename, stream_path, vba_filename, vba_code))
self.modules.append((subfilename, stream_path, vba_filename, vba_code, vba_encoding))
self.nb_macros = len(self.modules)
return self.modules

Expand Down Expand Up @@ -3980,7 +3995,7 @@ def process_file(self, show_decoded_strings=False,
# run analysis before displaying VBA code, in order to colorize found keywords
self.run_analysis(show_decoded_strings=show_decoded_strings, deobfuscate=deobfuscate)
#print 'Contains VBA Macros:'
for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
for (subfilename, stream_path, vba_filename, vba_code, vba_encoding) in self.extract_all_macros():
if hide_attributes:
# hide attribute lines:
vba_code_filtered = filter_vba(vba_code)
Expand Down Expand Up @@ -4109,7 +4124,7 @@ def process_file_json(self, show_decoded_strings=False,
result['type'] = self.type
macros = []
if self.detect_vba_macros():
for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
for (subfilename, stream_path, vba_filename, vba_code, vba_encoding) in self.extract_all_macros():
curr_macro = {}
if hide_attributes:
# hide attribute lines:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
#--- METADATA -----------------------------------------------------------------

name = "oletools"
version = '0.55'
version = '0.55-heinlein-testing'
desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR"
long_desc = open('oletools/README.rst').read()
author = "Philippe Lagadec"
Expand Down

0 comments on commit cf96444

Please sign in to comment.