forked from takeruko/extract_vba_source
-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_vba_source.py
99 lines (80 loc) · 4.32 KB
/
extract_vba_source.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from shutil import rmtree
from pathlib import Path
from argparse import ArgumentParser
from oletools.olevba import VBA_Parser, VBA_Project, filter_vba
# Lower-case file suffixes (leading dot included) of Excel files that are
# picked up when a directory is scanned for source files.
OFFICE_FILE_EXTENSIONS = (
    '.xlsb',
    '.xls',
    '.xlsm',
    '.xla',
    '.xlt',
    '.xlam',
)
def get_args():
    """Define the command-line interface and parse the process arguments.

    Returns:
        The ``argparse`` namespace with the attributes ``sources`` (list of
        str), ``dest``, ``use_orig_extension``, ``src_encoding``,
        ``out_encoding`` and ``recursive``.
    """
    cli = ArgumentParser(
        description='Extract vba source files from an MS Office file with macro.',
    )

    # Positional arguments: one or more files and/or directories.
    cli.add_argument(
        'sources',
        metavar='MS_OFFICE_FILE',
        type=str,
        nargs='+',
        help='Paths to source MS Office file or directory.',
    )

    # Where the extracted files go and how they are named.
    cli.add_argument(
        '--dest',
        type=str,
        default='vba_src',
        help='Destination directory path to output vba source files [default: ./vba_src].',
    )
    cli.add_argument(
        '--orig-extension',
        dest='use_orig_extension',
        action='store_true',
        help='Use an original extension (.bas, .cls, .frm) for extracted vba source files [default: use .vb].',
    )

    # Text encodings on the reading and on the writing side.
    cli.add_argument(
        '--src-encoding',
        dest='src_encoding',
        type=str,
        default='shift_jis',
        help='Encoding for vba source files in an MS Office file [default: shift_jis].',
    )
    cli.add_argument(
        '--out-encoding',
        dest='out_encoding',
        type=str,
        default='utf8',
        help='Encoding for generated vba source files [default: utf8].',
    )

    # Directory traversal mode.
    cli.add_argument(
        '--recursive',
        action='store_true',
        help='Find sub directories recursively when a directory is specified as the sources parameter.',
    )

    return cli.parse_args()
def get_source_paths(sources, recursive):
    """Yield the absolute path of every source file selected by *sources*.

    Args:
        sources: Iterable of path strings, each naming a file or a directory.
        recursive: When true, a directory is searched together with all of
            its sub directories; otherwise only its direct children are
            considered.

    Yields:
        pathlib.Path: Absolute paths. A source that is not a directory is
        yielded as given (no existence or extension check). For a directory,
        only regular files whose lower-cased suffix is listed in
        ``OFFICE_FILE_EXTENSIONS`` and whose name does not start with ``~$``
        are yielded.
    """
    pattern = '**/*' if recursive else '*'
    for src in sources:
        p = Path(src)
        if not p.is_dir():
            # If source is a file, then return its absolute path.
            yield p.absolute()
            continue

        # If source is a directory, then find source files under it.
        for entry in p.glob(pattern):
            # Names starting with '~$' are skipped (these look like the
            # temporary owner files Office keeps next to an open document).
            if entry.name.startswith('~$'):
                continue
            if entry.suffix.lower() not in OFFICE_FILE_EXTENSIONS:
                continue
            # A directory can carry a workbook-like name (e.g. 'old.xls');
            # only real files may be handed on for parsing.
            if not entry.is_file():
                continue
            yield entry.absolute()
def get_outputpath(parent_dir: Path, filename: str, use_orig_extension: bool) -> Path:
    """Return the output path for one extracted VBA file, creating its directory.

    The file is sorted into a sub directory of *parent_dir* according to the
    text after the last dot of *filename* (compared case-insensitively):
    ``cls`` -> ``class``, ``frm`` -> ``form``, anything else -> ``module``.
    The sub directory is created when it does not exist yet.

    Args:
        parent_dir: Directory that receives the ``class``/``form``/``module``
            sub directories.
        filename: Name of the VBA file, including its original extension.
        use_orig_extension: When true the file keeps *filename* unchanged;
            otherwise ``.vb`` is appended to it.

    Returns:
        Path of the file to write inside the chosen sub directory.
    """
    extension = filename.split('.')[-1].lower()
    subdir_name = {'cls': 'class', 'frm': 'form'}.get(extension, 'module')
    subdir = parent_dir.joinpath(subdir_name)
    subdir.mkdir(parents=True, exist_ok=True)

    # The suffix is chosen on its own: writing
    # ``filename + '.vb' if ... else ''`` makes the conditional swallow the
    # whole concatenation and yields an empty name (i.e. the directory
    # itself) whenever the original extension is requested.
    suffix = '' if use_orig_extension else '.vb'
    return subdir.joinpath(filename + suffix)
def extract_macros(parser: VBA_Parser, vba_encoding):
    """Yield ``(vba_filename, code_data)`` for each VBA module found via *parser*.

    Args:
        parser: The parser to inspect. When its ``ole_file`` is ``None`` the
            parsers listed in its ``ole_subfiles`` are searched instead,
            recursively.
        vba_encoding: Value assigned to each project's ``codec`` before its
            streams are parsed (presumably the code page used to decode the
            VBA text; confirm against oletools).

    Yields:
        Tuples of the module's file name and its code data, as produced by
        ``VBA_Project.parse_modules()`` (the code path it also reports is
        dropped).
    """
    if parser.ole_file is not None:
        parser.find_vba_projects()
        for vba_root, project_path, dir_path in parser.vba_projects:
            vba_project = VBA_Project(
                parser.ole_file,
                vba_root,
                project_path,
                dir_path,
                relaxed=False,
            )
            # Replace the project's codec with the caller's choice before any
            # stream is parsed.
            vba_project.codec = vba_encoding
            vba_project.parse_project_stream()
            for _code_path, module_name, module_code in vba_project.parse_modules():
                yield (module_name, module_code)
    else:
        # No OLE file at this level: descend into the contained sub files.
        for subfile in parser.ole_subfiles:
            yield from extract_macros(subfile, vba_encoding)
def main():
    """Extract the VBA sources of every selected file below the destination.

    For each source file a directory named after the file's stem is created
    under the destination root, and every VBA module is written into it with
    the requested output encoding.

    Raises:
        FileExistsError: If the destination path exists but is not a
            directory.
    """
    args = get_args()

    # Get the root path of destination (if not exists then make it).
    root = Path(args.dest)
    if root.exists() and not root.is_dir():
        raise FileExistsError(
            'Destination {root} exists and is not a directory.'.format(root=root))
    root.mkdir(parents=True, exist_ok=True)

    # Get the source MS Office file where extract the vba source files from.
    for source in get_source_paths(args.sources, args.recursive):
        src = Path(source)
        basename = src.stem
        dest = root.joinpath(basename)

        # Drop the output of a previous run so that stale files do not
        # survive; the directory is re-created when the first file is written.
        # NOTE(review): sources sharing a stem (e.g. a.xls and a.xlsm) map to
        # the same directory, so the later one replaces the earlier output.
        if dest.is_dir():
            rmtree(dest)
        print('Extract vba files from {source} to {dest}'.format(source=source, dest=dest))

        # Extract vba source files from the MS Office file and save each vba
        # file into the sub directory named after its MS Office file.
        vba_parser = VBA_Parser(str(src))
        try:
            for vba_filename, vba_code in extract_macros(vba_parser, args.src_encoding):
                vba_file = get_outputpath(dest, vba_filename, args.use_orig_extension)
                vba_file.write_text(filter_vba(vba_code), encoding=args.out_encoding)
                print('[{basename}] {vba_file} is generated.'.format(basename=basename, vba_file=vba_file))
        finally:
            # Release the file handles held by the parser before moving on to
            # the next workbook.
            vba_parser.close()


if __name__ == '__main__':
    main()