-
Notifications
You must be signed in to change notification settings - Fork 28
/
SacaraAssembler.fs
419 lines (362 loc) · 18.1 KB
/
SacaraAssembler.fs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
namespace ES.Sacara.Ir.Assembler
open System
open System.Reflection
open System.Text.RegularExpressions
open System.Collections.Generic
open ES.Sacara.Ir.Parser
open ES.Sacara.Ir.Parser.IrAst
open ES.Sacara.Ir.Core
open ES.Sacara.Ir.Obfuscator
open System.IO
/// Outcome of an assembling run: the assembled VM functions plus the warnings collected for them.
type IrAssemblyCode = {
    Functions: VmFunction list
    Warnings: String list
} with
    /// Returns one array made of the buffers of all the opcodes of all the functions,
    /// concatenated in the order in which functions and opcodes are stored.
    member this.GetBuffer() =
        this.Functions
        |> List.collect(fun vmFunction -> vmFunction.Body)
        |> List.map(fun vmOpCode -> vmOpCode.Buffer)
        |> Array.concat

    /// Returns the string representation of every opcode, one opcode per line.
    override this.ToString() =
        let opCodeLines =
            this.Functions
            |> List.collect(fun vmFunction -> vmFunction.Body)
            |> List.map(string)
        String.Join(Environment.NewLine, opCodeLines)
/// Assembles Sacara IR, given either as source text or as a set of programmatic
/// instruction builders, into VM functions (see the two Assemble overloads).
type SacaraAssembler(settings: AssemblerSettings) =
    // function that receives the operations produced by the parser
    let mutable _currentFunction = new IrFunction(String.Empty)
    let mutable _functions = new List<IrFunction>()
    // label waiting to be attached to the next operation that is emitted
    let mutable _currentLabel: String option = None
    // offset at which the next VM opcode is assembled
    let mutable _currentIp = 0
    // files whose inclusion is still in progress, used to detect circular includes
    let _activeIncludes = new HashSet<String>()

    /// Brings the assembler back to a clean state, so that nothing produced by a previous
    /// (possibly failed) run, such as a pending label, leaks into the next one.
    let resetState() =
        _functions.Clear()
        _currentFunction <- new IrFunction(String.Empty)
        _currentLabel <- None
        _currentIp <- 0
        _activeIncludes.Clear()

    /// Creates an IR opcode for the given instruction, honouring the configured settings.
    let createOpCode(instruction: IrInstruction) =
        new IrOpCode(instruction, settings.UseMultipleOpcodeForSameInstruction)

    /// Appends the opcode to the current function; a pending label, if any, is attached to it.
    let addOperation(opCode: IrOpCode) =
        if _currentLabel.IsSome then
            opCode.Label <- _currentLabel
            _currentLabel <- None
        _currentFunction.Body.Add(opCode)

    /// Emits an instruction that carries no operand.
    let emit(instruction: IrInstruction) =
        addOperation(createOpCode(instruction))

    /// Emits an instruction that carries exactly one operand.
    let emitWithOperand(instruction: IrInstruction, operand: Operand) =
        let opCode = createOpCode(instruction)
        opCode.Operands.Add(operand)
        addOperation(opCode)

    /// Converts an expression to an operand. A statement used as expression is parsed
    /// (emitting its own operations) and produces no operand.
    let rec parseOperationExpression(expression: Expression) =
        match expression with
        | Number number ->
            Some(new Operand(number.Value))
        | Identifier identifier ->
            Some(new Operand(identifier.Name))
        | StatementExpression statement ->
            parseStatement(statement)
            None

    /// Translates a single AST statement into IR operations of the current function.
    and parseStatement(statement: Statement) =
        match statement with
        | Statement.Procedure procType ->
            // every procedure becomes a new function, which is now the current one
            _currentFunction <- new IrFunction(procType.Name)
            _functions.Add(_currentFunction)
            procType.Body |> List.iter(parseStatement)
        | Statement.Push pushType ->
            // when the operand is a statement no push is emitted (no operand is returned)
            parseOperationExpression(pushType.Operand)
            |> Option.iter(fun operand -> emitWithOperand(IrInstruction.Push, operand))
        | Statement.Pop popType ->
            emitWithOperand(IrInstruction.Pop, new Operand(popType.Identifier))
        | Statement.Label labelType ->
            // the label is attached to the first operation emitted from now on
            _currentLabel <- Some labelType.Name
            parseStatement(labelType.Statement)
        | Statement.Call callType ->
            emit(if callType.Native then IrInstruction.NativeCall else IrInstruction.Call)
        | Statement.Read readType ->
            emit(if readType.Native then IrInstruction.NativeRead else IrInstruction.Read)
        | Statement.Write writeType ->
            emit(if writeType.Native then IrInstruction.NativeWrite else IrInstruction.Write)
        | Statement.Nop -> emit(IrInstruction.Nop)
        | Statement.GetIp -> emit(IrInstruction.GetIp)
        | Statement.Add -> emit(IrInstruction.Add)
        | Statement.Ret -> emit(IrInstruction.Ret)
        | Statement.JumpIf jumpIfType ->
            let instruction =
                match (jumpIfType.JumpIfEquals, jumpIfType.JumpIfLess) with
                | (true, true) -> IrInstruction.JumpIfLessEquals
                | (true, false) -> IrInstruction.JumpIfGreaterEquals
                | (false, true) -> IrInstruction.JumpIfLess
                | (false, false) -> IrInstruction.JumpIfGreater
            emit(instruction)
        | Statement.Jump -> emit(IrInstruction.Jump)
        | Statement.Empty -> ()
        | Statement.Alloca -> emit(IrInstruction.Alloca)
        | Statement.Byte b -> emitWithOperand(IrInstruction.Byte, new Operand(b))
        | Statement.Word w -> emitWithOperand(IrInstruction.Word, new Operand(w))
        | Statement.DoubleWord dw -> emitWithOperand(IrInstruction.DoubleWord, new Operand(dw))
        | Statement.Halt -> emit(IrInstruction.Halt)
        | Statement.Cmp -> emit(IrInstruction.Cmp)
        | Statement.GetSp -> emit(IrInstruction.GetSp)
        | Statement.StackRead -> emit(IrInstruction.StackRead)
        | Statement.StackWrite -> emit(IrInstruction.StackWrite)
        | Statement.Sub -> emit(IrInstruction.Sub)
        | Statement.Mul -> emit(IrInstruction.Mul)
        | Statement.Div -> emit(IrInstruction.Div)
        | Statement.And -> emit(IrInstruction.And)
        | Statement.Or -> emit(IrInstruction.Or)
        | Statement.Not -> emit(IrInstruction.Not)
        | Statement.Xor -> emit(IrInstruction.Xor)
        | Statement.Nor -> emit(IrInstruction.Nor)
        | Statement.ShiftLeft -> emit(IrInstruction.ShiftLeft)
        | Statement.ShiftRight -> emit(IrInstruction.ShiftRight)
        | Statement.SetIp -> emit(IrInstruction.SetIp)
        | Statement.SetSp -> emit(IrInstruction.SetSp)
        | Statement.Inc -> emit(IrInstruction.Inc)
        | Statement.Block statementList ->
            statementList |> Seq.iter(parseStatement)
        | Statement.IncludeFile fileName ->
            includeFile(fileName)
        | Statement.Mod -> emit(IrInstruction.Mod)

    /// Parses the content of another IR file in place of the include statement.
    /// The file name is resolved against the directory of the entry assembly.
    /// Fails if the file does not exist or if it is (directly or indirectly) including itself.
    and includeFile(rawFileName: String) =
        let fileName = rawFileName.Replace('/', Path.DirectorySeparatorChar).Replace('\\', Path.DirectorySeparatorChar)
        // GetEntryAssembly returns null in some hosts (e.g. when loaded from unmanaged code),
        // in that case fall back to the assembly that contains this code
        let hostAssembly =
            match Assembly.GetEntryAssembly() with
            | null -> Assembly.GetExecutingAssembly()
            | entryAssembly -> entryAssembly
        let fullPath = Path.Combine(Path.GetDirectoryName(hostAssembly.Location), fileName)
        if not <| File.Exists(fullPath) then
            failwith(String.Format("Filename to include '{0}' not found.", fullPath))
        // a file that is already being included would recurse until the stack is exhausted
        if not(_activeIncludes.Add(fullPath)) then
            failwith(String.Format("Circular inclusion of file '{0}' detected.", fullPath))
        try
            parseCode(File.ReadAllText(fullPath))
        finally
            _activeIncludes.Remove(fullPath) |> ignore

    /// Parses all the top level statements of a program.
    and parseAst(ast: Program) =
        match ast with
        | Program statements -> statements |> List.iter(parseStatement)

    /// Builds the AST of the given IR source code and parses it.
    and parseCode(irCode: String) =
        let astBuilder = new SacaraAstBuilder()
        parseAst(astBuilder.Parse(irCode))

    /// Assembles one IR opcode at the current offset and moves the offset past it.
    /// If the opcode has a label, the label is registered at the offset of the opcode.
    let assemblyIrOpCode(symbolTable: SymbolTable) (opCode: IrOpCode) =
        opCode.Label |> Option.iter(fun label -> symbolTable.AddLabel(label, _currentIp))
        let vmOpCode = opCode.Assemble(_currentIp, symbolTable)
        _currentIp <- _currentIp + vmOpCode.Buffer.Length
        vmOpCode

    /// Applies the encryption steps enabled in the settings to every assembled opcode.
    let obfuscate(vmFunctions: VmFunction list, settings: AssemblerSettings) =
        vmFunctions
        |> List.iter(fun vmFunction ->
            vmFunction.Body
            |> List.iter(fun vmOpCode ->
                // encrypt the opcode if necessary
                if settings.RandomlyEncryptOpCode then
                    Engines.encryptVmOpCode(vmOpCode)

                // encrypt operands if necessary
                if settings.EncryptOperands then
                    Engines.encryptVmOperands(vmOpCode)
            )
        )

    /// Assembles, in order, all the IR opcodes of a function body.
    let assemblyFunctionBody(irFunctionBody: IrOpCode seq, symbolTable: SymbolTable, settings: AssemblerSettings) =
        irFunctionBody
        |> Seq.map(assemblyIrOpCode(symbolTable))
        |> Seq.toList

    /// Prepends "push <number of local variables>; alloca" to the opcodes when the function
    /// uses local variables, otherwise returns the opcodes unchanged.
    let addAllocaInstruction(symbolTable: SymbolTable, opCodes: IrOpCode list) =
        let allVariables = new HashSet<String>()

        // extract all local variables: string operands of push/pop that are not labels
        let opCodeAcceptingVariables = [
            IrInstruction.Push
            IrInstruction.Pop
        ]

        opCodes
        |> Seq.filter(fun opCode -> opCodeAcceptingVariables |> List.contains opCode.Type)
        |> Seq.collect(fun opCode -> opCode.Operands)
        |> Seq.iter(fun operand ->
            match operand.Value with
            | :? String as name when not(symbolTable.IsLabel(name)) ->
                allVariables.Add(name) |> ignore
            | _ -> ()
        )

        // create alloca instruction
        if allVariables.Count > 0 then
            let pushInstr = createOpCode(IrInstruction.Push)
            pushInstr.Operands.Add(new Operand(allVariables.Count))
            let allocaInstr = createOpCode(IrInstruction.Alloca)
            [pushInstr; allocaInstr] @ opCodes
        else
            opCodes

    /// Splits the textual form of an identifier written as "<index>#<name>" into
    /// (name, Some index); any other text is returned as (text, None).
    let parseIdentifierName(rawName: Object) =
        let rawName = rawName.ToString()
        let m = Regex.Match(rawName, "^([0-9]+)#(.+)")
        if m.Success then
            let index = Int32.Parse(m.Groups.[1].Value)
            let name = m.Groups.[2].Value
            (name, Some index)
        else
            (rawName, None)

    /// Returns the identifier operands of the function that are not labels, one per distinct value.
    let getLocalVariables(irFunction: IrFunction, symbolTable: SymbolTable) =
        irFunction.Body
        |> Seq.collect(fun irCode -> irCode.Operands)
        |> Seq.filter(fun operand ->
            operand.IsIdentifier && not(symbolTable.IsLabel(operand.Value.ToString()))
        )
        |> Seq.distinctBy(fun op -> op.Value.ToString())
        |> Seq.toList

    /// Computes the final order of the local variables: variables without an explicit index keep
    /// their position, while the positions held by indexed variables are filled with the indexed
    /// operands taken in the order given by sortedIndexedVariables.
    let sortAllVariables(localVariables: Operand list, sortedIndexedVariables: IDictionary<String, Operand>) =
        let mutable indexedOffset = 0
        // array instead of list: it is accessed by position below
        let keys = sortedIndexedVariables.Keys |> Seq.toArray
        // operand already chosen for a given variable name
        let returnedOperands = new Dictionary<String, Operand>()

        localVariables
        |> List.map(fun op -> (op, parseIdentifierName(op)))
        |> List.map(fun (op, (name, _)) ->
            if sortedIndexedVariables.ContainsKey(name) then
                match returnedOperands.TryGetValue(name) with
                | (true, operand) -> operand
                | _ ->
                    // first time this indexed name is met: consume the next indexed operand
                    let key = keys.[indexedOffset]
                    indexedOffset <- indexedOffset + 1
                    let operand = sortedIndexedVariables.[key]
                    returnedOperands.[name] <- operand
                    operand
            else
                op
        )
        |> List.distinctBy(fun op -> op.Value)

    /// Returns a name -> operand dictionary of the variables having an explicit index,
    /// built from the variables sorted by ascending index.
    let sortIndexedVariables(localVariables: Operand list) =
        localVariables
        |> List.map(fun op -> (op, parseIdentifierName(op)))
        |> List.filter(fun (_, (_, index)) -> index.IsSome)
        |> List.sortBy(fun (_, (_, index)) -> index.Value)
        |> List.map(fun (op, (name, _)) -> (name, op))
        |> dict

    /// Rewrites the value of every operand of the function that refers to a local variable
    /// as "<position>#<name>", where position is the index of the variable in sortedVariables.
    let rewriteLocalVariableOffset(sortedVariables: Operand list, irFunction: IrFunction) =
        irFunction.Body
        |> Seq.collect(fun irOpcode -> irOpcode.Operands)
        |> Seq.iter(fun op ->
            let (name, _) = parseIdentifierName(op)
            sortedVariables
            |> List.iteri(fun index sop ->
                let (sname, _) = parseIdentifierName(sop)
                if name.Equals(sname, StringComparison.Ordinal) then
                    let offsetName = String.Format("{0}#{1}", index, sname)
                    op.Value <- offsetName
            )
        )

    /// Assembles an IR function into a VM function, starting at the current offset.
    let generateFunctionVmOpCodes(symbolTable: SymbolTable, settings: AssemblerSettings) (irFunction: IrFunction) =
        symbolTable.StartFunction()

        // the analyzed function is a symbol, this will ensure that instruction like call foo, will be correctly assembled
        symbolTable.AddLabel(irFunction.Name, _currentIp)

        // correctly set offset for local variables
        let localVariables = getLocalVariables(irFunction, symbolTable)
        let sortedIndexedVariables = sortIndexedVariables(localVariables)
        let sortedVariables = sortAllVariables(localVariables, sortedIndexedVariables)
        rewriteLocalVariableOffset(sortedVariables, irFunction)

        let rawBody =
            if settings.UseNorOperator
            then Engines.reWriteInstructionWithNorOperator(irFunction.Body, settings.UseMultipleOpcodeForSameInstruction) |> Seq.toList
            else irFunction.Body |> Seq.toList

        // add alloca instruction to compute space for local variables
        let fullBody = addAllocaInstruction(symbolTable, rawBody)

        // proceed to assemble VM opcodes
        {Body=assemblyFunctionBody(fullBody, symbolTable, settings)}

    /// Returns the functions with the entry point first, followed by the other functions
    /// (in random order when settings.ReorderFunctions is set).
    /// Raises KeyNotFoundException if no function is the entry point.
    let orderFunctions(functions: List<IrFunction>, settings: AssemblerSettings) =
        let entryPointFunction =
            match functions |> Seq.tryFind(fun f -> f.IsEntryPoint()) with
            | Some f -> f
            | None -> raise(new KeyNotFoundException("No entry point function was found in the code to assemble."))

        // the same predicate selects the entry point and excludes it from the remaining functions.
        // NOTE(review): the exclusion was previously done by comparing the name with "main"
        // (ignoring case), presumably what IsEntryPoint checks too - confirm.
        let nonEntryPointFunctions = functions |> Seq.filter(fun f -> not(f.IsEntryPoint()))

        let otherFunctions =
            if settings.ReorderFunctions then
                let rnd = new Random()
                nonEntryPointFunctions
                |> Seq.sortBy(fun _ -> rnd.Next())
                |> Seq.toList
            else
                nonEntryPointFunctions |> Seq.toList

        entryPointFunction::otherFunctions

    /// Registers the name of every function and of every label in the symbol table.
    let addLabelNamesToSymbolTable(symbolTable: SymbolTable, functions: List<IrFunction>) =
        functions
        |> Seq.iter(fun irFunction ->
            symbolTable.AddLabelName(irFunction.Name)
            irFunction.Body
            |> Seq.choose(fun irOpCode -> irOpCode.Label)
            |> Seq.iter(fun label -> symbolTable.AddLabelName(label))
        )

    /// Returns a one-element list with a warning when the function contains neither a RET
    /// nor a HALT instruction, an empty list otherwise.
    let hasReturnOrHalt(irFun: IrFunction) =
        if irFun.Body |> Seq.exists(fun opCode -> [IrInstruction.Ret; IrInstruction.Halt] |> List.contains opCode.Type)
        then List.empty
        else [(String.Format("Function '{0}' doesn't contains a RET or HALT instruction, this may cause an infinite loop", irFun.Name))]

    /// Folder function: puts the warnings of the function in front of the ones already collected.
    let checkFunctionForWarnings(warnings: String list) (irFunction: IrFunction) =
        hasReturnOrHalt(irFunction) @ warnings

    /// Collects the warnings of all the functions.
    let checkForWarnings(functions: List<IrFunction>) =
        functions
        |> Seq.toList
        |> List.fold(checkFunctionForWarnings) List.empty

    /// Creates an assembler that uses a default constructed AssemblerSettings.
    new() = new SacaraAssembler(new AssemblerSettings())

    /// Assembles the functions collected so far and computes their warnings.
    member private this.GenerateIrAssemblyCode() = {
        Functions = this.GenerateBinaryCode(_functions)
        Warnings = checkForWarnings(_functions)
    }

    /// Assembles the given IR functions into VM functions: the entry point comes first,
    /// placeholders are fixed and the obfuscation enabled in the settings is applied.
    member this.GenerateBinaryCode(functions: List<IrFunction>) =
        let symbolTable = new SymbolTable()

        // add all function names and labels to the symbol table, in order to be
        // able to correctly assemble specific VM opCode
        addLabelNamesToSymbolTable(symbolTable, functions)

        // assemble the code
        let vmFunctions =
            orderFunctions(functions, settings)
            |> Seq.map(generateFunctionVmOpCodes(symbolTable, settings))
            |> Seq.toList

        // fix the offset
        symbolTable.FixPlaceholders(vmFunctions)

        // obfuscate
        obfuscate(vmFunctions, settings)

        vmFunctions

    /// Assembles the code built by running the given actions on a fresh context.
    member this.Assemble(instructions: Action<Ctx> array) =
        resetState()

        let ctx = new Ctx(Settings=settings)

        // complete all instructions in the given context
        instructions
        |> Seq.iter(fun irFunction -> irFunction.Invoke(ctx))

        // one IR function for each function of the context, the missing (None) opcodes are skipped
        let irFunctions =
            ctx.Functions
            |> Seq.map(fun kv ->
                let irFunction = new IrFunction(kv.Key)
                irFunction.Body.AddRange(kv.Value |> Seq.choose id)
                irFunction
            )
        _functions <- new List<IrFunction>(irFunctions)

        // generate VM opcode
        this.GenerateIrAssemblyCode()

    /// Assembles the given IR source code.
    member this.Assemble(irCode: String) =
        resetState()

        parseCode(irCode)

        // generate VM opcode
        this.GenerateIrAssemblyCode()