Skip to content

Commit

Permalink
Add cs_buffer. Breaking API change.
Browse files Browse the repository at this point in the history
Remove cs_disasm_iter, cs_malloc and cs_free.

This API change unifies the disassembly process. Previously, there were two
separate functions: one to disassemble a single instruction at a time in a
loop, and one to disassemble many instructions into a dynamic buffer. This
commit introduces a user-allocatable buffer that can be used in both
situations with a single function.

Updating the use of cs_disasm_iter() is a little more complicated than
cs_disasm():

    // old api
    cs_insn *insn = cs_malloc(handle);
    while (cs_disasm_iter(handle, &code, &code_size, &ip, insn)) {
        disassembled_instructions += 1;
    }
    cs_free(insn);

Must be changed to:

    // new api
    cs_buffer *buffer = cs_buffer_new(1); // create buffer with 1 element

    while (cs_disasm(handle, code, code_size, ip, 1, buffer)) {
        cs_insn *insn = &buffer->insn[0]; // get first insn in a buffer

        disassembled_instructions += 1;

        // update code pointer, code size and instruction pointer
        code += insn->size;
        code_size -= insn->size;
        ip += insn->size;
    }

    cs_buffer_free(buffer); // free buffer

Updating the use of cs_disasm() is straightforward: just use
cs_buffer_new(0) to create a buffer and pass it to cs_disasm().
  • Loading branch information
numas13 committed May 25, 2024
1 parent fe60b13 commit b0e8a13
Show file tree
Hide file tree
Showing 34 changed files with 403 additions and 522 deletions.
407 changes: 135 additions & 272 deletions cs.c

Large diffs are not rendered by default.

12 changes: 9 additions & 3 deletions cstool/cstool.c
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ int main(int argc, char **argv)
uint8_t *assembly;
size_t count, size;
uint64_t address = 0LL;
cs_insn *insn;
cs_buffer *buffer;
cs_err err;
cs_mode md;
cs_arch arch = CS_ARCH_ALL;
Expand Down Expand Up @@ -643,8 +643,14 @@ int main(int argc, char **argv)
cs_option(handle, CS_OPT_DETAIL, CS_OPT_DETAIL_REAL);
}

count = cs_disasm(handle, assembly, size, address, 0, &insn);
buffer = cs_buffer_new(0);
if (!buffer) {
printf("ERROR: Failed on cs_buffer_new(), quit!\n");
return -1;
}
count = cs_disasm(handle, assembly, size, address, 0, buffer);
if (count > 0) {
cs_insn *insn = buffer->insn;
for (i = 0; i < count; i++) {
int j;

Expand Down Expand Up @@ -677,12 +683,12 @@ int main(int argc, char **argv)
}
}

cs_free(insn, count);
} else {
printf("ERROR: invalid assembly code\n");
return(-4);
}

cs_buffer_free(buffer);
cs_close(&handle);
free(assembly);

Expand Down
156 changes: 72 additions & 84 deletions include/capstone/capstone.h
Original file line number Diff line number Diff line change
Expand Up @@ -487,12 +487,21 @@ typedef struct cs_insn {
/// NOTE: this macro works with position (>=1), not index
#define CS_INSN_OFFSET(insns, post) (insns[post - 1].address - insns[0].address)

/// Dynamic buffer for disassembled instructions.
typedef struct cs_buffer {
/// Dynamic array for disassembled instructions.
cs_insn *insn;
/// Capacity of @insn array.
size_t capacity;
/// The number of disassembled instructions in @insn.
size_t count;
} cs_buffer;

/// All type of errors encountered by Capstone API.
/// These are values returned by cs_errno()
typedef enum cs_err {
CS_ERR_OK = 0, ///< No error: everything was fine
CS_ERR_MEM, ///< Out-Of-Memory error: cs_open(), cs_disasm(), cs_disasm_iter()
CS_ERR_MEM, ///< Out-Of-Memory error: cs_open(), cs_disasm()
CS_ERR_ARCH, ///< Unsupported architecture: cs_open()
CS_ERR_HANDLE, ///< Invalid handle: cs_op_count(), cs_op_index()
CS_ERR_CSH, ///< Invalid csh argument: cs_close(), cs_errno(), cs_option()
Expand Down Expand Up @@ -654,106 +663,85 @@ CAPSTONE_EXPORT
const char * CAPSTONE_API cs_strerror(cs_err code);

/**
Disassemble binary code, given the code buffer, size, address and number
of instructions to be decoded.
This API dynamically allocate memory to contain disassembled instruction.
Resulting instructions will be put into @*insn
NOTE 1: this API will automatically determine memory needed to contain
output disassembled instructions in @insn.
NOTE 2: caller must free the allocated memory itself to avoid memory leaking.
NOTE 3: for system with scarce memory to be dynamically allocated such as
OS kernel or firmware, the API cs_disasm_iter() might be a better choice than
cs_disasm(). The reason is that with cs_disasm(), based on limited available
memory, we have to calculate in advance how many instructions to be disassembled,
which complicates things. This is especially troublesome for the case @count=0,
when cs_disasm() runs uncontrollably (until either end of input buffer, or
when it encounters an invalid instruction).
@handle: handle returned by cs_open()
@code: buffer containing raw binary code to be disassembled.
@code_size: size of the above code buffer.
@address: address of the first instruction in given raw code buffer.
@insn: array of instructions filled in by this API.
NOTE: @insn will be allocated by this function, and should be freed
with cs_free() API.
@count: number of instructions to be disassembled, or 0 to get all of them
Allocate dynamic buffer for instructions to be used by cs_disasm().
@return: the number of successfully disassembled instructions,
or 0 if this function failed to disassemble the given code
Use cs_buffer_free() to free memory.
On failure, call cs_errno() for error code.
@capacity: initial capacity of the buffer. Pass 0 to use default.
@return: returns a pointer to new instruction buffer.
*/
CAPSTONE_EXPORT
size_t CAPSTONE_API cs_disasm(csh handle,
const uint8_t *code, size_t code_size,
uint64_t address,
size_t count,
cs_insn **insn);
cs_buffer * CAPSTONE_API cs_buffer_new(size_t capacity);

/**
Free memory allocated by cs_malloc() or cs_disasm() (argument @insn)
Free an instruction buffer.
@insn: pointer returned by @insn argument in cs_disasm() or cs_malloc()
@count: number of cs_insn structures returned by cs_disasm(), or 1
to free memory allocated by cs_malloc().
@buffer: buffer returned by cs_buffer_new().
*/
CAPSTONE_EXPORT
void CAPSTONE_API cs_free(cs_insn *insn, size_t count);
void CAPSTONE_API cs_buffer_free(cs_buffer *buffer);

/**
Clears an instruction buffer.
cs_buffer_clear() does not free the allocated memory; use cs_buffer_free() for that.
@buffer: an instruction buffer created by cs_buffer_new().
*/
CAPSTONE_EXPORT
void CAPSTONE_API cs_buffer_clear(cs_buffer *buffer);

/**
Allocate memory for 1 instruction to be used by cs_disasm_iter().
Reserve exact size in an instruction buffer.
@handle: handle returned by cs_open()
@buffer: buffer returned by cs_buffer_new().
@capacity: required capacity.
NOTE: when no longer in use, you can reclaim the memory allocated for
this instruction with cs_free(insn, 1)
*/
@return: returns false if failed to allocate memory.
*/
CAPSTONE_EXPORT
cs_insn * CAPSTONE_API cs_malloc(csh handle);
bool CAPSTONE_API cs_buffer_reserve_exact(cs_buffer *buffer, size_t capacity);

/**
Fast API to disassemble binary code, given the code buffer, size, address
and number of instructions to be decoded.
This API puts the resulting instruction into a given cache in @insn.
See tests/test_iter.c for sample code demonstrating this API.
NOTE 1: this API will update @code, @size & @address to point to the next
instruction in the input buffer. Therefore, it is convenient to use
cs_disasm_iter() inside a loop to quickly iterate all the instructions.
While decoding one instruction at a time can also be achieved with
cs_disasm(count=1), some benchmarks shown that cs_disasm_iter() can be 30%
faster on random input.
NOTE 2: the cache in @insn can be created with cs_malloc() API.
NOTE 3: for system with scarce memory to be dynamically allocated such as
OS kernel or firmware, this API is recommended over cs_disasm(), which
allocates memory based on the number of instructions to be disassembled.
The reason is that with cs_disasm(), based on limited available memory,
we have to calculate in advance how many instructions to be disassembled,
which complicates things. This is especially troublesome for the case
@count=0, when cs_disasm() runs uncontrollably (until either end of input
buffer, or when it encounters an invalid instruction).
Reserve additional size in a buffer.
NOTE: cs_buffer_reserve() can reserve more capacity than requested. Use cs_buffer_reserve_exact()
if you need an exact buffer capacity.
@buffer: buffer returned by cs_buffer_new().
@additional: additional capacity to allocate.
@return: returns false if failed to allocate memory.
*/
CAPSTONE_EXPORT
bool CAPSTONE_API cs_buffer_reserve(cs_buffer *buffer, size_t additional);

/**
Disassemble binary code, given the code buffer, size, address and number
of instructions to be decoded.
This API dynamically expands @buffer to fill it with @count instructions.
Clears @buffer before filling it.
@handle: handle returned by cs_open()
@code: buffer containing raw binary code to be disassembled
@size: size of above code
@address: address of the first insn in given raw code buffer
@insn: pointer to instruction to be filled in by this API.
@code: buffer containing raw binary code to be disassembled.
@code_size: size of the above code buffer.
@address: address of the first instruction in given raw code buffer.
@count: number of instructions to be disassembled, or 0 to get all of them
@buffer: buffer filled in by this API.
@return: true if this API successfully decode 1 instruction,
or false otherwise.
@return: the number of successfully disassembled instructions,
or 0 if this function failed to disassemble the given code
On failure, call cs_errno() for error code.
*/
CAPSTONE_EXPORT
bool CAPSTONE_API cs_disasm_iter(csh handle,
const uint8_t **code, size_t *size,
uint64_t *address, cs_insn *insn);
size_t CAPSTONE_API cs_disasm(csh handle,
const uint8_t *code, size_t code_size,
uint64_t address, size_t count,
cs_buffer *buffer);


/**
Return friendly name of register in a string.
Expand Down Expand Up @@ -812,7 +800,7 @@ const char * CAPSTONE_API cs_group_name(csh handle, unsigned int group_id);
update @groups array.
@handle: handle returned by cs_open()
@insn: disassembled instruction structure received from cs_disasm() or cs_disasm_iter()
@insn: disassembled instruction structure received from cs_disasm()
@group_id: group that you want to check if this instruction belong to.
@return: true if this instruction indeed belongs to the given group, or false otherwise.
Expand All @@ -830,7 +818,7 @@ bool CAPSTONE_API cs_insn_group(csh handle, const cs_insn *insn, unsigned int gr
WARN: when in 'diet' mode, this API is irrelevant because the engine does not
update @regs_read array.
@insn: disassembled instruction structure received from cs_disasm() or cs_disasm_iter()
@insn: disassembled instruction structure received from cs_disasm()
@reg_id: register that you want to check if this instruction used it.
@return: true if this instruction indeed implicitly used the given register, or false otherwise.
Expand All @@ -848,7 +836,7 @@ bool CAPSTONE_API cs_reg_read(csh handle, const cs_insn *insn, unsigned int reg_
WARN: when in 'diet' mode, this API is irrelevant because the engine does not
update @regs_write array.
@insn: disassembled instruction structure received from cs_disasm() or cs_disasm_iter()
@insn: disassembled instruction structure received from cs_disasm()
@reg_id: register that you want to check if this instruction modified it.
@return: true if this instruction indeed implicitly modified the given register, or false otherwise.
Expand All @@ -863,7 +851,7 @@ bool CAPSTONE_API cs_reg_write(csh handle, const cs_insn *insn, unsigned int reg
NOTE: this API is only valid when detail option is ON (which is OFF by default)
@handle: handle returned by cs_open()
@insn: disassembled instruction structure received from cs_disasm() or cs_disasm_iter()
@insn: disassembled instruction structure received from cs_disasm()
@op_type: Operand type to be found.
@return: number of operands of given type @op_type in instruction @insn,
Expand All @@ -880,7 +868,7 @@ int CAPSTONE_API cs_op_count(csh handle, const cs_insn *insn, unsigned int op_ty
NOTE: this API is only valid when detail option is ON (which is OFF by default)
@handle: handle returned by cs_open()
@insn: disassembled instruction structure received from cs_disasm() or cs_disasm_iter()
@insn: disassembled instruction structure received from cs_disasm()
@op_type: Operand type to be found.
@position: position of the operand to be found. This must be in the range
[1, cs_op_count(handle, insn, op_type)]
Expand All @@ -903,7 +891,7 @@ typedef uint16_t cs_regs[64];
store registers.
@handle: handle returned by cs_open()
@insn: disassembled instruction structure returned from cs_disasm() or cs_disasm_iter()
@insn: disassembled instruction structure returned from cs_disasm()
@regs_read: on return, this array contains all registers read by instruction.
@regs_read_count: number of registers kept inside @regs_read array.
@regs_write: on return, this array contains all registers written by instruction.
Expand Down
22 changes: 12 additions & 10 deletions suite/arm/test_arm_regression.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ static void test_invalids()
struct invalid_instructions * invalid = NULL;

uint64_t address = 0x1000;
cs_insn *insn;
cs_buffer *buffer;
int i;
int j;
size_t count;
Expand Down Expand Up @@ -205,26 +205,28 @@ static void test_invalids()

free(hex_str);

count = cs_disasm(handle,
invalid_code->code, invalid_code->size, address, 0, &insn
);
buffer = cs_buffer_new(0);
count = cs_disasm(handle, invalid_code->code, invalid_code->size,
address, 0, buffer);

if (count) {
size_t k;
cs_insn *insn = buffer->insn;

printf(" ERROR:\n");

for (k = 0; k < count; k++) {
printf(" 0x%"PRIx64":\t%s\t%s\n",
insn[k].address, insn[k].mnemonic, insn[k].op_str);
print_insn_detail(&insn[k]);
}
cs_free(insn, count);

} else {
printf(" SUCCESS: invalid\n");
}
}

cs_buffer_free(buffer);
cs_close(&handle);
}
}
Expand Down Expand Up @@ -286,7 +288,7 @@ static void test_valids()

struct valid_instructions *valid = NULL;

cs_insn *insn;
cs_buffer *buffer;
int i;
int j;
size_t count;
Expand Down Expand Up @@ -333,15 +335,17 @@ static void test_valids()

free(hex_str);

buffer = cs_buffer_new(0);
count = cs_disasm(handle,
valid_code->code, valid_code->size,
valid_code->start_addr, 0, &insn
valid_code->start_addr, 0, buffer
);

if (count) {
size_t k;
size_t max_len = 0;
size_t tmp_len = 0;
cs_insn *insn = buffer->insn;

for (k = 0; k < count; k++) {
_this_printf(
Expand All @@ -368,14 +372,12 @@ static void test_valids()
} else {
printf(" SUCCESS: valid\n");
}

cs_free(insn, count);

} else {
printf("ERROR: invalid\n");
}
}

cs_buffer_free(buffer);
cs_close(&handle);
}

Expand Down
Loading

0 comments on commit b0e8a13

Please sign in to comment.