From 3c0a1e2c6640fab2444b0250312b660ddb685c45 Mon Sep 17 00:00:00 2001 From: Ali Rizvi-Santiago Date: Sun, 22 May 2022 19:38:36 -0500 Subject: [PATCH 1/7] internal/interface: corrected the implementation of the `typemap.dissolve` function so that it handles `type` correctly. --- base/_interface.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/base/_interface.py b/base/_interface.py index 0b4408b345..2e7f974988 100644 --- a/base/_interface.py +++ b/base/_interface.py @@ -70,9 +70,9 @@ class typemap(object): and thus can be used to quickly read or apply a type to a field within a structure. """ - + MS_0TYPE, MS_1TYPE = idaapi.MS_0TYPE, idaapi.MS_1TYPE FF_MASKSIZE = idaapi.as_uint32(idaapi.DT_TYPE) # Mask that select's the flag's size - FF_MASK = FF_MASKSIZE | 0xfff00000 # Mask that select's the flag's repr + FF_MASK = FF_MASKSIZE | MS_0TYPE | MS_1TYPE # Mask that select's the flag's repr # FIXME: In some cases FF_nOFF (where n is 0 or 1) does not actually # get auto-treated as an pointer by ida. Instead, it appears to @@ -155,8 +155,12 @@ class typemap(object): inverted[f & FF_MASKSIZE] = (float, s) for s, (f, _) in stringmap.items(): inverted[f & FF_MASKSIZE] = (str, s) + + # Add all the available flag types to support all available pointer types. 
for s, (f, _) in ptrmap.items(): inverted[f & FF_MASK] = (type, s) + inverted[f & FF_MASK & ~MS_0TYPE] = (type, s) + inverted[f & FF_MASK & ~MS_1TYPE] = (type, s) del f # FIXME: this is a hack for dealing with structures that @@ -189,32 +193,33 @@ def dissolve(cls, flag, typeid, size): '''Convert the specified `flag`, `typeid`, and `size` into a pythonic type.''' structure = sys.modules.get('structure', __import__('structure')) FF_STRUCT = idaapi.FF_STRUCT if hasattr(idaapi, 'FF_STRUCT') else idaapi.FF_STRU - dt = flag & cls.FF_MASKSIZE + dtype, dsize = flag & cls.FF_MASK, flag & cls.FF_MASKSIZE sf = -1 if flag & idaapi.FF_SIGN == idaapi.FF_SIGN else +1 # Check if the dtype is a structure and our type-id is an integer so that we # figure out the structure's size. We also do an explicit check if the type-id # is a structure because in some cases, IDA will forget to set the FF_STRUCT # flag but still assign the structure type-id to a union member. - if (dt == FF_STRUCT and isinstance(typeid, six.integer_types)) or (typeid is not None and structure.has(typeid)): + if (dtype == FF_STRUCT and isinstance(typeid, six.integer_types)) or (typeid is not None and structure.has(typeid)): # FIXME: figure out how to fix this recursive module dependency t = structure.by_identifier(typeid) sz = t.size return t if sz == size else [t, size // sz] # Verify that we actually have the datatype mapped and that we can look it up. 
- if dt not in cls.inverted: - raise internal.exceptions.InvalidTypeOrValueError(u"{:s}.dissolve({!r}, {!r}, {!r}) : Unable to locate a pythonic type that matches the specified flag.".format('.'.join([__name__, cls.__name__]), dt, typeid, size)) + if all(item not in cls.inverted for item in [dsize, dtype]): + raise internal.exceptions.InvalidTypeOrValueError(u"{:s}.dissolve({!r}, {!r}, {!r}) : Unable to locate a pythonic type that matches the specified flag.".format('.'.join([__name__, cls.__name__]), dtype, typeid, size)) # Now that we know the datatype exists, extract the actual type and the - # type's size from the inverted map that we previously created. - t, sz = cls.inverted[dt] + # type's size from the inverted map that we previously created. We start + # by checking the dtype first for pointers and then fall back to the size. + t, sz = cls.inverted[dtype] if dtype in cls.inverted else cls.inverted[dsize] # If the datatype size is not an integer, then we need to calculate the # size ourselves using the size parameter we were given and the element # size of the datatype that we extracted from the flags. if not isinstance(sz, six.integer_types): - count = size // idaapi.get_data_elsize(idaapi.BADADDR, dt, idaapi.opinfo_t()) + count = size // idaapi.get_data_elsize(idaapi.BADADDR, dtype, idaapi.opinfo_t()) return [t, count] if count > 1 else t # If the size matches the datatype size, then this is a single element From a4ff868db98e8966054399103ee2c77ab593b08d Mon Sep 17 00:00:00 2001 From: Ali Rizvi-Santiago Date: Mon, 23 May 2022 01:28:36 -0500 Subject: [PATCH 2/7] internal/interface: tweaked the `typemap.ptrmap` dictionary so that the returned `opinfo_t.tid` is 0 instead of `BADADDR` to avoid a warning. 
--- base/_interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/_interface.py b/base/_interface.py index 2e7f974988..28d19b43a2 100644 --- a/base/_interface.py +++ b/base/_interface.py @@ -106,7 +106,7 @@ class typemap(object): if hasattr(builtins, 'unicode'): stringmap.setdefault(builtins.unicode, (idaapi.asciflag(), idaapi.ASCSTR_UNICODE)) - ptrmap = { sz : (idaapi.offflag() | flg, tid) for sz, (flg, tid) in integermap.items() } + ptrmap = { sz : (idaapi.offflag() | flg, 0) for sz, (flg, _) in integermap.items() } nonemap = { None :(idaapi.alignflag(), -1) } ## IDA 7.0 types @@ -133,7 +133,7 @@ class typemap(object): if hasattr(builtins, 'unicode'): stringmap.setdefault(builtins.unicode, (idaapi.strlit_flag(), idaapi.STRTYPE_C_16)) - ptrmap = { sz : (idaapi.off_flag() | flg, tid) for sz, (flg, tid) in integermap.items() } + ptrmap = { sz : (idaapi.off_flag() | flg, 0) for sz, (flg, _) in integermap.items() } nonemap = { None :(idaapi.align_flag(), -1) } # Generate the lookup table for looking up the correct tables for a given type. From eb16e0a09b59894b7ce73ddf4c987299b1f0bfe7 Mon Sep 17 00:00:00 2001 From: Ali Rizvi-Santiago Date: Mon, 23 May 2022 01:31:54 -0500 Subject: [PATCH 3/7] internal/interface: fixed the inversion map used by the `typemap.dissolve` function to allow representing alignment with `None`. --- base/_interface.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/base/_interface.py b/base/_interface.py index 28d19b43a2..3e19268d49 100644 --- a/base/_interface.py +++ b/base/_interface.py @@ -155,6 +155,8 @@ class typemap(object): inverted[f & FF_MASKSIZE] = (float, s) for s, (f, _) in stringmap.items(): inverted[f & FF_MASKSIZE] = (str, s) + for s, (f, _) in nonemap.items(): + inverted[f & FF_MASKSIZE] = (None, s) # Add all the available flag types to support all available pointer types. 
for s, (f, _) in ptrmap.items(): From 572f1cecd2720c73a732a016b07f138f850d21f9 Mon Sep 17 00:00:00 2001 From: Ali Rizvi-Santiago Date: Thu, 26 May 2022 14:32:13 -0500 Subject: [PATCH 4/7] internal/interface: implemented the `typemap.refinfo` function for updating the reference information for an identifier. --- base/_interface.py | 52 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/base/_interface.py b/base/_interface.py index 3e19268d49..8655009bcf 100644 --- a/base/_interface.py +++ b/base/_interface.py @@ -74,10 +74,6 @@ class typemap(object): FF_MASKSIZE = idaapi.as_uint32(idaapi.DT_TYPE) # Mask that select's the flag's size FF_MASK = FF_MASKSIZE | MS_0TYPE | MS_1TYPE # Mask that select's the flag's repr - # FIXME: In some cases FF_nOFF (where n is 0 or 1) does not actually - # get auto-treated as an pointer by ida. Instead, it appears to - # only get marked as an "offset" and rendered as an integer. - # FIXME: Figure out how to update this to use/create an idaapi.tinfo_t() # and also still remain backwards-compatible with the older idaapi.opinfo_t() @@ -169,6 +165,12 @@ class typemap(object): # have the flag set but aren't actually structures.. inverted[idaapi.FF_STRUCT if hasattr(idaapi, 'FF_STRUCT') else idaapi.FF_STRU] = (int, 1) + # refinfo map for the sizes (IDA 6.9 uses the same names) + refinfomap = { + 1 : idaapi.REF_OFF8, 2 : idaapi.REF_OFF16, + 4 : idaapi.REF_OFF32, 8 : idaapi.REF_OFF64, + } + # Assign the default values for the processor that was selected for the database. 
@classmethod def __newprc__(cls, pnum): @@ -298,6 +300,48 @@ def resolve(cls, pythonType): typeid = idaapi.BADADDR if typeid < 0 else typeid return flag | (idaapi.FF_SIGN if sz < 0 else 0), typeid, abs(sz) * count + @classmethod + def update_refinfo(cls, identifier, flag): + '''This updates the refinfo for the given `identifier` according to the provided `flag`.''' + get_refinfo = (lambda ri, ea, opnum: idaapi.get_refinfo(ea, opnum, ri)) if idaapi.__version__ < 7.0 else idaapi.get_refinfo + set_refinfo, opmasks = idaapi.set_refinfo, [idaapi.FF_0OFF, idaapi.FF_1OFF] + + # Refinfo seems to be relevant to a given operand, but users really only + # apply types to addresses unless it's an explicit operand type. So, what + # we'll do to deal with this is take the flag that we're given and use + # it to figure out which actual operand is being updated so that we don't + # have to assume the one that IDA uses based on whatever's being updated. + dtype, dsize = flag & cls.FF_MASK, flag & cls.FF_MASKSIZE + + # First we'll grab the size and make sure that we actually support it. + # We should.. because we support all of IDA's native types. Then we + # generate a list of all of the available operands to apply the ref to. + _, size = cls.inverted[dsize] + ptrmask, _ = cls.ptrmap[size] + operands = [index for index, opmask in enumerate([idaapi.FF_0OFF, idaapi.FF_1OFF]) if dtype & ptrmask & opmask] + + # Before we change anything, do a smoke-test to ensure that we actually + # are able to choose a default reference size if we're going to update. + if len(operands) > 0 and size not in cls.refinfomap: + logging.warning(u"{:s}.refinfo({:#x}, {:#x}) : Unable to determine a default reference type for the given size ({:d}).".format('.'.join([__name__, cls.__name__]), identifier, flag, size)) + return 0 + + # Now we can choose our type from the refinfomap, and apply it to each + # operand in our list of operands that we just resolved.
The set_refinfo + # api should _never_ fail, so we only log warnings if they do. + api = [set_refinfo.__module__, set_refinfo.__name__] if hasattr(set_refinfo, '__module__') else [set_refinfo.__name__] + for opnum in operands: + if not set_refinfo(identifier, opnum, cls.refinfomap[size]): + logging.warning(u"{:s}.refinfo({:#x}, {:#x}) : The api call to `{:s}(ea={:#x}, n={:d}, ri={:d})` returned failure.".format('.'.join([__name__, cls.__name__]), identifier, flag, '.'.join(api), identifier, opnum, cls.refinfomap[size])) + continue + + # FIXME: figure out how to update the ui so that it references the new + # information but without any dumb performance issues (that might + # be caused by asking it to redraw everything). + + # Just return the total number of operands that we updated...for now. + return len(operands) + class prioritybase(object): result = type('result', (object,), {}) CONTINUE = type('continue', (result,), {})() From 72344d0fb47250e54a2aed873365ffa968bf7b86 Mon Sep 17 00:00:00 2001 From: Ali Rizvi-Santiago Date: Thu, 26 May 2022 15:10:25 -0500 Subject: [PATCH 5/7] base/structure: fixed both the `members_t` and `member_t` classes so that their reference info is updated. --- base/structure.py | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/base/structure.py b/base/structure.py index 2e85d40da8..5d3076909e 100644 --- a/base/structure.py +++ b/base/structure.py @@ -1504,17 +1504,10 @@ def add(self, name, type, offset): # parameters that we were given and/or figured out. res = idaapi.add_struc_member(owner.ptr, utils.string.to(name), realoffset, flag, opinfo, nbytes) - # Now we can check whether the addition was succesful or not. If the - # addition didn't return an error code, then log the success in order - # to assist with debugging. 
- if res == idaapi.STRUC_ERROR_MEMBER_OK: - cls = self.__class__ - logging.debug(u"{:s}({:#x}).members.add({!r}, {!s}, {:+#x}) : The api call to `idaapi.add_struc_member(sptr=\"{:s}\", fieldname=\"{:s}\", offset={:+#x}, flag={:#x}, mt={:#x}, nbytes={:#x})` returned success.".format('.'.join([__name__, cls.__name__]), owner.ptr.id, name, type, offset, utils.string.escape(owner.name, '"'), utils.string.escape(name, '"'), realoffset, flag, typeid, nbytes)) - # If we received a failure error code, then we convert the error code to # an error message so that we can raise an exception that actually means # something and enables the user to correct it. - else: + if res != idaapi.STRUC_ERROR_MEMBER_OK: error = { idaapi.STRUC_ERROR_MEMBER_NAME : 'Duplicate field name', idaapi.STRUC_ERROR_MEMBER_OFFSET : 'Invalid offset', @@ -1525,7 +1518,7 @@ def add(self, name, type, offset): cls = self.__class__ raise e(u"{:s}({:#x}).members.add({!r}, {!s}, {:+#x}) : The api call to `{:s}` returned {:s}".format('.'.join([__name__, cls.__name__]), owner.ptr.id, name, type, offset, callee, error.get(res, u"Error code {:#x}".format(res)))) - # We added the member, but now we need to return it to the caller. Since + # Now we need to return the newly created member to the caller. Since # all we get is an error code from IDAPython's api, we try and fetch the # member that was just added by the offset it's supposed to be at. mptr = idaapi.get_member(owner.ptr, realoffset) @@ -1533,6 +1526,11 @@ def add(self, name, type, offset): cls = self.__class__ raise E.MemberNotFoundError(u"{:s}({:#x}).members.add({!r}, {!s}, {:+#x}) : Unable to locate recently created member \"{:s}\" at offset {:s}{:+#x}.".format('.'.join([__name__, cls.__name__]), owner.ptr.id, name, type, offset, utils.string.escape(name, '"'), realoffset, nbytes)) + # We can now log our small success and update the member's refinfo if it + # was actually necessary. 
+ cls, refcount = self.__class__, interface.typemap.update_refinfo(mptr.id, flag) + logging.debug(u"{:s}({:#x}).members.add({!r}, {!s}, {:+#x}) : The api call to `idaapi.add_struc_member(sptr=\"{:s}\", fieldname=\"{:s}\", offset={:+#x}, flag={:#x}, mt={:#x}, nbytes={:#x})` returned success{:s}.".format('.'.join([__name__, cls.__name__]), owner.ptr.id, name, type, offset, utils.string.escape(owner.name, '"'), utils.string.escape(name, '"'), realoffset, flag, typeid, nbytes, " ({:d} references)".format(refcount) if refcount > 0 else '')) + # If we successfully grabbed the member, then we need to figure out its # actual index in our structure. Then we can use the index to instantiate # a member_t that we'll return back to the caller. @@ -2487,21 +2485,29 @@ def type(self, type): cls = self.__class__ raise E.DisassemblerError(u"{:s}({:#x}).type({!s}) : Unable to assign the provided type ({!s}) to the structure member {:s}.".format('.'.join([__name__, cls.__name__]), self.id, type, type, utils.string.repr(self.name))) - items = flag, typeid, nbytes - newflag, newtypeid, newsize = self.flag, self.typeid or idaapi.BADADDR, self.size - if newflag != flag: - cls = self.__class__ - logging.info(u"{:s}({:#x}).type({!s}) : The provided flags ({:#x}) were incorrectly assigned as {:#x}.".format('.'.join([__name__, cls.__name__]), self.id, items, flags, newflags)) + # verify that our type has been applied before we update its refinfo, + # because if it hasn't then we need to warn the user about it so that + # they know what's up and why we didn't do what we were asked.
+ expected, expected_tid = (flag, nbytes), typeid + resulting, resulting_tid = (self.flag, self.size), self.typeid - if newtypeid != typeid: + if expected == resulting: + interface.typemap.update_refinfo(self.id, flag) + else: cls = self.__class__ - logging.info(u"{:s}({:#x}).type({!s}) : The provided typeid ({:#x}) was incorrectly assigned as {:#x}.".format('.'.join([__name__, cls.__name__]), self.id, items, typeid, newtypeid)) + logging.warning(u"{:s}({:#x}).type({!s}) : Applying the given flags and size ({:#x}, {:d}) resulted in different flags and size being assigned ({:#x}, {:d}).".format('.'.join([__name__, cls.__name__]), self.id, type, *itertools.chain(expected, resulting))) - if newsize != nbytes: + # smoke-test that we actually updated the type identifier and log it if it + # didn't actually work. this is based on my ancient logic which assumed + # that opinfo.tid should be BADADDR which isn't actually the truth when + # you're working with a refinfo. hence we try to be quiet about it. + if expected_tid != (resulting_tid or idaapi.BADADDR): cls = self.__class__ - logging.info(u"{:s}({:#x}).type({!s}) : The provided size ({:+#x}) was incorrectly assigned as {:+#x}.".format('.'.join([__name__, cls.__name__]), self.id, items, nbytes, newsize)) + logging.info(u"{:s}({:#x}).type({!s}) : The provided typeid ({:#x}) was incorrectly assigned as {:#x}.".format('.'.join([__name__, cls.__name__]), self.id, type, expected_tid, resulting_tid)) - return newflag, newtypeid, newsize + # return the stuff that actually applied. + flag, size = resulting + return flag, resulting_tid, size @property def typeinfo(self): From 95d544b16197bbb4c11fd00676189310779a914c Mon Sep 17 00:00:00 2001 From: Ali Rizvi-Santiago Date: Thu, 26 May 2022 15:12:21 -0500 Subject: [PATCH 6/7] base/database: updated the `set.array` function so that it properly updates its reference information. 
--- base/database.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/base/database.py b/base/database.py index ba04e824f0..ab7df04d6e 100644 --- a/base/database.py +++ b/base/database.py @@ -6220,11 +6220,14 @@ def array(cls, ea, type, length): else: realtype, reallength = [type, length], length - # now we can figure out its IDA type + # now we can figure out its IDA type and create the data. after + # that, though, we need to update its refinfo before we leave. flags, typeid, nbytes = interface.typemap.resolve(realtype) - ok = idaapi.create_data(ea, flags, nbytes, typeid) - if not ok: + if not idaapi.create_data(ea, flags, nbytes, typeid): raise E.DisassemblerError(u"{:s}.array({:#x}, {!r}, {:d}) : Unable to define the specified address as an array.".format('.'.join([__name__, cls.__name__]), ea, type, length)) + interface.typemap.update_refinfo(ea, flags) + + # return the array that we just created. return get.array(ea, length=reallength) class get(object): From 30019cfe4195df8757412345057d9bcc4896d3d8 Mon Sep 17 00:00:00 2001 From: Ali Rizvi-Santiago Date: Thu, 26 May 2022 16:22:30 -0500 Subject: [PATCH 7/7] internal/interface: corrected the flags used by the `typemap.dissolve` function when determining the element size and identifying a structure. --- base/_interface.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/base/_interface.py b/base/_interface.py index 8655009bcf..2a2679e0c9 100644 --- a/base/_interface.py +++ b/base/_interface.py @@ -200,11 +200,12 @@ def dissolve(cls, flag, typeid, size): dtype, dsize = flag & cls.FF_MASK, flag & cls.FF_MASKSIZE sf = -1 if flag & idaapi.FF_SIGN == idaapi.FF_SIGN else +1 - # Check if the dtype is a structure and our type-id is an integer so that we + # Check if the dtype's size field (dsize) is describing a structure and + # verify that our type-id is an integer so that we know that we need to # figure out the structure's size. 
We also do an explicit check if the type-id # is a structure because in some cases, IDA will forget to set the FF_STRUCT # flag but still assign the structure type-id to a union member. - if (dtype == FF_STRUCT and isinstance(typeid, six.integer_types)) or (typeid is not None and structure.has(typeid)): + if (dsize == FF_STRUCT and isinstance(typeid, six.integer_types)) or (typeid is not None and structure.has(typeid)): # FIXME: figure out how to fix this recursive module dependency t = structure.by_identifier(typeid) sz = t.size @@ -214,16 +215,17 @@ def dissolve(cls, flag, typeid, size): if all(item not in cls.inverted for item in [dsize, dtype]): raise internal.exceptions.InvalidTypeOrValueError(u"{:s}.dissolve({!r}, {!r}, {!r}) : Unable to locate a pythonic type that matches the specified flag.".format('.'.join([__name__, cls.__name__]), dtype, typeid, size)) - # Now that we know the datatype exists, extract the actual type and the - # type's size from the inverted map that we previously created. We start - # by checking the dtype first for pointers and then fall back to the size. + # Now that we know the datatype exists, extract the actual type (dtype) + # and the type's size (dsize) from the inverted map while giving priority + # to the type. This way we're checking the dtype for pointers (references) + # and then only afterwards do we fall back to depending on the size. t, sz = cls.inverted[dtype] if dtype in cls.inverted else cls.inverted[dsize] # If the datatype size is not an integer, then we need to calculate the # size ourselves using the size parameter we were given and the element - # size of the datatype that we extracted from the flags. + # size of the datatype as determined by the flags (DT_TYPE | MS_CLS). 
if not isinstance(sz, six.integer_types): - count = size // idaapi.get_data_elsize(idaapi.BADADDR, dtype, idaapi.opinfo_t()) + count = size // idaapi.get_data_elsize(idaapi.BADADDR, flag, idaapi.opinfo_t()) return [t, count] if count > 1 else t # If the size matches the datatype size, then this is a single element