diff --git a/docs/machostructure-dsl.md b/docs/machostructure-dsl.md new file mode 100644 index 000000000..9a666f6be --- /dev/null +++ b/docs/machostructure-dsl.md @@ -0,0 +1,62 @@ +# Internal MachOStructure DSL +## Documentation +The MachOStructure class makes it easy to describe binary chunks by using the #field method. This method generates the byte size and format strings necessary to parse a chunk of binary data. It also automatically generates the constructor and readers for all fields. + +The fields are created in order so you will be expected to pass those arguments to the constructor in the same order. Fields with no arguments should be defined last and fields with default arguments should be defined right before them. + +The type and options of inherited fields can be changed but their argument position and the number of arguments (used to calculate min_args) will not change. + +Usually, endianness is handled by the Utils#specialize_format method but occasionally a field needs to specify that beforehand. That is what the :endian option is for. If not specified, a placeholder is used so that it can be specified later. + +## Syntax +```ruby +field [field name], [field type], [option1 => value1], [option2 => value2], ... 
+``` + +## Example +```ruby +class AllFields < MachO::MachOStructure + field :name1, :string, :size => 16 + field :name3, :int32 + field :name4, :uint32 + field :name5, :uint64 + field :name6, :view + field :name7, :lcstr + field :name8, :two_level_hints_table + field :name9, :tool_entries +end +``` + +## Field Types +- `:string` [requires `:size` option] [optional `:padding` option] + - a string +- `:int32` + - a signed 32 bit integer +- `:uint32` + - an unsigned 32 bit integer +- `:uint64` + - an unsigned 64 bit integer +- `:view` [initialized] + - an instance of the MachOView class (lib/macho/view.rb) +- `:lcstr` [NOT initialized] + - an instance of the LCStr class (lib/macho/load_commands.rb) +- `:two_level_hints_table` [NOT initialized] [NO argument] + - an instance of the TwoLevelHintsTable class (lib/macho/load_commands.rb) +- `:tool_entries` [NOT initialized] + - an instance of the ToolEntries class (lib/macho/load_commands.rb) + +## Option Types +- Exclusive (only one can be used at a time) + - `:mask` [Integer] bitmask to be applied to field + - `:unpack` [String] binary unpack string used for further unpacking of :string + - `:default` [Value] default field value +- Inclusive (can be used with other options) + - `:to_s` [Boolean] generate `#to_s` method based on field +- Used with Integer field types + - `:endian` [Symbol] optionally specify `:big` or `:little` endian +- Used with `:string` field type + - `:size` [Integer] size in bytes + - `:padding` [Symbol] optionally specify `:null` padding + +## More Information +Hop over to lib/macho/structure.rb to see the class itself. 
diff --git a/lib/macho/headers.rb b/lib/macho/headers.rb index a8a931e67..4abdd533d 100644 --- a/lib/macho/headers.rb +++ b/lib/macho/headers.rb @@ -751,13 +751,13 @@ class PrelinkedKernelHeader < MachOStructure field :prelink_version, :uint32, :endian => :big # @return [void] - field :reserved, :binary, :size => 40, :unpack => "L>10" + field :reserved, :string, :size => 40, :unpack => "L>10" # @return [void] - field :platform_name, :binary, :size => 64 + field :platform_name, :string, :size => 64 # @return [void] - field :root_path, :binary, :size => 256 + field :root_path, :string, :size => 256 # @return [Boolean] whether this prelinked kernel supports KASLR def kaslr? diff --git a/lib/macho/load_commands.rb b/lib/macho/load_commands.rb index ce5375c05..6e4e57e9e 100644 --- a/lib/macho/load_commands.rb +++ b/lib/macho/load_commands.rb @@ -367,7 +367,7 @@ def initialize(endianness, alignment) # LC_UUID. class UUIDCommand < LoadCommand # @return [Array] the UUID - field :uuid, :binary, :size => 16, :unpack => "C16" + field :uuid, :string, :size => 16, :unpack => "C16" # @return [String] a string representation of the UUID def uuid_string @@ -398,7 +398,7 @@ def to_h # the task's address space. Corresponds to LC_SEGMENT. class SegmentCommand < LoadCommand # @return [String] the name of the segment - field :segname, :string, :size => 16, :to_s => true + field :segname, :string, :padding => :null, :size => 16, :to_s => true # @return [Integer] the memory address of the segment field :vmaddr, :uint32 @@ -1325,7 +1325,7 @@ def to_h # Corresponds to LC_NOTE. 
class NoteCommand < LoadCommand # @return [String] the name of the owner for this note - field :data_owner, :string, :size => 16, :to_s => true + field :data_owner, :string, :padding => :null, :size => 16, :to_s => true # @return [Integer] the offset, within the file, of the note field :offset, :uint64 diff --git a/lib/macho/sections.rb b/lib/macho/sections.rb index 9ae6301e2..0b48808de 100644 --- a/lib/macho/sections.rb +++ b/lib/macho/sections.rb @@ -89,11 +89,11 @@ module Sections # Represents a section of a segment for 32-bit architectures. class Section < MachOStructure # @return [String] the name of the section, including null pad bytes - field :sectname, :string, :size => 16 + field :sectname, :string, :padding => :null, :size => 16 # @return [String] the name of the segment's section, including null # pad bytes - field :segname, :string, :size => 16 + field :segname, :string, :padding => :null, :size => 16 # @return [Integer] the memory address of the section field :addr, :uint32 diff --git a/lib/macho/structure.rb b/lib/macho/structure.rb index e680046b3..06949a937 100644 --- a/lib/macho/structure.rb +++ b/lib/macho/structure.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module MachO - # A general purpose pseudo-structure. + # A general purpose pseudo-structure. Described in detail in docs/machostructure-dsl.md. # @abstract class MachOStructure # Constants used for parsing MachOStructure fields @@ -9,14 +9,14 @@ module Fields # 1. All fields with empty strings and zeros aren't used # to calculate the format and sizeof variables. # 2. All fields with nil should provide those values manually - # via the :size and :fmt parameters. + # via the :size parameter. 
# association of field types to byte size # @api private BYTE_SIZE = { # Binary slices - :binary => nil, :string => nil, + :null_padded_string => nil, :int32 => 4, :uint32 => 4, :uint64 => 8, @@ -36,8 +36,8 @@ module Fields # @api private FORMAT_CODE = { # Binary slices - :binary => "a", - :string => "Z", + :string => "a", + :null_padded_string => "Z", :int32 => "l=", :uint32 => "L=", :uint64 => "Q=", @@ -51,12 +51,12 @@ module Fields # A list of classes that must get initialized # To add a new class append it here and add the init method to the def_class_reader method # @api private - CLASS_LIST = %i[lcstr tool_entries two_level_hints_table].freeze + CLASSES_TO_INIT = %i[lcstr tool_entries two_level_hints_table].freeze - # A list of fields that don't require arguments + # A list of fields that don't require arguments in the initializer # Used to calculate MachOStructure#min_args # @api private - NO_ARGS_LIST = %i[two_level_hints_table].freeze + NO_ARG_REQUIRED = %i[two_level_hints_table].freeze end # map of field names to indices @@ -139,6 +139,7 @@ def inherited(subclass) # rubocop:disable Lint/MissingSuper # :default [Value] default value # :to_s [Boolean] flag for generating #to_s # :endian [Symbol] optionally specify :big or :little endian + # :padding [Symbol] optionally specify :null padding # @api private def field(name, type, **options) raise ArgumentError, "Invalid field type #{type}" unless Fields::FORMAT_CODE.key?(type) @@ -147,13 +148,16 @@ def field(name, type, **options) idx = if @field_idxs.key?(name) @field_idxs[name] else - @min_args += 1 unless options.key?(:default) || Fields::NO_ARGS_LIST.include?(type) + @min_args += 1 unless options.key?(:default) || Fields::NO_ARG_REQUIRED.include?(type) @field_idxs[name] = @field_idxs.size @size_list << nil @fmt_list << nil @field_idxs.size - 1 end + # Update string type if padding is specified + type = :null_padded_string if type == :string && options[:padding] == :null + # Add to size_list and fmt_list 
@size_list[idx] = Fields::BYTE_SIZE[type] || options[:size] @fmt_list[idx] = if options[:endian] @@ -164,7 +168,7 @@ def field(name, type, **options) @fmt_list[idx] += options[:size].to_s if options.key?(:size) # Generate methods - if Fields::CLASS_LIST.include?(type) + if Fields::CLASSES_TO_INIT.include?(type) def_class_reader(name, type, idx) elsif options.key?(:mask) def_mask_reader(name, idx, options[:mask]) @@ -184,7 +188,7 @@ def field(name, type, **options) # # Generates a reader method for classes that need to be initialized. - # These classes are defined in the Fields::CLASS_LIST array. + # These classes are defined in the Fields::CLASSES_TO_INIT array. # @param name [Symbol] name of internal field # @param type [Symbol] type of field in terms of binary size # @param idx [Integer] the index of the field value in the @values array diff --git a/test/test_structure_dsl.rb b/test/test_structure_dsl.rb index 0cb22584c..3af7c57a3 100644 --- a/test/test_structure_dsl.rb +++ b/test/test_structure_dsl.rb @@ -7,8 +7,8 @@ class MachOStructureTest < Minitest::Test # that information is reflected in the bytesize, min_args # and format. 
class AllFields < MachO::MachOStructure - field :binary, :binary, :size => 16 - field :string, :string, :size => 32 + field :string, :string, :size => 16 + field :null_term_str, :string, :padding => :null, :size => 32 field :int32, :int32 field :uint32, :uint32 field :uint64, :uint64 @@ -19,8 +19,8 @@ class AllFields < MachO::MachOStructure end def test_all_field_types - assert_includes AllFields.instance_methods, :binary assert_includes AllFields.instance_methods, :string + assert_includes AllFields.instance_methods, :null_term_str assert_includes AllFields.instance_methods, :int32 assert_includes AllFields.instance_methods, :uint32 assert_includes AllFields.instance_methods, :uint64 @@ -66,7 +66,7 @@ def test_mask_option end class UnpackCmd < MachO::MachOStructure - field :unpack_field, :binary, :size => 8, :unpack => "L>2" + field :unpack_field, :string, :size => 8, :unpack => "L>2" end def test_unpack_option @@ -106,4 +106,19 @@ class EndianCmd < MachO::MachOStructure def test_endian_option assert_equal EndianCmd.format, "L>L<" end + + class PaddingCmd < MachO::MachOStructure + field :str, :string, :size => 12 + field :null_term_str, :string, :padding => :null, :size => 12 + end + + def test_padding_option + assert_equal PaddingCmd.format, "a12Z12" + assert_equal PaddingCmd.bytesize, 24 + + padded_str = "Hello\x00World!" * 2 + padding_cmd = PaddingCmd.new_from_bin(:big, padded_str) + assert_equal padding_cmd.str, "Hello\x00World!" + assert_equal padding_cmd.null_term_str, "Hello" + end end