From b44faff96f74c4e8ee6908d571811a6580669de5 Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Tue, 20 Aug 2024 13:59:12 +1000 Subject: [PATCH 1/6] write: add `LineConvert` This allow reuse of the implementation of `LineProgram::from` as part of a more complex transformation. Operation of `LineProgram::from` is mostly unchanged. The one difference is that it now uses the string form chosen by `LineString::new` instead of copying the form of the input. --- src/read/dwarf.rs | 24 +++ src/write/dwarf.rs | 14 +- src/write/line.rs | 502 ++++++++++++++++++++++++++++++++------------- src/write/unit.rs | 4 +- 4 files changed, 397 insertions(+), 147 deletions(-) diff --git a/src/read/dwarf.rs b/src/read/dwarf.rs index c836d8457..43e99081d 100644 --- a/src/read/dwarf.rs +++ b/src/read/dwarf.rs @@ -420,6 +420,30 @@ impl Dwarf { } } + /// Return an attribute value as a string slice. + /// This only handles forms that are usable without an associated unit. + /// + /// If the attribute value is one of: + /// + /// - an inline `DW_FORM_string` string + /// - a `DW_FORM_strp` reference to an offset into the `.debug_str` section + /// - a `DW_FORM_strp_sup` reference to an offset into a supplementary + /// object file + /// - a `DW_FORM_line_strp` reference to an offset into the `.debug_line_str` + /// section + /// + /// then return the attribute's string value. Returns an error if the attribute + /// value does not have a string form, or if a string form has an invalid value. + pub fn attr_line_string(&self, attr: AttributeValue) -> Result { + match attr { + AttributeValue::String(string) => Ok(string), + AttributeValue::DebugStrRef(offset) => self.string(offset), + AttributeValue::DebugStrRefSup(offset) => self.sup_string(offset), + AttributeValue::DebugLineStrRef(offset) => self.line_string(offset), + _ => Err(Error::ExpectedStringAttributeValue), + } + } + /// Return the address at the given index. pub fn address(&self, unit: &Unit, index: DebugAddrIndex) -> Result { self.debug_addr diff --git a/src/write/dwarf.rs b/src/write/dwarf.rs index ea507126a..59c2a6d9a 100644 --- a/src/write/dwarf.rs +++ b/src/write/dwarf.rs @@ -2,8 +2,8 @@ use alloc::vec::Vec; use crate::common::Encoding; use crate::write::{ - AbbreviationTable, LineProgram, LineStringTable, Result, Sections, StringTable, Unit, - UnitTable, Writer, + AbbreviationTable, LineProgram, LineString, LineStringTable, Result, Sections, StringTable, + Unit, UnitTable, Writer, }; /// Writable DWARF information for more than one unit. @@ -48,6 +48,11 @@ impl Dwarf { } Ok(()) } + + /// Get a reference to the data for a line string. + pub fn get_line_string<'a>(&'a self, string: &'a LineString) -> &'a [u8] { + string.get(&self.strings, &self.line_strings) + } } /// Writable DWARF information for a single unit. @@ -102,6 +107,11 @@ impl DwarfUnit { abbrevs.write(&mut sections.debug_abbrev)?; Ok(()) } + + /// Get a reference to the data for a line string. + pub fn get_line_string<'a>(&'a self, string: &'a LineString) -> &'a [u8] { + string.get(&self.strings, &self.line_strings) + } } #[cfg(feature = "read")] diff --git a/src/write/line.rs b/src/write/line.rs index 0e5c06243..cf4c48bb7 100644 --- a/src/write/line.rs +++ b/src/write/line.rs @@ -7,7 +7,7 @@ use crate::constants; use crate::leb128; use crate::write::{ Address, DebugLineStrOffsets, DebugStrOffsets, Error, LineStringId, LineStringTable, Result, - Section, StringId, Writer, + Section, StringId, StringTable, Writer, }; /// The number assigned to the first special opcode. @@ -853,6 +853,19 @@ impl LineString { } } + /// Get a reference to the string data. + pub fn get<'a>( + &'a self, + strings: &'a StringTable, + line_strings: &'a LineStringTable, + ) -> &'a [u8] { + match self { + LineString::String(val) => val, + LineString::StringRef(val) => strings.get(*val), + LineString::LineStringRef(val) => line_strings.get(*val), + } + } + fn form(&self) -> constants::DwForm { match *self { LineString::String(..) => constants::DW_FORM_string, @@ -992,6 +1005,8 @@ define_section!( "A writable `.debug_line` section." ); +#[cfg(feature = "read")] +pub use convert::*; #[cfg(feature = "read")] mod convert { use super::*; @@ -1003,177 +1018,378 @@ mod convert { /// /// Return the program and a mapping from file index to `FileId`. pub fn from>( - mut from_program: read::IncompleteLineProgram, - dwarf: &read::Dwarf, + from_program: read::IncompleteLineProgram, + from_dwarf: &read::Dwarf, line_strings: &mut write::LineStringTable, strings: &mut write::StringTable, convert_address: &dyn Fn(u64) -> Option
, ) -> ConvertResult<(LineProgram, Vec)> { + let encoding = from_program.header().encoding(); + let line_encoding = from_program.header().line_encoding(); + let comp_name = match from_program.header().file(0) { + Some(file) => Some(from_dwarf.attr_line_string(file.path_name())?), + None => None, + }; + let mut convert = LineConvert::new( + from_dwarf, + from_program, + comp_name, + encoding, + line_encoding, + line_strings, + strings, + )?; + + while let Some(row) = convert.read_row()? { + match row { + LineConvertRow::SetAddress(address, row) => { + if convert.in_sequence() { + return Err(ConvertError::InvalidAddress); + } + let address = + convert_address(address).ok_or(ConvertError::InvalidAddress)?; + convert.begin_sequence(Some(address)); + convert.generate_row(row); + } + LineConvertRow::Row(row) => { + if !convert.in_sequence() { + convert.begin_sequence(None); + } + convert.generate_row(row); + } + LineConvertRow::EndSequence(length) => { + if !convert.in_sequence() { + convert.begin_sequence(None); + } + convert.end_sequence(length); + } + } + } + Ok(convert.program()) + } + } + + /// The result of [`LineConvert::read_row`]. + #[derive(Debug)] + pub enum LineConvertRow { + /// A row that used a `DW_LNS_set_address` instruction. + /// + /// This is expected to be the first row in a sequence, + /// but [`LineConvert::read_row`] doesn't enforce that. + /// + /// This row will have its `address_offset` field set to 0. + /// All subsequent rows in the sequence will have their `address_offset` + /// field set to an offset from this address. + SetAddress(u64, LineRow), + /// A row produced by the line number program. + Row(LineRow), + /// The address offset of the end of the sequence. + EndSequence(u64), + } + + /// The result of [`LineConvert::read_sequence`]. + #[derive(Debug)] + pub struct LineConvertSequence { + /// The address of the first instruction in the sequence. + pub start: Option, + /// The offset in bytes of the next instruction after the sequence. + pub length: u64, + /// The rows in the sequence. + /// + /// The `LineRow::address` fields are set to an offset from `Self::start`. + pub rows: Vec, + } + + /// The state for the conversion of a line number program. + #[derive(Debug)] + pub struct LineConvert<'a, R: Reader> { + #[allow(unused)] + from_dwarf: &'a read::Dwarf, + from_program: read::IncompleteLineProgram, + from_row: read::LineRow, + from_instructions: read::LineInstructions, + files: Vec, + dirs: Vec, + program: LineProgram, + #[allow(unused)] + line_strings: &'a mut write::LineStringTable, + #[allow(unused)] + strings: &'a mut write::StringTable, + } + + impl<'a, R: Reader + 'a> LineConvert<'a, R> { + /// Start a new conversion of a line number program. + /// + /// `encoding` and `line_encoding` apply to the converted program, and + /// may be different from the source program. + pub fn new( + from_dwarf: &'a read::Dwarf, + from_program: read::IncompleteLineProgram, + from_comp_name: Option, + encoding: Encoding, + line_encoding: LineEncoding, + line_strings: &'a mut write::LineStringTable, + strings: &'a mut write::StringTable, + ) -> ConvertResult { // Create mappings in case the source has duplicate files or directories. let mut dirs = Vec::new(); let mut files = Vec::new(); - let mut program = { - let from_header = from_program.header(); - let encoding = from_header.encoding(); + let from_header = from_program.header(); - let comp_dir = match from_header.directory(0) { - Some(comp_dir) => LineString::from(comp_dir, dwarf, line_strings, strings)?, - None => LineString::new(&[][..], encoding, line_strings), - }; + let comp_dir = match from_header.directory(0) { + Some(comp_dir) => { + Self::convert_string(comp_dir, from_dwarf, encoding, line_strings)? + } + None => LineString::new(&[][..], encoding, line_strings), + }; - let comp_name = match from_header.file(0) { - Some(comp_file) => { - if comp_file.directory_index() != 0 { - return Err(ConvertError::InvalidDirectoryIndex); - } - LineString::from(comp_file.path_name(), dwarf, line_strings, strings)? - } - None => LineString::new(&[][..], encoding, line_strings), - }; + let comp_name = match from_comp_name { + Some(comp_name) => LineString::new(comp_name.to_slice()?, encoding, line_strings), + None => LineString::new(&[][..], encoding, line_strings), + }; - if from_header.line_base() > 0 { - return Err(ConvertError::InvalidLineBase); - } - let mut program = LineProgram::new( - encoding, - from_header.line_encoding(), - comp_dir, - comp_name, - None, // We'll set this later if needed when we add the file again. - ); - - if from_header.version() <= 4 { - // The first directory is implicit. - dirs.push(DirectoryId(0)); - // A file index of 0 is invalid for version <= 4, but putting - // something there makes the indexing easier. - files.push(FileId::new(0)); - } + if from_header.line_base() > 0 { + return Err(ConvertError::InvalidLineBase); + } + let mut program = LineProgram::new( + encoding, + line_encoding, + comp_dir, + comp_name, + None, // We'll set this later if needed when we add the file again. + ); - for from_dir in from_header.include_directories() { - let from_dir = - LineString::from(from_dir.clone(), dwarf, line_strings, strings)?; - dirs.push(program.add_directory(from_dir)); - } + if from_header.version() <= 4 { + // The first directory is implicit. + dirs.push(DirectoryId(0)); + // A file index of 0 is invalid for version <= 4, but putting + // something there makes the indexing easier. + files.push(FileId::new(0)); + } - program.file_has_timestamp = from_header.file_has_timestamp(); - program.file_has_size = from_header.file_has_size(); - program.file_has_md5 = from_header.file_has_md5(); - program.file_has_source = from_header.file_has_source(); - for from_file in from_header.file_names().iter() { - let from_name = - LineString::from(from_file.path_name(), dwarf, line_strings, strings)?; - let from_dir = from_file.directory_index(); - if from_dir >= dirs.len() as u64 { - return Err(ConvertError::InvalidDirectoryIndex); - } - let from_dir = dirs[from_dir as usize]; - let from_info = Some(FileInfo { - timestamp: from_file.timestamp(), - size: from_file.size(), - md5: *from_file.md5(), - source: match from_file.source() { - Some(source) => { - Some(LineString::from(source, dwarf, line_strings, strings)?) - } - None => None, - }, - }); - files.push(program.add_file(from_name, from_dir, from_info)); - } + for from_attr in from_header.include_directories() { + let from_dir = + Self::convert_string(from_attr.clone(), from_dwarf, encoding, line_strings)?; + dirs.push(program.add_directory(from_dir)); + } - program - }; + program.file_has_timestamp = from_header.file_has_timestamp(); + program.file_has_size = from_header.file_has_size(); + program.file_has_md5 = from_header.file_has_md5(); + program.file_has_source = from_header.file_has_source(); + for from_file in from_header.file_names() { + let (from_name, from_dir, from_info) = + Self::convert_file(from_file, from_dwarf, &dirs, encoding, line_strings)?; + files.push(program.add_file(from_name, from_dir, from_info)); + } // We can't use the `from_program.rows()` because that wouldn't let // us preserve address relocations. - let mut from_row = read::LineRow::new(from_program.header()); - let mut instructions = from_program.header().instructions(); + let from_row = read::LineRow::new(from_program.header()); + let from_instructions = from_program.header().instructions(); + Ok(LineConvert { + from_dwarf, + from_program, + from_row, + from_instructions, + files, + dirs, + program, + line_strings, + strings, + }) + } + + fn convert_string( + from_attr: read::AttributeValue, + from_dwarf: &read::Dwarf, + encoding: Encoding, + line_strings: &mut write::LineStringTable, + ) -> ConvertResult { + let r = from_dwarf.attr_line_string(from_attr)?; + Ok(LineString::new(r.to_slice()?, encoding, line_strings)) + } + + fn convert_file( + from_file: &read::FileEntry, + from_dwarf: &read::Dwarf, + dirs: &[DirectoryId], + encoding: Encoding, + line_strings: &mut write::LineStringTable, + ) -> ConvertResult<(LineString, DirectoryId, Option)> { + let from_name = + Self::convert_string(from_file.path_name(), from_dwarf, encoding, line_strings)?; + let from_dir = from_file.directory_index(); + if from_dir >= dirs.len() as u64 { + return Err(ConvertError::InvalidDirectoryIndex); + } + let from_dir = dirs[from_dir as usize]; + let from_info = Some(FileInfo { + timestamp: from_file.timestamp(), + size: from_file.size(), + md5: *from_file.md5(), + source: match from_file.source() { + Some(source) => Some(Self::convert_string( + source, + from_dwarf, + encoding, + line_strings, + )?), + None => None, + }, + }); + Ok((from_name, from_dir, from_info)) + } + + /// Read the next row from the source program. + /// + /// Use [`LineConvert::generate_row`] to add the row to the program. + pub fn read_row(&mut self) -> ConvertResult> { + let mut tombstone = false; let mut address = None; - while let Some(instruction) = instructions.next_instruction(from_program.header())? { + self.from_row.reset(self.from_program.header()); + while let Some(instruction) = self + .from_instructions + .next_instruction(self.from_program.header())? + { match instruction { read::LineInstruction::SetAddress(val) => { - if program.in_sequence() { - return Err(ConvertError::UnsupportedLineInstruction); - } - match convert_address(val) { - Some(val) => address = Some(val), - None => return Err(ConvertError::InvalidAddress), + // Use address 0 so that all addresses are offsets. + self.from_row.execute( + read::LineInstruction::SetAddress(0), + &mut self.from_program, + )?; + // Handle tombstones the same way that `from_row.execute` would have. + let tombstone_address = + !0 >> (64 - self.from_program.header().encoding().address_size * 8); + tombstone = val == tombstone_address; + if !tombstone { + address = Some(val); } - from_row - .execute(read::LineInstruction::SetAddress(0), &mut from_program)?; + continue; } - read::LineInstruction::DefineFile(_) => { - return Err(ConvertError::UnsupportedLineInstruction); + read::LineInstruction::DefineFile(ref from_file) => { + let (from_name, from_dir, from_info) = Self::convert_file( + from_file, + self.from_dwarf, + &self.dirs, + self.program.encoding(), + self.line_strings, + )?; + self.files + .push(self.program.add_file(from_name, from_dir, from_info)); } - _ => { - if from_row.execute(instruction, &mut from_program)? { - if !program.in_sequence() { - program.begin_sequence(address); - address = None; - } - if from_row.end_sequence() { - program.end_sequence(from_row.address()); - } else { - program.row().address_offset = from_row.address(); - program.row().op_index = from_row.op_index(); - program.row().file = { - let file = from_row.file_index(); - if file >= files.len() as u64 { - return Err(ConvertError::InvalidFileIndex); - } - if file == 0 && program.version() <= 4 { - return Err(ConvertError::InvalidFileIndex); - } - files[file as usize] - }; - program.row().line = match from_row.line() { - Some(line) => line.get(), - None => 0, - }; - program.row().column = match from_row.column() { - read::ColumnType::LeftEdge => 0, - read::ColumnType::Column(val) => val.get(), - }; - program.row().discriminator = from_row.discriminator(); - program.row().is_statement = from_row.is_stmt(); - program.row().basic_block = from_row.basic_block(); - program.row().prologue_end = from_row.prologue_end(); - program.row().epilogue_begin = from_row.epilogue_begin(); - program.row().isa = from_row.isa(); - program.generate_row(); - } - from_row.reset(from_program.header()); - } + _ => {} + } + if !self.from_row.execute(instruction, &mut self.from_program)? { + // This instruction didn't generate a new row. + continue; + } + if tombstone { + // Perform any reset that was required for the tombstone row. + // Normally this is done when `read_row` is called again, but for + // tombstones we loop immediately. + if self.from_row.end_sequence() { + tombstone = false; + address = None; } + self.from_row.reset(self.from_program.header()); + continue; + } + if self.from_row.end_sequence() { + return Ok(Some(LineConvertRow::EndSequence(self.from_row.address()))); + } + let row = LineRow { + address_offset: self.from_row.address(), + op_index: self.from_row.op_index(), + file: { + let file = self.from_row.file_index(); + if file >= self.files.len() as u64 { + return Err(ConvertError::InvalidFileIndex); + } + if file == 0 && self.from_program.header().version() <= 4 { + return Err(ConvertError::InvalidFileIndex); + } + self.files[file as usize] + }, + line: match self.from_row.line() { + Some(line) => line.get(), + None => 0, + }, + column: match self.from_row.column() { + read::ColumnType::LeftEdge => 0, + read::ColumnType::Column(val) => val.get(), + }, + discriminator: self.from_row.discriminator(), + is_statement: self.from_row.is_stmt(), + basic_block: self.from_row.basic_block(), + prologue_end: self.from_row.prologue_end(), + epilogue_begin: self.from_row.epilogue_begin(), + isa: self.from_row.isa(), }; + if let Some(address) = address { + return Ok(Some(LineConvertRow::SetAddress(address, row))); + } else { + return Ok(Some(LineConvertRow::Row(row))); + } } - Ok((program, files)) + Ok(None) } - } - impl LineString { - fn from>( - from_attr: read::AttributeValue, - dwarf: &read::Dwarf, - line_strings: &mut write::LineStringTable, - strings: &mut write::StringTable, - ) -> ConvertResult { - Ok(match from_attr { - read::AttributeValue::String(r) => LineString::String(r.to_slice()?.to_vec()), - read::AttributeValue::DebugStrRef(offset) => { - let r = dwarf.debug_str.get_str(offset)?; - let id = strings.add(r.to_slice()?); - LineString::StringRef(id) - } - read::AttributeValue::DebugLineStrRef(offset) => { - let r = dwarf.debug_line_str.get_str(offset)?; - let id = line_strings.add(r.to_slice()?); - LineString::LineStringRef(id) + /// Read the next sequence from the source program. + pub fn read_sequence(&mut self) -> ConvertResult> { + let mut start = None; + let mut rows = Vec::new(); + while let Some(row) = self.read_row()? { + match row { + LineConvertRow::SetAddress(address, row) => { + // We only support the setting the address for the first row in a sequence. + if !rows.is_empty() { + return Err(ConvertError::InvalidAddress); + } + start = Some(address); + rows.push(row); + } + LineConvertRow::Row(row) => { + rows.push(row); + } + LineConvertRow::EndSequence(length) => { + return Ok(Some(LineConvertSequence { + start, + length, + rows, + })); + } } - _ => return Err(ConvertError::UnsupportedLineStringForm), - }) + } + Ok(None) + } + + /// Call [`LineProgram::begin_sequence`] for the converted program. + pub fn begin_sequence(&mut self, address: Option
) { + self.program.begin_sequence(address); + } + + /// Call [`LineProgram::end_sequence`] for the converted program. + pub fn end_sequence(&mut self, address_offset: u64) { + self.program.end_sequence(address_offset); + } + + /// Return [`LineProgram::in_sequence`] for the converted program. + pub fn in_sequence(&self) -> bool { + self.program.in_sequence() + } + + /// Set the next row and call [`LineProgram::generate_row`] for the converted program. + pub fn generate_row(&mut self, row: LineRow) { + *self.program.row() = row; + self.program.generate_row(); + } + + /// Return the program and a mapping from source file index to `FileId`. + pub fn program(self) -> (LineProgram, Vec) { + (self.program, self.files) } } } diff --git a/src/write/unit.rs b/src/write/unit.rs index c7f54e96f..261277dd5 100644 --- a/src/write/unit.rs +++ b/src/write/unit.rs @@ -2923,10 +2923,10 @@ mod tests { }; let convert_path = get_convert_path(constants::DW_AT_decl_file); - assert_eq!(convert_path, &file_string1); + assert_eq!(convert_dwarf.get_line_string(convert_path), file_bytes1); let convert_path = get_convert_path(constants::DW_AT_call_file); - assert_eq!(convert_path, &file_string2); + assert_eq!(convert_dwarf.get_line_string(convert_path), file_bytes2); } } } From 6ad2c405c2a795b357aeae02e75cd873767ab796 Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Fri, 23 Aug 2024 17:16:13 +1000 Subject: [PATCH 2/6] Add documentation and some fixes --- src/read/dwarf.rs | 1 + src/read/line.rs | 2 +- src/write/line.rs | 177 +++++++++++++++++++++++++++++++++++++++++++--- src/write/mod.rs | 20 ++++++ 4 files changed, 188 insertions(+), 12 deletions(-) diff --git a/src/read/dwarf.rs b/src/read/dwarf.rs index 43e99081d..f44d6e4cf 100644 --- a/src/read/dwarf.rs +++ b/src/read/dwarf.rs @@ -421,6 +421,7 @@ impl Dwarf { } /// Return an attribute value as a string slice. + /// /// This only handles forms that are usable without an associated unit. /// /// If the attribute value is one of: diff --git a/src/read/line.rs b/src/read/line.rs index ceaf3b3fb..86983722a 100644 --- a/src/read/line.rs +++ b/src/read/line.rs @@ -1652,7 +1652,7 @@ where /// /// Note: For DWARF v5 files this may return an empty attribute that /// indicates that no source code is available, which this function - /// represents as Some(). + /// represents as `Some()`. pub fn source(&self) -> Option> { self.source.clone() } diff --git a/src/write/line.rs b/src/write/line.rs index cf4c48bb7..e6ca1020b 100644 --- a/src/write/line.rs +++ b/src/write/line.rs @@ -1044,7 +1044,8 @@ mod convert { match row { LineConvertRow::SetAddress(address, row) => { if convert.in_sequence() { - return Err(ConvertError::InvalidAddress); + // TODO: this is valid DWARF + return Err(ConvertError::UnsupportedLineSetAddress); } let address = convert_address(address).ok_or(ConvertError::InvalidAddress)?; @@ -1053,18 +1054,23 @@ mod convert { } LineConvertRow::Row(row) => { if !convert.in_sequence() { + // This is valid DWARF but unusual. convert.begin_sequence(None); } convert.generate_row(row); } LineConvertRow::EndSequence(length) => { if !convert.in_sequence() { + // Preserve empty sequences. convert.begin_sequence(None); } convert.end_sequence(length); } } } + if convert.in_sequence() { + return Err(ConvertError::MissingLineEndSequence); + } Ok(convert.program()) } } @@ -1072,7 +1078,7 @@ mod convert { /// The result of [`LineConvert::read_row`]. #[derive(Debug)] pub enum LineConvertRow { - /// A row that used a `DW_LNS_set_address` instruction. + /// A row that used a `DW_LNE_set_address` instruction. /// /// This is expected to be the first row in a sequence, /// but [`LineConvert::read_row`] doesn't enforce that. @@ -1101,9 +1107,89 @@ mod convert { } /// The state for the conversion of a line number program. + /// + /// This is used for the implementation of [`LineProgram::from`]. + /// + /// Users may want to use this directly instead of [`LineProgram::from`] in order + /// to implement transformations on the line number program during the conversion. + /// For example, it may be useful to modify the addresses of the rows to match a + /// corresponding transformation of the machine instructions. + /// + /// After calling [`LineConvert::new`], you may call either [`LineConvert::read_row`] + /// to read the next row, or [`LineConvert::read_sequence`] to read a sequence of rows. + /// + /// ## Example Usage + /// + /// Convert a line program using [`LineConvert::read_row`]. + /// + /// ```rust,no_run + /// use gimli::write::{Address, LineConvert, LineConvertRow}; + /// # fn example() -> Result<(), gimli::write::ConvertError> { + /// # type Reader = gimli::read::EndianSlice<'static, gimli::LittleEndian>; + /// # let from_program: gimli::read::IncompleteLineProgram = unimplemented!(); + /// # let from_dwarf: gimli::read::Dwarf<_> = unimplemented!(); + /// # let mut line_strings = gimli::write::LineStringTable::default(); + /// # let mut strings = gimli::write::StringTable::default(); + /// // Choose an encoding for the new program. This can copy the original encoding, + /// // or use a different one. + /// let encoding = from_program.header().encoding(); + /// let line_encoding = from_program.header().line_encoding(); + /// // Start the conversion. This will convert the header, directories and files. + /// let mut convert = LineConvert::new( + /// &from_dwarf, + /// from_program, + /// None, + /// encoding, + /// line_encoding, + /// &mut line_strings, + /// &mut strings, + /// )?; + /// // Read and convert each row in the program. + /// while let Some(row) = convert.read_row()? { + /// match row { + /// LineConvertRow::SetAddress(address, row) => { + /// if convert.in_sequence() { + /// // There was a `DW_LNE_set_address` instruction in the middle of a + /// // sequence. `gimli::write::LineProgram` doesn't currently support this. + /// // You may be able to handle this by converting `address` into an address + /// // offset for the current sequence (and adjusting future row offsets), + /// // or by splitting the sequence into two. + /// return Err(gimli::write::ConvertError::UnsupportedLineSetAddress); + /// } + /// convert.begin_sequence(Some(Address::Constant(address))); + /// convert.generate_row(row); + /// } + /// LineConvertRow::Row(row) => { + /// if !convert.in_sequence() { + /// // There wasn't a `DW_LNE_set_address` instruction at the start of the + /// // sequence. This is unusual in practice, but you can support it by + /// // starting a sequence without an address. This will use the initial + /// // address defined by DWARF, which is 0. The row itself can can still + /// // have a non-zero address offset. + /// convert.begin_sequence(None); + /// } + /// convert.generate_row(row); + /// } + /// LineConvertRow::EndSequence(length) => { + /// if !convert.in_sequence() { + /// // The sequence had no rows. We can either match this with an empty + /// // sequence in the converted program, or ignore it. + /// convert.begin_sequence(None); + /// } + /// convert.end_sequence(length); + /// } + /// } + /// } + /// if convert.in_sequence() { + /// // The sequence was never ended. This is invalid DWARF. + /// return Err(gimli::write::ConvertError::MissingLineEndSequence); + /// } + /// let (program, files) = convert.program(); + /// # Ok(()) + /// # } + /// ``` #[derive(Debug)] pub struct LineConvert<'a, R: Reader> { - #[allow(unused)] from_dwarf: &'a read::Dwarf, from_program: read::IncompleteLineProgram, from_row: read::LineRow, @@ -1111,9 +1197,8 @@ mod convert { files: Vec, dirs: Vec, program: LineProgram, - #[allow(unused)] line_strings: &'a mut write::LineStringTable, - #[allow(unused)] + #[allow(unused)] // May need LineString::StringRef in future. strings: &'a mut write::StringTable, } @@ -1122,6 +1207,12 @@ mod convert { /// /// `encoding` and `line_encoding` apply to the converted program, and /// may be different from the source program. + /// + /// The compilation directory and name are taken from the source program header. + /// For DWARF version <= 4, this relies on the correct directory and name being + /// passed to [`read::DebugLine::program`]. However, for split DWARF the line program + /// is associated with a skeleton compilation unit which may not have the correct + /// name. In this case, the name should be passed as `from_comp_name`. pub fn new( from_dwarf: &'a read::Dwarf, from_program: read::IncompleteLineProgram, @@ -1141,12 +1232,34 @@ mod convert { Some(comp_dir) => { Self::convert_string(comp_dir, from_dwarf, encoding, line_strings)? } - None => LineString::new(&[][..], encoding, line_strings), + None => { + if encoding.version >= 5 { + return Err(ConvertError::MissingCompilationDirectory); + } else { + // This value won't be emitted, but we need to provide something. + LineString::String(Vec::new()) + } + } }; let comp_name = match from_comp_name { Some(comp_name) => LineString::new(comp_name.to_slice()?, encoding, line_strings), - None => LineString::new(&[][..], encoding, line_strings), + None => match from_header.file(0) { + Some(comp_file) => Self::convert_string( + comp_file.path_name(), + from_dwarf, + encoding, + line_strings, + )?, + None => { + if encoding.version >= 5 { + return Err(ConvertError::MissingCompilationName); + } else { + // This value won't be emitted, but we need to provide something. + LineString::String(Vec::new()) + } + } + }, }; if from_header.line_base() > 0 { @@ -1244,7 +1357,7 @@ mod convert { /// Read the next row from the source program. /// - /// Use [`LineConvert::generate_row`] to add the row to the program. + /// See [`LineConvert`] for an example of how to add the row to the converted program. pub fn read_row(&mut self) -> ConvertResult> { let mut tombstone = false; let mut address = None; @@ -1279,6 +1392,7 @@ mod convert { )?; self.files .push(self.program.add_file(from_name, from_dir, from_info)); + continue; } _ => {} } @@ -1338,6 +1452,29 @@ mod convert { } /// Read the next sequence from the source program. + /// + /// This will read all rows in the sequence, and return the sequence when it ends. + /// Returns `None` if there are no more rows in the program. + /// + /// Note that this will return an error for `DW_LNE_set_address` in the middle of + /// a sequence, or a missing `DW_LNE_end_sequence`. + /// + /// ## Example Usage + /// + /// ```rust,no_run + /// # use gimli::write::{Address, LineConvert, LineConvertRow}; + /// # fn example(mut convert: LineConvert) -> Result<(), gimli::write::ConvertError> { + /// // Read and convert each sequence in the program. + /// while let Some(sequence) = convert.read_sequence()? { + /// convert.begin_sequence(sequence.start.map(Address::Constant)); + /// for row in sequence.rows { + /// convert.generate_row(row); + /// } + /// convert.end_sequence(sequence.length); + /// } + /// # Ok(()) + /// # } + /// ``` pub fn read_sequence(&mut self) -> ConvertResult> { let mut start = None; let mut rows = Vec::new(); @@ -1346,7 +1483,7 @@ mod convert { LineConvertRow::SetAddress(address, row) => { // We only support the setting the address for the first row in a sequence. if !rows.is_empty() { - return Err(ConvertError::InvalidAddress); + return Err(ConvertError::UnsupportedLineSetAddress); } start = Some(address); rows.push(row); @@ -1363,6 +1500,9 @@ mod convert { } } } + if !rows.is_empty() { + return Err(ConvertError::MissingLineEndSequence); + } Ok(None) } @@ -1388,6 +1528,9 @@ mod convert { } /// Return the program and a mapping from source file index to `FileId`. + /// + /// The file index mapping is 0 based, regardless of the DWARF version. + /// For DWARF version <= 4, the entry at index 0 should not be used. pub fn program(self) -> (LineProgram, Vec) { (self.program, self.files) } @@ -1810,10 +1953,16 @@ mod tests { let mut program = program.clone(); program.row = test.0; program.generate_row(); + program.end_sequence(test.0.address_offset); assert_eq!( - &program.instructions[base_instructions.len()..], + &program.instructions + [base_instructions.len()..program.instructions.len() - 1], &test.1[..] ); + assert_eq!( + program.instructions.last(), + Some(&LineInstruction::EndSequence) + ); // Test LineProgram::from(). let mut unit = Unit::new(encoding, program); @@ -1834,9 +1983,14 @@ mod tests { let convert_program = &convert_unit.line_program; assert_eq!( - &convert_program.instructions[base_instructions.len()..], + &convert_program.instructions + [base_instructions.len()..convert_program.instructions.len() - 1], &test.1[..] ); + assert_eq!( + convert_program.instructions.last(), + Some(&LineInstruction::EndSequence) + ); } } } @@ -2141,6 +2295,7 @@ mod tests { program.row().file = file_id; program.row().line = 0x10000; program.generate_row(); + program.end_sequence(20); let mut unit = Unit::new(encoding, program); let root = unit.get_mut(unit.root()); diff --git a/src/write/mod.rs b/src/write/mod.rs index b7adbbe26..992bac0f2 100644 --- a/src/write/mod.rs +++ b/src/write/mod.rs @@ -337,6 +337,14 @@ mod convert { UnsupportedLineInstruction, /// Writing this form of line string is not implemented yet. UnsupportedLineStringForm, + /// A `DW_LNE_set_address` instruction occurred after the start of a sequence. + UnsupportedLineSetAddress, + /// A `DW_LNE_end_sequence` instruction was missing at the end of a sequence. + MissingLineEndSequence, + /// The name of the compilation unit is missing. + MissingCompilationName, + /// The path of the compilation directory is missing. + MissingCompilationDirectory, /// A `.debug_line` file index is invalid. InvalidFileIndex, /// A `.debug_line` directory index is invalid. @@ -388,6 +396,18 @@ mod convert { f, "Writing this form of line string is not implemented yet." ), + UnsupportedLineSetAddress => write!( + f, + "A `DW_LNE_set_address` instruction occurred after the start of a sequence." + ), + MissingLineEndSequence => write!( + f, + "A `DW_LNE_end_sequence` instruction was missing at the end of a sequence." + ), + MissingCompilationName => write!(f, "The name of the compilation unit is missing."), + MissingCompilationDirectory => { + write!(f, "The path of the compilation directory is missing.") + } InvalidFileIndex => write!(f, "A `.debug_line` file index is invalid."), InvalidDirectoryIndex => write!(f, "A `.debug_line` directory index is invalid."), InvalidLineBase => write!(f, "A `.debug_line` line base is invalid."), From a5ab91fda784ab52219d50738e26d65ec6ceb43b Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Sun, 25 Aug 2024 15:52:23 +1000 Subject: [PATCH 3/6] Relax the sequence constraints on `write::LineProgram` We now automatically start sequences if needed. This better matches how the underlying DWARF instructions operate, and simplifies use of `LineConvert` since we no longer need to check if a sequence has started. This also means that `LineConvert::read_row` will work as expected if there are multiple `DW_LNE_set_address` in a sequence. `LineConvert::read_sequence` still does not support this. --- src/write/line.rs | 222 +++++++++++++++++++++++++++++++++------------- 1 file changed, 161 insertions(+), 61 deletions(-) diff --git a/src/write/line.rs b/src/write/line.rs index e6ca1020b..c1f8ea77d 100644 --- a/src/write/line.rs +++ b/src/write/line.rs @@ -311,6 +311,11 @@ impl LineProgram { /// Begin a new sequence and set its base address. /// + /// It is not necessary to call this method, since all of the other methods + /// will automatically begin a sequence if one has not already begun. + /// However, it may be useful in rare cases where you need to begin a sequence + /// while optionally setting the address. + /// /// # Panics /// /// Panics if a sequence has already begun. @@ -322,15 +327,23 @@ impl LineProgram { } } + /// Set the address for the next row. + /// + /// This begins a sequence if one has not already begun. + /// + /// This does not begin a new sequence if one has already begun. Hence, the caller must + /// ensure that this address is greater than or equal to the address of the previous row. + pub fn set_address(&mut self, address: Address) { + self.in_sequence = true; + self.instructions.push(LineInstruction::SetAddress(address)); + } + /// End the sequence, and reset the row to its default values. /// /// Only the `address_offset` and op_index` fields of the current row are used. /// - /// # Panics - /// - /// Panics if a sequence has not begun. + /// This will emit an `EndSequence` instruction even if a sequence has not begun. pub fn end_sequence(&mut self, address_offset: u64) { - assert!(self.in_sequence); self.in_sequence = false; self.row.address_offset = address_offset; let op_advance = self.op_advance(); @@ -360,13 +373,12 @@ impl LineProgram { /// After the instructions are generated, it sets `discriminator` to 0, and sets /// `basic_block`, `prologue_end`, and `epilogue_begin` to false. /// + /// This begins a sequence if one has not already begun. + /// /// # Panics /// - /// Panics if a sequence has not begun. /// Panics if the address_offset decreases. pub fn generate_row(&mut self) { - assert!(self.in_sequence); - // Output fields that are reset on every row. if self.row.discriminator != 0 { self.instructions @@ -1043,27 +1055,15 @@ mod convert { while let Some(row) = convert.read_row()? { match row { LineConvertRow::SetAddress(address, row) => { - if convert.in_sequence() { - // TODO: this is valid DWARF - return Err(ConvertError::UnsupportedLineSetAddress); - } let address = convert_address(address).ok_or(ConvertError::InvalidAddress)?; - convert.begin_sequence(Some(address)); + convert.set_address(address); convert.generate_row(row); } LineConvertRow::Row(row) => { - if !convert.in_sequence() { - // This is valid DWARF but unusual. - convert.begin_sequence(None); - } convert.generate_row(row); } LineConvertRow::EndSequence(length) => { - if !convert.in_sequence() { - // Preserve empty sequences. - convert.begin_sequence(None); - } convert.end_sequence(length); } } @@ -1148,34 +1148,13 @@ mod convert { /// while let Some(row) = convert.read_row()? { /// match row { /// LineConvertRow::SetAddress(address, row) => { - /// if convert.in_sequence() { - /// // There was a `DW_LNE_set_address` instruction in the middle of a - /// // sequence. `gimli::write::LineProgram` doesn't currently support this. - /// // You may be able to handle this by converting `address` into an address - /// // offset for the current sequence (and adjusting future row offsets), - /// // or by splitting the sequence into two. - /// return Err(gimli::write::ConvertError::UnsupportedLineSetAddress); - /// } - /// convert.begin_sequence(Some(Address::Constant(address))); + /// convert.set_address(Address::Constant(address)); /// convert.generate_row(row); /// } /// LineConvertRow::Row(row) => { - /// if !convert.in_sequence() { - /// // There wasn't a `DW_LNE_set_address` instruction at the start of the - /// // sequence. This is unusual in practice, but you can support it by - /// // starting a sequence without an address. This will use the initial - /// // address defined by DWARF, which is 0. The row itself can can still - /// // have a non-zero address offset. - /// convert.begin_sequence(None); - /// } /// convert.generate_row(row); /// } /// LineConvertRow::EndSequence(length) => { - /// if !convert.in_sequence() { - /// // The sequence had no rows. We can either match this with an empty - /// // sequence in the converted program, or ignore it. - /// convert.begin_sequence(None); - /// } /// convert.end_sequence(length); /// } /// } @@ -1511,6 +1490,11 @@ mod convert { self.program.begin_sequence(address); } + /// Call [`LineProgram::set_address`] for the converted program. + pub fn set_address(&mut self, address: Address) { + self.program.set_address(address); + } + /// Call [`LineProgram::end_sequence`] for the converted program. pub fn end_sequence(&mut self, address_offset: u64) { self.program.end_sequence(address_offset); @@ -1679,7 +1663,7 @@ mod tests { } #[test] - fn test_line_row() { + fn test_line_sequence() { let dir1 = &b"dir1"[..]; let file1 = &b"file1"[..]; let file2 = &b"file2"[..]; @@ -1692,27 +1676,19 @@ mod tests { version, address_size, }; - let line_base = -5; - let line_range = 14; - let neg_line_base = (-line_base) as u8; let mut program = LineProgram::new( encoding, - LineEncoding { - line_base, - line_range, - ..Default::default() - }, + LineEncoding::default(), LineString::String(dir1.to_vec()), LineString::String(file1.to_vec()), None, ); let dir_id = program.default_directory(); - let file1_id = - program.add_file(LineString::String(file1.to_vec()), dir_id, None); - let file2_id = - program.add_file(LineString::String(file2.to_vec()), dir_id, None); + program.add_file(LineString::String(file1.to_vec()), dir_id, None); + // Ensure the default file index is valid. + program.add_file(LineString::String(file2.to_vec()), dir_id, None); - // Test sequences. + // Test instructions added by sequence related methods. { let mut program = program.clone(); let address = Address::Constant(0x12); @@ -1731,7 +1707,22 @@ mod tests { { let mut program = program.clone(); - program.begin_sequence(None); + let address = Address::Constant(0x12); + program.set_address(address); + assert_eq!( + program.instructions, + vec![LineInstruction::SetAddress(address)] + ); + } + + { + let mut program = program.clone(); + program.generate_row(); + assert_eq!(program.instructions, vec![LineInstruction::Copy]); + } + + { + let mut program = program.clone(); program.end_sequence(0x1234); assert_eq!( program.instructions, @@ -1742,8 +1733,117 @@ mod tests { ); } - // Create a base program. + { + let mut program = program.clone(); + program.end_sequence(0); + assert_eq!(program.instructions, vec![LineInstruction::EndSequence]); + } + + // Test conversion of sequences. + let mut expected_instructions = Vec::new(); + + // Basic sequence. + let address = Address::Constant(0x12); + program.begin_sequence(Some(address)); + program.generate_row(); + program.end_sequence(0x1234); + expected_instructions.extend_from_slice(&[ + LineInstruction::SetAddress(address), + LineInstruction::Copy, + LineInstruction::AdvancePc(0x1234), + LineInstruction::EndSequence, + ]); + + // Empty sequence. program.begin_sequence(None); + program.end_sequence(0); + expected_instructions.extend_from_slice(&[LineInstruction::EndSequence]); + + // Multiple `DW_LNE_set_address`. + let address1 = Address::Constant(0x12); + let address2 = Address::Constant(0x34); + program.set_address(address1); + program.generate_row(); + program.set_address(address2); + program.generate_row(); + program.end_sequence(0x1234); + expected_instructions.extend_from_slice(&[ + LineInstruction::SetAddress(address1), + LineInstruction::Copy, + LineInstruction::SetAddress(address2), + LineInstruction::Copy, + LineInstruction::AdvancePc(0x1234), + LineInstruction::EndSequence, + ]); + + // No `DW_LNE_set_address`. + program.generate_row(); + program.end_sequence(0x1234); + expected_instructions.extend_from_slice(&[ + LineInstruction::Copy, + LineInstruction::AdvancePc(0x1234), + LineInstruction::EndSequence, + ]); + + assert_eq!(program.instructions, expected_instructions); + + let mut unit = Unit::new(encoding, program); + let root = unit.get_mut(unit.root()); + root.set(constants::DW_AT_stmt_list, AttributeValue::LineProgramRef); + + let mut dwarf = Dwarf::new(); + dwarf.units.add(unit); + + let mut sections = Sections::new(EndianVec::new(LittleEndian)); + dwarf.write(&mut sections).unwrap(); + let read_dwarf = sections.read(LittleEndian); + + let convert_dwarf = + Dwarf::from(&read_dwarf, &|address| Some(Address::Constant(address))) + .unwrap(); + let convert_unit = convert_dwarf.units.iter().next().unwrap().1; + let convert_program = &convert_unit.line_program; + assert_eq!(convert_program.instructions, expected_instructions); + } + } + } + } + + #[test] + fn test_line_row() { + let dir1 = &b"dir1"[..]; + let file1 = &b"file1"[..]; + let file2 = &b"file2"[..]; + + for &version in &[2, 3, 4, 5] { + for &address_size in &[4, 8] { + for &format in &[Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + let line_base = -5; + let line_range = 14; + let neg_line_base = (-line_base) as u8; + let mut program = LineProgram::new( + encoding, + LineEncoding { + line_base, + line_range, + ..Default::default() + }, + LineString::String(dir1.to_vec()), + LineString::String(file1.to_vec()), + None, + ); + let dir_id = program.default_directory(); + let file1_id = + program.add_file(LineString::String(file1.to_vec()), dir_id, None); + let file2_id = + program.add_file(LineString::String(file2.to_vec()), dir_id, None); + + // Create a base program. program.row.line = 0x1000; program.generate_row(); let base_row = program.row; @@ -2148,7 +2248,7 @@ mod tests { None, ); for address_advance in addresses.clone() { - program.begin_sequence(Some(Address::Constant(0x1000))); + program.set_address(Address::Constant(0x1000)); program.row().line = 0x10000; program.generate_row(); for line_advance in lines.clone() { @@ -2244,7 +2344,7 @@ mod tests { file.clone(), None, ); - program.begin_sequence(Some(Address::Constant(0x1000))); + program.set_address(Address::Constant(0x1000)); program.row().line = 0x10000; program.generate_row(); @@ -2291,7 +2391,7 @@ mod tests { let dir_id = program.default_directory(); let file_id = program.add_file(LineString::String(b"file1".to_vec()), dir_id, None); - program.begin_sequence(Some(Address::Constant(0x1000))); + program.set_address(Address::Constant(0x1000)); program.row().file = file_id; program.row().line = 0x10000; program.generate_row(); From d36b6b15776fa81d5b56d1c50fafb0c0ad93c5ea Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Sun, 25 Aug 2024 18:05:43 +1000 Subject: [PATCH 4/6] Support `DW_LNE_set_address` in `LineConvert::read_sequence` --- src/write/line.rs | 176 ++++++++++++++++++++++++++++++---------------- src/write/mod.rs | 6 -- 2 files changed, 116 insertions(+), 66 deletions(-) diff --git a/src/write/line.rs b/src/write/line.rs index c1f8ea77d..b8b21ea59 100644 --- a/src/write/line.rs +++ b/src/write/line.rs @@ -1054,11 +1054,10 @@ mod convert { while let Some(row) = convert.read_row()? { match row { - LineConvertRow::SetAddress(address, row) => { + LineConvertRow::SetAddress(address) => { let address = convert_address(address).ok_or(ConvertError::InvalidAddress)?; convert.set_address(address); - convert.generate_row(row); } LineConvertRow::Row(row) => { convert.generate_row(row); @@ -1078,15 +1077,11 @@ mod convert { /// The result of [`LineConvert::read_row`]. #[derive(Debug)] pub enum LineConvertRow { - /// A row that used a `DW_LNE_set_address` instruction. - /// - /// This is expected to be the first row in a sequence, - /// but [`LineConvert::read_row`] doesn't enforce that. + /// The address from a `DW_LNE_set_address` instruction. /// - /// This row will have its `address_offset` field set to 0. /// All subsequent rows in the sequence will have their `address_offset` /// field set to an offset from this address. - SetAddress(u64, LineRow), + SetAddress(u64), /// A row produced by the line number program. Row(LineRow), /// The address offset of the end of the sequence. @@ -1096,16 +1091,41 @@ mod convert { /// The result of [`LineConvert::read_sequence`]. #[derive(Debug)] pub struct LineConvertSequence { - /// The address of the first instruction in the sequence. + /// The address of the first instruction in the given rows. + /// + /// This will be `None` if there was no `DW_LNE_set_address` instruction. pub start: Option, - /// The offset in bytes of the next instruction after the sequence. - pub length: u64, + /// The location of the next instruction after the given rows. + pub end: LineConvertSequenceEnd, /// The rows in the sequence. /// /// The `LineRow::address` fields are set to an offset from `Self::start`. pub rows: Vec, } + /// The location of the next instruction after a sequence of rows. + #[derive(Debug)] + pub enum LineConvertSequenceEnd { + /// An offset in bytes from [`LineConvertSequence::start`]. + Length(u64), + /// An address set by a `DW_LNE_set_address` instruction. + Address(u64), + } + + /// The next action to take in the conversion of a line number program. + #[derive(Debug)] + enum LineConvertState { + /// Read and evaluate instructions for the next row. + ReadRow, + /// Return `self.address` then switch to `ConvertRow`. + SetAddress, + /// Convert `self.from_row` to a `LineRow`. + /// + /// If this state occurred after a `SetAddress`, `self.address` is still the + /// address that was set. + ConvertRow, + } + /// The state for the conversion of a line number program. /// /// This is used for the implementation of [`LineProgram::from`]. @@ -1147,9 +1167,8 @@ mod convert { /// // Read and convert each row in the program. /// while let Some(row) = convert.read_row()? { /// match row { - /// LineConvertRow::SetAddress(address, row) => { + /// LineConvertRow::SetAddress(address) => { /// convert.set_address(Address::Constant(address)); - /// convert.generate_row(row); /// } /// LineConvertRow::Row(row) => { /// convert.generate_row(row); @@ -1179,6 +1198,8 @@ mod convert { line_strings: &'a mut write::LineStringTable, #[allow(unused)] // May need LineString::StringRef in future. strings: &'a mut write::StringTable, + address: Option, + state: LineConvertState, } impl<'a, R: Reader + 'a> LineConvert<'a, R> { @@ -1290,6 +1311,8 @@ mod convert { program, line_strings, strings, + address: None, + state: LineConvertState::ReadRow, }) } @@ -1338,8 +1361,23 @@ mod convert { /// /// See [`LineConvert`] for an example of how to add the row to the converted program. pub fn read_row(&mut self) -> ConvertResult> { + match self.state { + LineConvertState::ReadRow => {} + LineConvertState::SetAddress => { + if let Some(address) = self.address.take() { + self.state = LineConvertState::ConvertRow; + return Ok(Some(LineConvertRow::SetAddress(address))); + } + self.state = LineConvertState::ReadRow; + return Ok(Some(LineConvertRow::Row(self.convert_row()?))); + } + LineConvertState::ConvertRow => { + self.state = LineConvertState::ReadRow; + return Ok(Some(LineConvertRow::Row(self.convert_row()?))); + } + } let mut tombstone = false; - let mut address = None; + self.address = None; self.from_row.reset(self.from_program.header()); while let Some(instruction) = self .from_instructions @@ -1357,7 +1395,7 @@ mod convert { !0 >> (64 - self.from_program.header().encoding().address_size * 8); tombstone = val == tombstone_address; if !tombstone { - address = Some(val); + self.address = Some(val); } continue; } @@ -1385,7 +1423,7 @@ mod convert { // tombstones we loop immediately. if self.from_row.end_sequence() { tombstone = false; - address = None; + self.address = None; } self.from_row.reset(self.from_program.header()); continue; @@ -1393,63 +1431,69 @@ mod convert { if self.from_row.end_sequence() { return Ok(Some(LineConvertRow::EndSequence(self.from_row.address()))); } - let row = LineRow { - address_offset: self.from_row.address(), - op_index: self.from_row.op_index(), - file: { - let file = self.from_row.file_index(); - if file >= self.files.len() as u64 { - return Err(ConvertError::InvalidFileIndex); - } - if file == 0 && self.from_program.header().version() <= 4 { - return Err(ConvertError::InvalidFileIndex); - } - self.files[file as usize] - }, - line: match self.from_row.line() { - Some(line) => line.get(), - None => 0, - }, - column: match self.from_row.column() { - read::ColumnType::LeftEdge => 0, - read::ColumnType::Column(val) => val.get(), - }, - discriminator: self.from_row.discriminator(), - is_statement: self.from_row.is_stmt(), - basic_block: self.from_row.basic_block(), - prologue_end: self.from_row.prologue_end(), - epilogue_begin: self.from_row.epilogue_begin(), - isa: self.from_row.isa(), - }; - if let Some(address) = address { - return Ok(Some(LineConvertRow::SetAddress(address, row))); + if let Some(address) = self.address.take() { + self.state = LineConvertState::ConvertRow; + return Ok(Some(LineConvertRow::SetAddress(address))); } else { - return Ok(Some(LineConvertRow::Row(row))); + self.state = LineConvertState::ReadRow; + return Ok(Some(LineConvertRow::Row(self.convert_row()?))); } } Ok(None) } + fn convert_row(&self) -> ConvertResult { + Ok(LineRow { + address_offset: self.from_row.address(), + op_index: self.from_row.op_index(), + file: { + let file = self.from_row.file_index(); + if file >= self.files.len() as u64 { + return Err(ConvertError::InvalidFileIndex); + } + if file == 0 && self.from_program.header().version() <= 4 { + return Err(ConvertError::InvalidFileIndex); + } + self.files[file as usize] + }, + line: match self.from_row.line() { + Some(line) => line.get(), + None => 0, + }, + column: match self.from_row.column() { + read::ColumnType::LeftEdge => 0, + read::ColumnType::Column(val) => val.get(), + }, + discriminator: self.from_row.discriminator(), + is_statement: self.from_row.is_stmt(), + basic_block: self.from_row.basic_block(), + prologue_end: self.from_row.prologue_end(), + epilogue_begin: self.from_row.epilogue_begin(), + isa: self.from_row.isa(), + }) + } + /// Read the next sequence from the source program. /// - /// This will read all rows in the sequence, and return the sequence when it ends. - /// Returns `None` if there are no more rows in the program. - /// - /// Note that this will return an error for `DW_LNE_set_address` in the middle of - /// a sequence, or a missing `DW_LNE_end_sequence`. + /// This will read rows in the sequence up to either the end of the sequence, + /// or the next `DW_LNE_set_address` instruction. /// /// ## Example Usage /// /// ```rust,no_run - /// # use gimli::write::{Address, LineConvert, LineConvertRow}; + /// # use gimli::write::{Address, LineConvert, LineConvertSequenceEnd}; /// # fn example(mut convert: LineConvert) -> Result<(), gimli::write::ConvertError> { /// // Read and convert each sequence in the program. /// while let Some(sequence) = convert.read_sequence()? { - /// convert.begin_sequence(sequence.start.map(Address::Constant)); + /// if let Some(start) = sequence.start { + /// convert.set_address(Address::Constant(start)); + /// } /// for row in sequence.rows { /// convert.generate_row(row); /// } - /// convert.end_sequence(sequence.length); + /// if let LineConvertSequenceEnd::Length(length) = sequence.end { + /// convert.end_sequence(length); + /// } /// } /// # Ok(()) /// # } @@ -1457,15 +1501,27 @@ mod convert { pub fn read_sequence(&mut self) -> ConvertResult> { let mut start = None; let mut rows = Vec::new(); + match self.state { + LineConvertState::ReadRow => {} + LineConvertState::SetAddress | LineConvertState::ConvertRow => { + start = self.address; + rows.push(self.convert_row()?); + self.state = LineConvertState::ReadRow; + } + } while let Some(row) = self.read_row()? { match row { - LineConvertRow::SetAddress(address, row) => { - // We only support the setting the address for the first row in a sequence. + LineConvertRow::SetAddress(address) => { if !rows.is_empty() { - return Err(ConvertError::UnsupportedLineSetAddress); + self.address = Some(address); + self.state = LineConvertState::SetAddress; + return Ok(Some(LineConvertSequence { + start, + end: LineConvertSequenceEnd::Address(address), + rows, + })); } start = Some(address); - rows.push(row); } LineConvertRow::Row(row) => { rows.push(row); @@ -1473,7 +1529,7 @@ mod convert { LineConvertRow::EndSequence(length) => { return Ok(Some(LineConvertSequence { start, - length, + end: LineConvertSequenceEnd::Length(length), rows, })); } diff --git a/src/write/mod.rs b/src/write/mod.rs index 992bac0f2..5b940c3d5 100644 --- a/src/write/mod.rs +++ b/src/write/mod.rs @@ -337,8 +337,6 @@ mod convert { UnsupportedLineInstruction, /// Writing this form of line string is not implemented yet. UnsupportedLineStringForm, - /// A `DW_LNE_set_address` instruction occurred after the start of a sequence. - UnsupportedLineSetAddress, /// A `DW_LNE_end_sequence` instruction was missing at the end of a sequence. MissingLineEndSequence, /// The name of the compilation unit is missing. @@ -396,10 +394,6 @@ mod convert { f, "Writing this form of line string is not implemented yet." ), - UnsupportedLineSetAddress => write!( - f, - "A `DW_LNE_set_address` instruction occurred after the start of a sequence." - ), MissingLineEndSequence => write!( f, "A `DW_LNE_end_sequence` instruction was missing at the end of a sequence." From 71e52e6e9e90c9e936dc54235901d3e3f39f88e7 Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Mon, 26 Aug 2024 14:53:49 +1000 Subject: [PATCH 5/6] Simplify LineProgram::from LineConvert::new already retrieves the compilation name from the program. --- src/write/line.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/write/line.rs b/src/write/line.rs index b8b21ea59..5b2983789 100644 --- a/src/write/line.rs +++ b/src/write/line.rs @@ -1038,14 +1038,10 @@ mod convert { ) -> ConvertResult<(LineProgram, Vec)> { let encoding = from_program.header().encoding(); let line_encoding = from_program.header().line_encoding(); - let comp_name = match from_program.header().file(0) { - Some(file) => Some(from_dwarf.attr_line_string(file.path_name())?), - None => None, - }; let mut convert = LineConvert::new( from_dwarf, from_program, - comp_name, + None, encoding, line_encoding, line_strings, From 83b53b29be59d677e9f059b881a6eb2b4ceaa54c Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Mon, 26 Aug 2024 17:20:27 +1000 Subject: [PATCH 6/6] Test LineConvert::read_sequence --- src/write/line.rs | 151 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 127 insertions(+), 24 deletions(-) diff --git a/src/write/line.rs b/src/write/line.rs index 5b2983789..6c70c90bb 100644 --- a/src/write/line.rs +++ b/src/write/line.rs @@ -689,7 +689,7 @@ impl LineProgram { } /// A row in the line number table that corresponds to a machine instruction. -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct LineRow { /// The offset of the instruction from the start address of the sequence. pub address_offset: u64, @@ -1071,7 +1071,7 @@ mod convert { } /// The result of [`LineConvert::read_row`]. - #[derive(Debug)] + #[derive(Debug, PartialEq, Eq)] pub enum LineConvertRow { /// The address from a `DW_LNE_set_address` instruction. /// @@ -1085,7 +1085,7 @@ mod convert { } /// The result of [`LineConvert::read_sequence`]. - #[derive(Debug)] + #[derive(Debug, PartialEq, Eq)] pub struct LineConvertSequence { /// The address of the first instruction in the given rows. /// @@ -1100,7 +1100,7 @@ mod convert { } /// The location of the next instruction after a sequence of rows. - #[derive(Debug)] + #[derive(Debug, PartialEq, Eq)] pub enum LineConvertSequenceEnd { /// An offset in bytes from [`LineConvertSequence::start`]. Length(u64), @@ -1793,69 +1793,172 @@ mod tests { // Test conversion of sequences. let mut expected_instructions = Vec::new(); + let mut sequence_rows = Vec::new(); + let mut expected_rows = Vec::new(); + let mut expected_sequences = Vec::new(); // Basic sequence. - let address = Address::Constant(0x12); - program.begin_sequence(Some(address)); + let address = 0x12; + let length = 0x1234; + + expected_rows.push(LineConvertRow::SetAddress(address)); + program.begin_sequence(Some(Address::Constant(address))); + + expected_rows.push(LineConvertRow::Row(program.row)); + sequence_rows.push(program.row); program.generate_row(); - program.end_sequence(0x1234); + + expected_rows.push(LineConvertRow::EndSequence(length)); + expected_sequences.push(LineConvertSequence { + start: Some(0x12), + end: LineConvertSequenceEnd::Length(length), + rows: std::mem::take(&mut sequence_rows), + }); + program.end_sequence(length); + expected_instructions.extend_from_slice(&[ - LineInstruction::SetAddress(address), + LineInstruction::SetAddress(Address::Constant(address)), LineInstruction::Copy, - LineInstruction::AdvancePc(0x1234), + LineInstruction::AdvancePc(length), LineInstruction::EndSequence, ]); // Empty sequence. program.begin_sequence(None); + + expected_rows.push(LineConvertRow::EndSequence(0)); + expected_sequences.push(LineConvertSequence { + start: None, + end: LineConvertSequenceEnd::Length(0), + rows: std::mem::take(&mut sequence_rows), + }); program.end_sequence(0); + expected_instructions.extend_from_slice(&[LineInstruction::EndSequence]); - // Multiple `DW_LNE_set_address`. - let address1 = Address::Constant(0x12); - let address2 = Address::Constant(0x34); - program.set_address(address1); + // Multiple `DW_LNE_set_address` with tombstone. + let address1 = 0x12; + let address2 = 0x34; + let tombstone = !0u64 >> (64 - address_size * 8); + + expected_rows.push(LineConvertRow::SetAddress(address1)); + program.set_address(Address::Constant(address1)); + + expected_rows.push(LineConvertRow::Row(program.row)); + sequence_rows.push(program.row); program.generate_row(); - program.set_address(address2); + + program.set_address(Address::Constant(tombstone)); + program.generate_row(); + + expected_rows.push(LineConvertRow::SetAddress(address2)); + expected_sequences.push(LineConvertSequence { + start: Some(address1), + end: LineConvertSequenceEnd::Address(address2), + rows: std::mem::take(&mut sequence_rows), + }); + program.set_address(Address::Constant(address2)); + + expected_rows.push(LineConvertRow::Row(program.row)); + sequence_rows.push(program.row); program.generate_row(); - program.end_sequence(0x1234); + + expected_rows.push(LineConvertRow::EndSequence(length)); + expected_sequences.push(LineConvertSequence { + start: Some(address2), + end: LineConvertSequenceEnd::Length(length), + rows: std::mem::take(&mut sequence_rows), + }); + program.end_sequence(length); + expected_instructions.extend_from_slice(&[ - LineInstruction::SetAddress(address1), + LineInstruction::SetAddress(Address::Constant(address1)), LineInstruction::Copy, - LineInstruction::SetAddress(address2), + LineInstruction::SetAddress(Address::Constant(address2)), LineInstruction::Copy, - LineInstruction::AdvancePc(0x1234), + LineInstruction::AdvancePc(length), LineInstruction::EndSequence, ]); // No `DW_LNE_set_address`. + expected_rows.push(LineConvertRow::Row(program.row)); + sequence_rows.push(program.row); program.generate_row(); - program.end_sequence(0x1234); + + expected_rows.push(LineConvertRow::EndSequence(length)); + expected_sequences.push(LineConvertSequence { + start: None, + end: LineConvertSequenceEnd::Length(length), + rows: std::mem::take(&mut sequence_rows), + }); + program.end_sequence(length); + expected_instructions.extend_from_slice(&[ LineInstruction::Copy, - LineInstruction::AdvancePc(0x1234), + LineInstruction::AdvancePc(length), LineInstruction::EndSequence, ]); - assert_eq!(program.instructions, expected_instructions); - + // Write the DWARF. let mut unit = Unit::new(encoding, program); let root = unit.get_mut(unit.root()); root.set(constants::DW_AT_stmt_list, AttributeValue::LineProgramRef); - let mut dwarf = Dwarf::new(); dwarf.units.add(unit); - let mut sections = Sections::new(EndianVec::new(LittleEndian)); dwarf.write(&mut sections).unwrap(); + + // Read the DWARF. let read_dwarf = sections.read(LittleEndian); + let read_unit_header = read_dwarf.units().next().unwrap().unwrap(); + let read_unit = read_dwarf.unit(read_unit_header).unwrap(); + let read_program = &read_unit.line_program.unwrap(); + // Test LineProgram::from. let convert_dwarf = Dwarf::from(&read_dwarf, &|address| Some(Address::Constant(address))) .unwrap(); let convert_unit = convert_dwarf.units.iter().next().unwrap().1; let convert_program = &convert_unit.line_program; assert_eq!(convert_program.instructions, expected_instructions); + + // Test LineConvert::read_row. + let mut convert_line_strings = LineStringTable::default(); + let mut convert_strings = StringTable::default(); + let mut convert_line = LineConvert::new( + &read_dwarf, + read_program.clone(), + None, + read_program.header().encoding(), + read_program.header().line_encoding(), + &mut convert_line_strings, + &mut convert_strings, + ) + .unwrap(); + let mut convert_rows = Vec::new(); + while let Some(convert_row) = convert_line.read_row().unwrap() { + convert_rows.push(convert_row); + } + assert_eq!(convert_rows, expected_rows); + + // Test LineConvert::read_sequence. + let mut convert_line_strings = LineStringTable::default(); + let mut convert_strings = StringTable::default(); + let mut convert_line = LineConvert::new( + &read_dwarf, + read_program.clone(), + None, + read_program.header().encoding(), + read_program.header().line_encoding(), + &mut convert_line_strings, + &mut convert_strings, + ) + .unwrap(); + let mut convert_sequences = Vec::new(); + while let Some(convert_sequence) = convert_line.read_sequence().unwrap() { + convert_sequences.push(convert_sequence); + } + assert_eq!(convert_sequences, expected_sequences); } } }